RJK committed 9 days ago

Commit

dc26eb1

verified ·

1 Parent(s): 593fee5

upload pi05 and gr00t LIBERO fine-tuned checkpoints (incremental cache)

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
gr00t_eagle_3b_libero_10_full_finetune_bs64/checkpoints/step-028548-epoch-18-loss=0.0546.safetensors +3 -0
gr00t_eagle_3b_libero_10_full_finetune_bs64/config.json +6 -9
gr00t_eagle_3b_libero_10_full_finetune_bs64/config.yaml +5 -8
gr00t_eagle_3b_libero_10_full_finetune_bs64/gr00t_eagle_3b_libero_10_full_finetune_2026_05_14_02_41_08.jsonl +0 -0
gr00t_eagle_3b_libero_10_full_finetune_bs64/run-metrics.jsonl +1 -1
gr00t_eagle_3b_libero_10_full_finetune_bs64/tokenizer/merges.txt +1 -1
gr00t_eagle_3b_libero_goal_full_finetune_bs64/checkpoints/step-014886-epoch-18-loss=0.0550.safetensors +3 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/config.json +302 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/config.yaml +220 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/dataset_statistics.json +138 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/gr00t_eagle_3b_libero_goal_full_finetune_2026_05_14_02_40_40.jsonl +0 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/run-metrics.jsonl +1 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/added_tokens.json +39 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/merges.txt +0 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/special_tokens_map.json +42 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/tokenizer_config.json +344 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/vocab.json +0 -0
gr00t_eagle_3b_libero_goal_full_finetune_bs64/vlm_backbone_config.json +106 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/checkpoints/step-018846-epoch-18-loss=0.0701.safetensors +3 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/config.json +302 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/config.yaml +220 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/dataset_statistics.json +104 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/gr00t_eagle_3b_libero_object_full_finetune_2026_05_14_02_40_05.jsonl +0 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/run-metrics.jsonl +1 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/added_tokens.json +39 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/merges.txt +0 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/special_tokens_map.json +42 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/tokenizer_config.json +344 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/vocab.json +0 -0
gr00t_eagle_3b_libero_object_full_finetune_bs64/vlm_backbone_config.json +106 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/checkpoints/step-014904-epoch-18-loss=0.0780.safetensors +3 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/config.json +302 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/config.yaml +220 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/dataset_statistics.json +104 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/gr00t_eagle_3b_libero_spatial_full_finetune_2026_05_14_02_41_01.jsonl +0 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/run-metrics.jsonl +1 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/added_tokens.json +39 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/merges.txt +0 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/special_tokens_map.json +42 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/tokenizer_config.json +344 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/vocab.json +0 -0
gr00t_eagle_3b_libero_spatial_full_finetune_bs64/vlm_backbone_config.json +106 -0
pi05_paligemma_libero_10_full_finetune_bs64/checkpoints/step-038064-epoch-24-loss=0.0170.safetensors +3 -0
pi05_paligemma_libero_10_full_finetune_bs64/config.json +3 -4
pi05_paligemma_libero_10_full_finetune_bs64/config.yaml +3 -4
pi05_paligemma_libero_10_full_finetune_bs64/pi05_paligemma_libero_10_full_finetune_2026_05_15_09_15_10.jsonl +3 -0
pi05_paligemma_libero_10_full_finetune_bs64/run-metrics.jsonl +1 -1
pi05_paligemma_libero_goal_full_finetune_bs64/checkpoints/step-019848-epoch-24-loss=0.0145.safetensors +3 -0
pi05_paligemma_libero_goal_full_finetune_bs64/config.json +355 -0

.gitattributes CHANGED Viewed

@@ -37,3 +37,4 @@ pi05_libero/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 pi05_base/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 pi0_base/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 gr00t_qwen3vl_0.6b_libero/tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text

 pi05_base/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 pi0_base/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 gr00t_qwen3vl_0.6b_libero/tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+pi05_paligemma_libero_10_full_finetune_bs64/pi05_paligemma_libero_10_full_finetune_2026_05_15_09_15_10.jsonl filter=lfs diff=lfs merge=lfs -text

gr00t_eagle_3b_libero_10_full_finetune_bs64/checkpoints/step-028548-epoch-18-loss=0.0546.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2485a81646356973d21f9c08075fbe3ba567f283ab16e31793fd682fdbb27c5
+size 10896783888

gr00t_eagle_3b_libero_10_full_finetune_bs64/config.json CHANGED Viewed

@@ -54,7 +54,7 @@
           "max_len": 600,
           "num_images": 2,
           "tokenizer": {
-            "model_path": "/limx/tos/users/liyinhao/projects/eagle2_hg_model",
             "type": "PretrainedTokenizer"
           },
           "type": "ProcessPromptsWithImage"
@@ -99,8 +99,7 @@
         "num_attention_heads": 32,
         "num_layers": 16,
         "output_dim": 1024,
-        "positional_embeddings": null,
-        "use_torch_compile": true
       },
       "hidden_size": 1024,
       "input_embedding_dim": 1536,
@@ -173,12 +172,10 @@
     "lr_scheduler_type": "linear-warmup+cosine-decay",
     "max_epochs": 18,
     "max_grad_norm": 1.0,
-    "max_keep_ckpts": 1,
-    "max_steps": null,
     "metric": {
       "active_trackers": [
         "jsonl",
-        "wandb"
       ],
       "grad_accumulation_steps": 1,
       "run_dir": "work_dirs",
@@ -188,7 +185,7 @@
     "mixed_precision_dtype": "bf16",
     "sampler": null,
     "tokenizer": {
-      "model_path": "/limx/tos/users/liyinhao/projects/eagle2_hg_model",
       "type": "PretrainedTokenizer"
     },
     "type": "FSDPTrainRunner",
@@ -372,7 +369,7 @@
             "max_len": 600,
             "num_images": 2,
             "tokenizer": {
-              "model_path": "/limx/tos/users/liyinhao/projects/eagle2_hg_model",
               "type": "PretrainedTokenizer"
             },
             "type": "ProcessPromptsWithImage"
@@ -430,7 +427,7 @@
           "proprio"
         ]
       },
-      "seed": 1,
       "statistic_keys": [
         "observation.state",
         "timestamp",

           "max_len": 600,
           "num_images": 2,
           "tokenizer": {
+            "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
             "type": "PretrainedTokenizer"
           },
           "type": "ProcessPromptsWithImage"
         "num_attention_heads": 32,
         "num_layers": 16,
         "output_dim": 1024,
+        "positional_embeddings": null
       },
       "hidden_size": 1024,
       "input_embedding_dim": 1536,
     "lr_scheduler_type": "linear-warmup+cosine-decay",
     "max_epochs": 18,
     "max_grad_norm": 1.0,
     "metric": {
       "active_trackers": [
         "jsonl",
+        "tensorboard"
       ],
       "grad_accumulation_steps": 1,
       "run_dir": "work_dirs",
     "mixed_precision_dtype": "bf16",
     "sampler": null,
     "tokenizer": {
+      "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
       "type": "PretrainedTokenizer"
     },
     "type": "FSDPTrainRunner",
             "max_len": 600,
             "num_images": 2,
             "tokenizer": {
+              "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
               "type": "PretrainedTokenizer"
             },
             "type": "ProcessPromptsWithImage"
           "proprio"
         ]
       },
+      "seed": 7,
       "statistic_keys": [
         "observation.state",
         "timestamp",

gr00t_eagle_3b_libero_10_full_finetune_bs64/config.yaml CHANGED Viewed

@@ -32,7 +32,7 @@ eval:
     - max_len: 600
       num_images: 2
       tokenizer:
-        model_path: /limx/tos/users/liyinhao/projects/eagle2_hg_model
         type: PretrainedTokenizer
       type: ProcessPromptsWithImage
     - gripper_key: robot0_gripper_qpos
@@ -70,7 +70,6 @@ inference_model:
       num_layers: 16
       output_dim: 1024
       positional_embeddings: null
-      use_torch_compile: true
     hidden_size: 1024
     input_embedding_dim: 1536
     num_heads: 4
@@ -132,12 +131,10 @@ runner:
   lr_scheduler_type: linear-warmup+cosine-decay
   max_epochs: 18
   max_grad_norm: 1.0
-  max_keep_ckpts: 1
-  max_steps: null
   metric:
     active_trackers:
     - jsonl
-    - wandb
     grad_accumulation_steps: 1
     run_dir: work_dirs
     type: VLAMetric
@@ -145,7 +142,7 @@ runner:
   mixed_precision_dtype: bf16
   sampler: null
   tokenizer:
-    model_path: /limx/tos/users/liyinhao/projects/eagle2_hg_model
     type: PretrainedTokenizer
   type: FSDPTrainRunner
   warmup_ratio: 0.03
@@ -296,7 +293,7 @@ train_dataloader:
       - max_len: 600
         num_images: 2
         tokenizer:
-          model_path: /limx/tos/users/liyinhao/projects/eagle2_hg_model
           type: PretrainedTokenizer
         type: ProcessPromptsWithImage
       - height: 224
@@ -331,7 +328,7 @@ train_dataloader:
       - action
       observation.state:
       - proprio
-    seed: 1
     statistic_keys:
     - observation.state
     - timestamp

     - max_len: 600
       num_images: 2
       tokenizer:
+        model_path: fluxvla/models/third_party_models/eagle2_hg_model
         type: PretrainedTokenizer
       type: ProcessPromptsWithImage
     - gripper_key: robot0_gripper_qpos
       num_layers: 16
       output_dim: 1024
       positional_embeddings: null
     hidden_size: 1024
     input_embedding_dim: 1536
     num_heads: 4
   lr_scheduler_type: linear-warmup+cosine-decay
   max_epochs: 18
   max_grad_norm: 1.0
   metric:
     active_trackers:
     - jsonl
+    - tensorboard
     grad_accumulation_steps: 1
     run_dir: work_dirs
     type: VLAMetric
   mixed_precision_dtype: bf16
   sampler: null
   tokenizer:
+    model_path: fluxvla/models/third_party_models/eagle2_hg_model
     type: PretrainedTokenizer
   type: FSDPTrainRunner
   warmup_ratio: 0.03
       - max_len: 600
         num_images: 2
         tokenizer:
+          model_path: fluxvla/models/third_party_models/eagle2_hg_model
           type: PretrainedTokenizer
         type: ProcessPromptsWithImage
       - height: 224
       - action
       observation.state:
       - proprio
+    seed: 7
     statistic_keys:
     - observation.state
     - timestamp

gr00t_eagle_3b_libero_10_full_finetune_bs64/gr00t_eagle_3b_libero_10_full_finetune_2026_05_14_02_41_08.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_10_full_finetune_bs64/run-metrics.jsonl CHANGED Viewed

@@ -1 +1 @@

- {"hparams": "{'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'dtype': 'bf16', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'use_torch_compile': True, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'seed': 1, 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'dataset_statistics': {'libero_10_no_noops': {'proprio': {'mean': [-0.0419132679050224, 0.034591788297521735, 0.8265881844959498, 2.90259518190321, -0.5570652600832564, -0.16592166873533284, 0.02845031351083622, -0.02880236273799356], 'std': [0.03756502182067285, 0.05091765880150317, 0.09107525593038836, 0.12327524826514363, 0.4418352294043351, 0.12490994022681218, 0.004662133639412193, 0.00460807817987938], 'min': 
[-0.48278069496154785, -0.3309336006641388, 0.44550687074661255, 1.1323540210723877, -3.6312508583068848, -1.842738389968872, -0.005453015677630901, -0.04112039878964424], 'max': [0.2103137969970703, 0.38887521624565125, 1.333192229270935, 3.7248642444610596, 3.5618896484375, 1.3863215446472168, 0.041575800627470016, 0.0013126095291227102], 'q01': [-0.1855636807291125, -0.16145669766439186, 0.7064185725262808, 2.5678211534702324, -1.2430377303522737, -0.5195810482339626, 0.01022917473133343, -0.03999379658232052], 'q99': [0.05938728483051665, 0.2361478409238694, 0.9397258571145816, 3.2118708728143526, 0.49082919816100534, 0.2100883989120329, 0.040047131839991014, -0.011104049991952391]}, 'timestamp': {'mean': [7.007510548523206], 'std': [4.457129586378845], 'min': [0.0], 'max': [25.2], 'q01': None, 'q99': None}, 'action': {'mean': [0.01905656634877842, 0.05672475971568838, -0.056239289430234256, 0.004756678478841528, 0.002797492338491304, -0.00714607048416358, 0.54599156235075], 'std': [0.10588348353857541, 0.13552477199270377, 0.13886650724555177, 0.01433739270759898, 0.02038583948325967, 0.033299202425577934, 0.1881810653484855], 'min': [-0.9375, -0.9375, -0.9375, -0.23642857372760773, -0.3053571283817291, -0.3642857074737549, 0.0], 'max': [0.9375, 0.9375, 0.9375, 0.32892856001853943, 0.36964285373687744, 0.375, 1.0], 'q01': [-0.4997477764535965, -0.6992653512084763, -0.6543309163615124, -0.07417070079989778, -0.11898748445770971, -0.15976085962510805, 0.0], 'q99': [0.658747846713789, 0.7333480638990948, 0.768601965587579, 0.09784501244893279, 0.12943469061349036, 0.15137893471596325, 1.0]}}}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 
'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': './checkpoints/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': './checkpoints/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ('jsonl', 'wandb'), 'run_dir': './work_dirs/73d1dcc4f_gr00t_eagle_3b_libero_10_full_finetune_bs64_seed1', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'dtype': 'bf16', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 
'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'use_torch_compile': True, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'seed': 1, 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'dataset_statistics': {'libero_10_no_noops': {'proprio': {'mean': [-0.0419132679050224, 0.034591788297521735, 0.8265881844959498, 2.90259518190321, -0.5570652600832564, -0.16592166873533284, 0.02845031351083622, -0.02880236273799356], 'std': [0.03756502182067285, 0.05091765880150317, 0.09107525593038836, 0.12327524826514363, 0.4418352294043351, 0.12490994022681218, 0.004662133639412193, 0.00460807817987938], 'min': [-0.48278069496154785, -0.3309336006641388, 0.44550687074661255, 1.1323540210723877, -3.6312508583068848, -1.842738389968872, -0.005453015677630901, -0.04112039878964424], 'max': [0.2103137969970703, 0.38887521624565125, 1.333192229270935, 3.7248642444610596, 3.5618896484375, 1.3863215446472168, 0.041575800627470016, 
0.0013126095291227102], 'q01': [-0.1855636807291125, -0.16145669766439186, 0.7064185725262808, 2.5678211534702324, -1.2430377303522737, -0.5195810482339626, 0.01022917473133343, -0.03999379658232052], 'q99': [0.05938728483051665, 0.2361478409238694, 0.9397258571145816, 3.2118708728143526, 0.49082919816100534, 0.2100883989120329, 0.040047131839991014, -0.011104049991952391]}, 'timestamp': {'mean': [7.007510548523206], 'std': [4.457129586378845], 'min': [0.0], 'max': [25.2], 'q01': None, 'q99': None}, 'action': {'mean': [0.01905656634877842, 0.05672475971568838, -0.056239289430234256, 0.004756678478841528, 0.002797492338491304, -0.00714607048416358, 0.54599156235075], 'std': [0.10588348353857541, 0.13552477199270377, 0.13886650724555177, 0.01433739270759898, 0.02038583948325967, 0.033299202425577934, 0.1881810653484855], 'min': [-0.9375, -0.9375, -0.9375, -0.23642857372760773, -0.3053571283817291, -0.3642857074737549, 0.0], 'max': [0.9375, 0.9375, 0.9375, 0.32892856001853943, 0.36964285373687744, 0.375, 1.0], 'q01': [-0.4997477764535965, -0.6992653512084763, -0.6543309163615124, -0.07417070079989778, -0.11898748445770971, -0.15976085962510805, 0.0], 'q99': [0.658747846713789, 0.7333480638990948, 0.768601965587579, 0.09784501244893279, 0.12943469061349036, 0.15137893471596325, 1.0]}}}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 
'./checkpoints/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': './checkpoints/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ('jsonl', 'wandb'), 'run_dir': './work_dirs/73d1dcc4f_gr00t_eagle_3b_libero_10_full_finetune_bs64_seed1', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_10_full_finetune_2026_03_15_08_28_06'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'max_steps': None, 'max_keep_ckpts': 1, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py', work_dir='./work_dirs/73d1dcc4f_gr00t_eagle_3b_libero_10_full_finetune_bs64_seed1', 
cfg_options={'train_dataloader.per_device_batch_size': 8, 'runner.max_epochs': 18, 'runner.max_steps': None, 'runner.max_keep_ckpts': 1, 'train_dataloader.dataset.seed': 1}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': './checkpoints/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'run_id': 'gr00t_eagle_3b_libero_10_full_finetune_2026_03_15_08_28_06'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'max_steps': None, 'max_keep_ckpts': 1, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'dtype': 'bf16', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 
'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'use_torch_compile': True, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'seed': 1, 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'dataset_statistics': {'libero_10_no_noops': {'proprio': {'mean': [-0.0419132679050224, 0.034591788297521735, 0.8265881844959498, 2.90259518190321, -0.5570652600832564, -0.16592166873533284, 0.02845031351083622, -0.02880236273799356], 'std': [0.03756502182067285, 0.05091765880150317, 0.09107525593038836, 0.12327524826514363, 0.4418352294043351, 0.12490994022681218, 0.004662133639412193, 0.00460807817987938], 'min': [-0.48278069496154785, -0.3309336006641388, 0.44550687074661255, 1.1323540210723877, -3.6312508583068848, -1.842738389968872, -0.005453015677630901, -0.04112039878964424], 'max': [0.2103137969970703, 0.38887521624565125, 1.333192229270935, 3.7248642444610596, 
3.5618896484375, 1.3863215446472168, 0.041575800627470016, 0.0013126095291227102], 'q01': [-0.1855636807291125, -0.16145669766439186, 0.7064185725262808, 2.5678211534702324, -1.2430377303522737, -0.5195810482339626, 0.01022917473133343, -0.03999379658232052], 'q99': [0.05938728483051665, 0.2361478409238694, 0.9397258571145816, 3.2118708728143526, 0.49082919816100534, 0.2100883989120329, 0.040047131839991014, -0.011104049991952391]}, 'timestamp': {'mean': [7.007510548523206], 'std': [4.457129586378845], 'min': [0.0], 'max': [25.2], 'q01': None, 'q99': None}, 'action': {'mean': [0.01905656634877842, 0.05672475971568838, -0.056239289430234256, 0.004756678478841528, 0.002797492338491304, -0.00714607048416358, 0.54599156235075], 'std': [0.10588348353857541, 0.13552477199270377, 0.13886650724555177, 0.01433739270759898, 0.02038583948325967, 0.033299202425577934, 0.1881810653484855], 'min': [-0.9375, -0.9375, -0.9375, -0.23642857372760773, -0.3053571283817291, -0.3642857074737549, 0.0], 'max': [0.9375, 0.9375, 0.9375, 0.32892856001853943, 0.36964285373687744, 0.375, 1.0], 'q01': [-0.4997477764535965, -0.6992653512084763, -0.6543309163615124, -0.07417070079989778, -0.11898748445770971, -0.15976085962510805, 0.0], 'q99': [0.658747846713789, 0.7333480638990948, 0.768601965587579, 0.09784501244893279, 0.12943469061349036, 0.15137893471596325, 1.0]}}}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': 
{'type': 'PretrainedTokenizer', 'model_path': './checkpoints/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': './checkpoints/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ('jsonl', 'wandb'), 'run_dir': './work_dirs/73d1dcc4f_gr00t_eagle_3b_libero_10_full_finetune_bs64_seed1', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_10_full_finetune_2026_03_15_08_28_06'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'max_steps': None, 'max_keep_ckpts': 1, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py', work_dir='./work_dirs/73d1dcc4f_gr00t_eagle_3b_libero_10_full_finetune_bs64_seed1', 
cfg_options={'train_dataloader.per_device_batch_size': 8, 'runner.max_epochs': 18, 'runner.max_steps': None, 'runner.max_keep_ckpts': 1, 'train_dataloader.dataset.seed': 1}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': './checkpoints/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py', work_dir='./work_dirs/73d1dcc4f_gr00t_eagle_3b_libero_10_full_finetune_bs64_seed1', cfg_options={'train_dataloader.per_device_batch_size': 8, 'runner.max_epochs': 18, 'runner.max_steps': None, 'runner.max_keep_ckpts': 1, 'train_dataloader.dataset.seed': 1}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 
'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': './checkpoints/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}", "run_id": "gr00t_eagle_3b_libero_10_full_finetune_2026_03_15_08_28_06"}

+ {"hparams": "{'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'dtype': 'bf16', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'seed': 7, 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'dataset_statistics': {'libero_10_no_noops': {'proprio': {'mean': [-0.0419132679050224, 0.034591788297521735, 0.8265881844959498, 2.90259518190321, -0.5570652600832564, -0.16592166873533284, 0.02845031351083622, -0.02880236273799356], 'std': [0.03756502182067285, 0.05091765880150317, 0.09107525593038836, 0.12327524826514363, 0.4418352294043351, 0.12490994022681218, 0.004662133639412193, 0.00460807817987938], 'min': [-0.48278069496154785, 
-0.3309336006641388, 0.44550687074661255, 1.1323540210723877, -3.6312508583068848, -1.842738389968872, -0.005453015677630901, -0.04112039878964424], 'max': [0.2103137969970703, 0.38887521624565125, 1.333192229270935, 3.7248642444610596, 3.5618896484375, 1.3863215446472168, 0.041575800627470016, 0.0013126095291227102], 'q01': [-0.1855636807291125, -0.16145669766439186, 0.7064185725262808, 2.5678211534702324, -1.2430377303522737, -0.5195810482339626, 0.01022917473133343, -0.03999379658232052], 'q99': [0.05938728483051665, 0.2361478409238694, 0.9397258571145816, 3.2118708728143526, 0.49082919816100534, 0.2100883989120329, 0.040047131839991014, -0.011104049991952391]}, 'timestamp': {'mean': [7.007510548523206], 'std': [4.457129586378845], 'min': [0.0], 'max': [25.2], 'q01': None, 'q99': None}, 'action': {'mean': [0.01905656634877842, 0.05672475971568838, -0.056239289430234256, 0.004756678478841528, 0.002797492338491304, -0.00714607048416358, 0.54599156235075], 'std': [0.10588348353857541, 0.13552477199270377, 0.13886650724555177, 0.01433739270759898, 0.02038583948325967, 0.033299202425577934, 0.1881810653484855], 'min': [-0.9375, -0.9375, -0.9375, -0.23642857372760773, -0.3053571283817291, -0.3642857074737549, 0.0], 'max': [0.9375, 0.9375, 0.9375, 0.32892856001853943, 0.36964285373687744, 0.375, 1.0], 'q01': [-0.4997477764535965, -0.6992653512084763, -0.6543309163615124, -0.07417070079989778, -0.11898748445770971, -0.15976085962510805, 0.0], 'q99': [0.658747846713789, 0.7333480638990948, 0.768601965587579, 0.09784501244893279, 0.12943469061349036, 0.15137893471596325, 1.0]}}}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': 
['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_10', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'dtype': 'bf16', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 
'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'seed': 7, 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'dataset_statistics': {'libero_10_no_noops': {'proprio': {'mean': [-0.0419132679050224, 0.034591788297521735, 0.8265881844959498, 2.90259518190321, -0.5570652600832564, -0.16592166873533284, 0.02845031351083622, -0.02880236273799356], 'std': [0.03756502182067285, 0.05091765880150317, 0.09107525593038836, 0.12327524826514363, 0.4418352294043351, 0.12490994022681218, 0.004662133639412193, 0.00460807817987938], 'min': [-0.48278069496154785, -0.3309336006641388, 0.44550687074661255, 1.1323540210723877, -3.6312508583068848, -1.842738389968872, -0.005453015677630901, -0.04112039878964424], 'max': [0.2103137969970703, 0.38887521624565125, 1.333192229270935, 3.7248642444610596, 3.5618896484375, 1.3863215446472168, 0.041575800627470016, 0.0013126095291227102], 'q01': 
[-0.1855636807291125, -0.16145669766439186, 0.7064185725262808, 2.5678211534702324, -1.2430377303522737, -0.5195810482339626, 0.01022917473133343, -0.03999379658232052], 'q99': [0.05938728483051665, 0.2361478409238694, 0.9397258571145816, 3.2118708728143526, 0.49082919816100534, 0.2100883989120329, 0.040047131839991014, -0.011104049991952391]}, 'timestamp': {'mean': [7.007510548523206], 'std': [4.457129586378845], 'min': [0.0], 'max': [25.2], 'q01': None, 'q99': None}, 'action': {'mean': [0.01905656634877842, 0.05672475971568838, -0.056239289430234256, 0.004756678478841528, 0.002797492338491304, -0.00714607048416358, 0.54599156235075], 'std': [0.10588348353857541, 0.13552477199270377, 0.13886650724555177, 0.01433739270759898, 0.02038583948325967, 0.033299202425577934, 0.1881810653484855], 'min': [-0.9375, -0.9375, -0.9375, -0.23642857372760773, -0.3053571283817291, -0.3642857074737549, 0.0], 'max': [0.9375, 0.9375, 0.9375, 0.32892856001853943, 0.36964285373687744, 0.375, 1.0], 'q01': [-0.4997477764535965, -0.6992653512084763, -0.6543309163615124, -0.07417070079989778, -0.11898748445770971, -0.15976085962510805, 0.0], 'q99': [0.658747846713789, 0.7333480638990948, 0.768601965587579, 0.09784501244893279, 0.12943469061349036, 0.15137893471596325, 1.0]}}}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 
'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_10', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_10_full_finetune_2026_05_14_02_41_08'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_10', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, 
resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'run_id': 'gr00t_eagle_3b_libero_10_full_finetune_2026_05_14_02_41_08'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'dtype': 'bf16', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': 
False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'seed': 7, 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'dataset_statistics': {'libero_10_no_noops': {'proprio': {'mean': [-0.0419132679050224, 0.034591788297521735, 0.8265881844959498, 2.90259518190321, -0.5570652600832564, -0.16592166873533284, 0.02845031351083622, -0.02880236273799356], 'std': [0.03756502182067285, 0.05091765880150317, 0.09107525593038836, 0.12327524826514363, 0.4418352294043351, 0.12490994022681218, 0.004662133639412193, 0.00460807817987938], 'min': [-0.48278069496154785, -0.3309336006641388, 0.44550687074661255, 1.1323540210723877, -3.6312508583068848, -1.842738389968872, -0.005453015677630901, -0.04112039878964424], 'max': [0.2103137969970703, 0.38887521624565125, 1.333192229270935, 3.7248642444610596, 3.5618896484375, 1.3863215446472168, 0.041575800627470016, 0.0013126095291227102], 'q01': [-0.1855636807291125, -0.16145669766439186, 0.7064185725262808, 2.5678211534702324, -1.2430377303522737, -0.5195810482339626, 0.01022917473133343, 
-0.03999379658232052], 'q99': [0.05938728483051665, 0.2361478409238694, 0.9397258571145816, 3.2118708728143526, 0.49082919816100534, 0.2100883989120329, 0.040047131839991014, -0.011104049991952391]}, 'timestamp': {'mean': [7.007510548523206], 'std': [4.457129586378845], 'min': [0.0], 'max': [25.2], 'q01': None, 'q99': None}, 'action': {'mean': [0.01905656634877842, 0.05672475971568838, -0.056239289430234256, 0.004756678478841528, 0.002797492338491304, -0.00714607048416358, 0.54599156235075], 'std': [0.10588348353857541, 0.13552477199270377, 0.13886650724555177, 0.01433739270759898, 0.02038583948325967, 0.033299202425577934, 0.1881810653484855], 'min': [-0.9375, -0.9375, -0.9375, -0.23642857372760773, -0.3053571283817291, -0.3642857074737549, 0.0], 'max': [0.9375, 0.9375, 0.9375, 0.32892856001853943, 0.36964285373687744, 0.375, 1.0], 'q01': [-0.4997477764535965, -0.6992653512084763, -0.6543309163615124, -0.07417070079989778, -0.11898748445770971, -0.15976085962510805, 0.0], 'q99': [0.658747846713789, 0.7333480638990948, 0.768601965587579, 0.09784501244893279, 0.12943469061349036, 0.15137893471596325, 1.0]}}}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], 
[123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_10', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_10_full_finetune_2026_05_14_02_41_08'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_10', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 
'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_10_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_10', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': 
{'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}", "run_id": "gr00t_eagle_3b_libero_10_full_finetune_2026_05_14_02_41_08"}

gr00t_eagle_3b_libero_10_full_finetune_bs64/tokenizer/merges.txt CHANGED Viewed

@@ -151385,4 +151385,4 @@ krÃ¤ fte
 áķ ·
 âį ¨
 âº Ł
-â½ Ĺ

 áķ ·
 âį ¨
 âº Ł
+â½ Ĺ

gr00t_eagle_3b_libero_goal_full_finetune_bs64/checkpoints/step-014886-epoch-18-loss=0.0550.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5a6817ea595796a6cc1ee83363141da7e2d3f519183c7addfc37696a2895162
+size 10896783888

gr00t_eagle_3b_libero_goal_full_finetune_bs64/config.json ADDED Viewed

	@@ -0,0 +1,302 @@

+{
+  "eval": {
+    "dataset": {
+      "transforms": [
+        {
+          "embodiment_id": 2,
+          "img_keys": [
+            "agentview_image",
+            "robot0_eye_in_hand_image"
+          ],
+          "type": "ProcessLiberoEvalInputs"
+        },
+        {
+          "image_resize_strategy": "resize-naive",
+          "input_sizes": [
+            [
+              3,
+              224,
+              224
+            ],
+            [
+              3,
+              224,
+              224
+            ]
+          ],
+          "means": [
+            [
+              123.515625,
+              116.04492188,
+              103.59375
+            ],
+            [
+              123.515625,
+              116.04492188,
+              103.59375
+            ]
+          ],
+          "stds": [
+            [
+              58.27148438,
+              57.02636719,
+              57.27539062
+            ],
+            [
+              58.27148438,
+              57.02636719,
+              57.27539062
+            ]
+          ],
+          "type": "TransformImage"
+        },
+        {
+          "max_len": 600,
+          "num_images": 2,
+          "tokenizer": {
+            "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+            "type": "PretrainedTokenizer"
+          },
+          "type": "ProcessPromptsWithImage"
+        },
+        {
+          "gripper_key": "robot0_gripper_qpos",
+          "norm_type": "mean_std",
+          "out_key": "states",
+          "pos_key": "robot0_eef_pos",
+          "quat_key": "robot0_eef_quat",
+          "state_dim": 64,
+          "type": "LiberoProprioFromInputs"
+        }
+      ],
+      "type": "LiberoParquetEvalDataset"
+    },
+    "denormalize_action": {
+      "norm_type": "mean_std",
+      "type": "DenormalizeLiberoAction"
+    },
+    "eval_chunk_size": 10,
+    "model_family": "pi0",
+    "num_steps_wait": 10,
+    "num_trials_per_task": 50,
+    "resize_size": 224,
+    "seed": 7,
+    "task_suite_name": "libero_goal",
+    "type": "LiberoEvalRunner"
+  },
+  "inference_model": {
+    "pretrained_name_or_path": "./checkpoints/GR00T-N1.5-3B",
+    "type": "LlavaVLA",
+    "vla_head": {
+      "action_dim": 32,
+      "diffusion_model_cfg": {
+        "attention_head_dim": 48,
+        "cross_attention_dim": 2048,
+        "dropout": 0.2,
+        "final_dropout": true,
+        "interleave_self_attention": true,
+        "norm_type": "ada_norm",
+        "num_attention_heads": 32,
+        "num_layers": 16,
+        "output_dim": 1024,
+        "positional_embeddings": null
+      },
+      "hidden_size": 1024,
+      "input_embedding_dim": 1536,
+      "num_heads": 4,
+      "num_inference_timesteps": 4,
+      "num_layers": 1,
+      "ori_action_dim": 7,
+      "state_dim": 64,
+      "traj_length": 10,
+      "type": "FlowMatchingInferenceHead"
+    },
+    "vlm_backbone": {
+      "type": "EagleInferenceBackbone",
+      "vlm_path": "fluxvla/models/third_party_models/eagle2_hg_model"
+    }
+  },
+  "model": {
+    "freeze_projector": false,
+    "freeze_vlm_backbone": false,
+    "name_mapping": {
+      "vla_head": "action_head",
+      "vlm_backbone.vlm": "backbone.eagle_model"
+    },
+    "pretrained_name_or_path": "./checkpoints/GR00T-N1.5-3B",
+    "type": "LlavaVLA",
+    "vla_head": {
+      "action_dim": 32,
+      "hidden_size": 1024,
+      "input_embedding_dim": 1536,
+      "num_heads": 4,
+      "num_inference_timesteps": 4,
+      "num_layers": 1,
+      "ori_action_dim": 7,
+      "state_dim": 64,
+      "traj_length": 10,
+      "type": "FlowMatchingHead"
+    },
+    "vlm_backbone": {
+      "type": "EagleBackbone",
+      "vlm_path": "fluxvla/models/third_party_models/eagle2_hg_model"
+    }
+  },
+  "runner": {
+    "change_key_name": false,
+    "collator": {
+      "keys": [
+        "states",
+        "observation.eepose",
+        "timestamp",
+        "images",
+        "img_masks",
+        "lang_tokens",
+        "lang_masks",
+        "actions",
+        "action_masks",
+        "embodiment_ids"
+      ],
+      "meta_keys": [
+        "task_description",
+        "prompt",
+        "info",
+        "stats"
+      ],
+      "type": "DictCollator"
+    },
+    "enable_gradient_checkpointing": false,
+    "enable_mixed_precision_training": true,
+    "learning_rate": 1.5e-05,
+    "lr_scheduler_type": "linear-warmup+cosine-decay",
+    "max_epochs": 18,
+    "max_grad_norm": 1.0,
+    "metric": {
+      "active_trackers": [
+        "jsonl",
+        "tensorboard"
+      ],
+      "grad_accumulation_steps": 1,
+      "run_dir": "work_dirs",
+      "type": "VLAMetric",
+      "window_size": 1
+    },
+    "mixed_precision_dtype": "bf16",
+    "sampler": null,
+    "tokenizer": {
+      "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+      "type": "PretrainedTokenizer"
+    },
+    "type": "FSDPTrainRunner",
+    "warmup_ratio": 0.03,
+    "weight_decay": 0.0
+  },
+  "train_dataloader": {
+    "dataset": {
+      "datasets": {
+        "action_key": "action",
+        "action_window_size": 10,
+        "data_root_path": "datasets/libero_goal_no_noops_lerobotv2.1",
+        "statistic_name": "libero_goal_no_noops",
+        "transforms": [
+          {
+            "embodiment_id": 2,
+            "name_mappings": {
+              "actions": [
+                "actions"
+              ],
+              "observation.state": [
+                "states"
+              ]
+            },
+            "parquet_keys": [
+              "observation.state",
+              "timestamp",
+              "actions",
+              "info",
+              "stats",
+              "action_masks"
+            ],
+            "type": "ProcessParquetInputs",
+            "video_keys": [
+              "observation.images.image",
+              "observation.images.wrist_image"
+            ]
+          },
+          {
+            "type": "ParquetPrompter"
+          },
+          {
+            "max_len": 600,
+            "num_images": 2,
+            "tokenizer": {
+              "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+              "type": "PretrainedTokenizer"
+            },
+            "type": "ProcessPromptsWithImage"
+          },
+          {
+            "height": 224,
+            "type": "ResizeImages",
+            "width": 224
+          },
+          {
+            "means": [
+              [
+                123.515625,
+                116.04492188,
+                103.59375
+              ],
+              [
+                123.515625,
+                116.04492188,
+                103.59375
+              ]
+            ],
+            "stds": [
+              [
+                58.27148438,
+                57.02636719,
+                57.27539062
+              ],
+              [
+                58.27148438,
+                57.02636719,
+                57.27539062
+              ]
+            ],
+            "type": "NormalizeImages"
+          },
+          {
+            "action_dim": 32,
+            "action_key": "action",
+            "norm_type": "mean_std",
+            "state_dim": 64,
+            "state_key": "proprio",
+            "type": "NormalizeStatesAndActions"
+          }
+        ],
+        "type": "ParquetDataset",
+        "use_delta": false,
+        "window_start_idx": 0
+      },
+      "name_mappings": {
+        "action": [
+          "action"
+        ],
+        "observation.state": [
+          "proprio"
+        ]
+      },
+      "statistic_keys": [
+        "observation.state",
+        "timestamp",
+        "action"
+      ],
+      "statistic_name": "libero_goal_no_noops",
+      "type": "DistributedRepeatingDataset"
+    },
+    "per_device_batch_size": 8,
+    "per_device_num_workers": 4
+  }
+}

gr00t_eagle_3b_libero_goal_full_finetune_bs64/config.yaml ADDED Viewed

	@@ -0,0 +1,220 @@

+eval:
+  dataset:
+    transforms:
+    - embodiment_id: 2
+      img_keys:
+      - agentview_image
+      - robot0_eye_in_hand_image
+      type: ProcessLiberoEvalInputs
+    - image_resize_strategy: resize-naive
+      input_sizes:
+      - - 3
+        - 224
+        - 224
+      - - 3
+        - 224
+        - 224
+      means:
+      - - 123.515625
+        - 116.04492188
+        - 103.59375
+      - - 123.515625
+        - 116.04492188
+        - 103.59375
+      stds:
+      - - 58.27148438
+        - 57.02636719
+        - 57.27539062
+      - - 58.27148438
+        - 57.02636719
+        - 57.27539062
+      type: TransformImage
+    - max_len: 600
+      num_images: 2
+      tokenizer:
+        model_path: fluxvla/models/third_party_models/eagle2_hg_model
+        type: PretrainedTokenizer
+      type: ProcessPromptsWithImage
+    - gripper_key: robot0_gripper_qpos
+      norm_type: mean_std
+      out_key: states
+      pos_key: robot0_eef_pos
+      quat_key: robot0_eef_quat
+      state_dim: 64
+      type: LiberoProprioFromInputs
+    type: LiberoParquetEvalDataset
+  denormalize_action:
+    norm_type: mean_std
+    type: DenormalizeLiberoAction
+  eval_chunk_size: 10
+  model_family: pi0
+  num_steps_wait: 10
+  num_trials_per_task: 50
+  resize_size: 224
+  seed: 7
+  task_suite_name: libero_goal
+  type: LiberoEvalRunner
+inference_model:
+  pretrained_name_or_path: ./checkpoints/GR00T-N1.5-3B
+  type: LlavaVLA
+  vla_head:
+    action_dim: 32
+    diffusion_model_cfg:
+      attention_head_dim: 48
+      cross_attention_dim: 2048
+      dropout: 0.2
+      final_dropout: true
+      interleave_self_attention: true
+      norm_type: ada_norm
+      num_attention_heads: 32
+      num_layers: 16
+      output_dim: 1024
+      positional_embeddings: null
+    hidden_size: 1024
+    input_embedding_dim: 1536
+    num_heads: 4
+    num_inference_timesteps: 4
+    num_layers: 1
+    ori_action_dim: 7
+    state_dim: 64
+    traj_length: 10
+    type: FlowMatchingInferenceHead
+  vlm_backbone:
+    type: EagleInferenceBackbone
+    vlm_path: fluxvla/models/third_party_models/eagle2_hg_model
+model:
+  freeze_projector: false
+  freeze_vlm_backbone: false
+  name_mapping:
+    vla_head: action_head
+    vlm_backbone.vlm: backbone.eagle_model
+  pretrained_name_or_path: ./checkpoints/GR00T-N1.5-3B
+  type: LlavaVLA
+  vla_head:
+    action_dim: 32
+    hidden_size: 1024
+    input_embedding_dim: 1536
+    num_heads: 4
+    num_inference_timesteps: 4
+    num_layers: 1
+    ori_action_dim: 7
+    state_dim: 64
+    traj_length: 10
+    type: FlowMatchingHead
+  vlm_backbone:
+    type: EagleBackbone
+    vlm_path: fluxvla/models/third_party_models/eagle2_hg_model
+runner:
+  change_key_name: false
+  collator:
+    keys:
+    - states
+    - observation.eepose
+    - timestamp
+    - images
+    - img_masks
+    - lang_tokens
+    - lang_masks
+    - actions
+    - action_masks
+    - embodiment_ids
+    meta_keys:
+    - task_description
+    - prompt
+    - info
+    - stats
+    type: DictCollator
+  enable_gradient_checkpointing: false
+  enable_mixed_precision_training: true
+  learning_rate: 1.5e-05
+  lr_scheduler_type: linear-warmup+cosine-decay
+  max_epochs: 18
+  max_grad_norm: 1.0
+  metric:
+    active_trackers:
+    - jsonl
+    - tensorboard
+    grad_accumulation_steps: 1
+    run_dir: work_dirs
+    type: VLAMetric
+    window_size: 1
+  mixed_precision_dtype: bf16
+  sampler: null
+  tokenizer:
+    model_path: fluxvla/models/third_party_models/eagle2_hg_model
+    type: PretrainedTokenizer
+  type: FSDPTrainRunner
+  warmup_ratio: 0.03
+  weight_decay: 0.0
+train_dataloader:
+  dataset:
+    datasets:
+      action_key: action
+      action_window_size: 10
+      data_root_path: datasets/libero_goal_no_noops_lerobotv2.1
+      statistic_name: libero_goal_no_noops
+      transforms:
+      - embodiment_id: 2
+        name_mappings:
+          actions:
+          - actions
+          observation.state:
+          - states
+        parquet_keys:
+        - observation.state
+        - timestamp
+        - actions
+        - info
+        - stats
+        - action_masks
+        type: ProcessParquetInputs
+        video_keys:
+        - observation.images.image
+        - observation.images.wrist_image
+      - type: ParquetPrompter
+      - max_len: 600
+        num_images: 2
+        tokenizer:
+          model_path: fluxvla/models/third_party_models/eagle2_hg_model
+          type: PretrainedTokenizer
+        type: ProcessPromptsWithImage
+      - height: 224
+        type: ResizeImages
+        width: 224
+      - means:
+        - - 123.515625
+          - 116.04492188
+          - 103.59375
+        - - 123.515625
+          - 116.04492188
+          - 103.59375
+        stds:
+        - - 58.27148438
+          - 57.02636719
+          - 57.27539062
+        - - 58.27148438
+          - 57.02636719
+          - 57.27539062
+        type: NormalizeImages
+      - action_dim: 32
+        action_key: action
+        norm_type: mean_std
+        state_dim: 64
+        state_key: proprio
+        type: NormalizeStatesAndActions
+      type: ParquetDataset
+      use_delta: false
+      window_start_idx: 0
+    name_mappings:
+      action:
+      - action
+      observation.state:
+      - proprio
+    statistic_keys:
+    - observation.state
+    - timestamp
+    - action
+    statistic_name: libero_goal_no_noops
+    type: DistributedRepeatingDataset
+  per_device_batch_size: 8
+  per_device_num_workers: 4

gr00t_eagle_3b_libero_goal_full_finetune_bs64/dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,138 @@

+{
+  "libero_goal_no_noops": {
+    "proprio": {
+      "mean": [
+        -0.09891432758878499,
+        0.01489584178884142,
+        1.067322519907531,
+        2.8289916028867363,
+        0.31907902813946676,
+        -0.2782741362135122,
+        0.02821884616217808,
+        -0.02719759491844234
+      ],
+      "std": [
+        0.04140466877133655,
+        0.04117920944028847,
+        0.037538661473221754,
+        0.1955033740750787,
+        0.2574273555156476,
+        0.12620857483432396,
+        0.0053229405182870755,
+        0.005316267734440966
+      ],
+      "min": [
+        -0.46141287684440613,
+        -0.30136311054229736,
+        0.9083037972450256,
+        1.002794623374939,
+        -1.0517308712005615,
+        -1.5227035284042358,
+        -0.0021671096328645945,
+        -0.042015016078948975
+      ],
+      "max": [
+        0.13241106271743774,
+        0.3271525800228119,
+        1.472778081893921,
+        3.4731650352478027,
+        2.676265239715576,
+        0.6698114275932312,
+        0.04232141748070717,
+        0.001021005678921938
+      ],
+      "q01": [
+        -0.22800911694063627,
+        -0.10299188974829282,
+        0.9455820491176684,
+        2.608259821258135,
+        -0.18011099436472794,
+        -0.5618953405895196,
+        0.012391739034799726,
+        -0.039969403267763146
+      ],
+      "q99": [
+        -0.01061282617817482,
+        0.11139527808191847,
+        1.2117906450921032,
+        3.168615021869246,
+        0.6706572281431679,
+        0.05441701961452796,
+        0.04007683324960615,
+        -0.009863127064877238
+      ]
+    },
+    "timestamp": {
+      "mean": [
+        3.354542962472823
+      ],
+      "std": [
+        2.391036718656464
+      ],
+      "min": [
+        0.0
+      ],
+      "max": [
+        17.3
+      ],
+      "q01": null,
+      "q99": null
+    },
+    "action": {
+      "mean": [
+        0.04244028081958392,
+        0.03443110282231447,
+        -0.15229553502677498,
+        -0.0024877518145540465,
+        0.02584054000286765,
+        0.026984970605938637,
+        0.6345212227794035
+      ],
+      "std": [
+        0.15081003273695404,
+        0.13262089326077886,
+        0.18549323777289492,
+        0.020653428159559374,
+        0.029405301079120767,
+        0.03768659701327122,
+        0.1820141409830254
+      ],
+      "min": [
+        -0.9375,
+        -0.9375,
+        -0.9375,
+        -0.24214285612106323,
+        -0.375,
+        -0.2871428430080414,
+        0.0
+      ],
+      "max": [
+        0.9375,
+        0.9375,
+        0.9375,
+        0.3557142913341522,
+        0.375,
+        0.375,
+        1.0
+      ],
+      "q01": [
+        -0.6000398242585724,
+        -0.4840628973395442,
+        -0.828284557604454,
+        -0.08435403729704126,
+        -0.10102247173430137,
+        -0.08468755117369905,
+        0.22234615748180359
+      ],
+      "q99": [
+        0.7428644945783142,
+        0.5763548859545187,
+        0.6755035821278529,
+        0.09240677347557615,
+        0.17272708537297377,
+        0.14638082286503087,
+        1.0
+      ]
+    }
+  }
+}

gr00t_eagle_3b_libero_goal_full_finetune_bs64/gr00t_eagle_3b_libero_goal_full_finetune_2026_05_14_02_40_40.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_goal_full_finetune_bs64/run-metrics.jsonl ADDED Viewed

	@@ -0,0 +1 @@

+ {"hparams": "{'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_goal_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_goal_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': 
{'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_goal_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_goal', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 
'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_goal_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_goal_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], 
[123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_goal_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_goal', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_goal_full_finetune_2026_05_14_02_40_40'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_goal', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_goal', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 
'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'run_id': 'gr00t_eagle_3b_libero_goal_full_finetune_2026_05_14_02_40_40'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 
'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_goal_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_goal_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 
'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_goal_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_goal', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_goal_full_finetune_2026_05_14_02_40_40'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_goal', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_goal', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 
'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_goal_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_goal', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_goal', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 
'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}", "run_id": "gr00t_eagle_3b_libero_goal_full_finetune_2026_05_14_02_40_40"}

gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/added_tokens.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "</box>": 151673,
+  "</img>": 151671,
+  "</interval>": 151679,
+  "</quad>": 151675,
+  "</ref>": 151677,
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<IMG_CONTEXT>": 151669,
+  "<box>": 151672,
+  "<img>": 151670,
+  "<interval>": 151678,
+  "<quad>": 151674,
+  "<ref>": 151676,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>",
+    "<IMG_CONTEXT>",
+    "<img>",
+    "</img>",
+    "<box>",
+    "</box>",
+    "<quad>",
+    "</quad>",
+    "<ref>",
+    "</ref>",
+    "<interval>",
+    "</interval>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,344 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151669": {
+      "content": "<IMG_CONTEXT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151670": {
+      "content": "<img>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151671": {
+      "content": "</img>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151672": {
+      "content": "<box>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151673": {
+      "content": "</box>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151674": {
+      "content": "<quad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151675": {
+      "content": "</quad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151676": {
+      "content": "<ref>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151677": {
+      "content": "</ref>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151678": {
+      "content": "<interval>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151679": {
+      "content": "</interval>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>",
+    "<IMG_CONTEXT>",
+    "<img>",
+    "</img>",
+    "<box>",
+    "</box>",
+    "<quad>",
+    "</quad>",
+    "<ref>",
+    "</ref>",
+    "<interval>",
+    "</interval>"
+  ],
+  "auto_map": {
+    "AutoProcessor": "processing_eagle2_5_vl.Eagle2_5_VLProcessor"
+  },
+  "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set content = message.content %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in message.content %}\n               
 {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n          
  {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 16384,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Eagle2_5_VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

gr00t_eagle_3b_libero_goal_full_finetune_bs64/tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_goal_full_finetune_bs64/vlm_backbone_config.json ADDED Viewed

	@@ -0,0 +1,106 @@

+{
+  "_attn_implementation": "flash_attention_2",
+  "architectures": [
+    "Eagle2_5_VLForConditionalGeneration"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_eagle2_5_vl.Eagle2_5_VLConfig",
+    "AutoModel": "modeling_eagle2_5_vl.Eagle2_5_VLForConditionalGeneration"
+  },
+  "downsample_ratio": 0.5,
+  "dynamic_image_size": true,
+  "force_image_size": 224,
+  "image_token_index": 151669,
+  "initializer_range": 0.02,
+  "loss_version": "efficient_v2_cp_head",
+  "max_dynamic_tiles": 12,
+  "min_dynamic_tiles": 1,
+  "mlp_checkpoint": false,
+  "mlp_connector_layers": 1,
+  "model_type": "eagle_2_5_vl",
+  "output_attentions": false,
+  "pad2square": false,
+  "select_layer": -1,
+  "template": "qwen3-chat",
+  "text_config": {
+    "_name_or_path": "Qwen/Qwen3-1.7B",
+    "architectures": [
+      "Qwen3ForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0,
+    "bos_token_id": 151643,
+    "eos_token_id": 151645,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 2048,
+    "initializer_range": 0.02,
+    "intermediate_size": 6144,
+    "layer_types": [
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 40960,
+    "max_window_layers": 28,
+    "model_type": "qwen3",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 12,
+    "num_key_value_heads": 8,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 1000000,
+    "sliding_window": null,
+    "tie_word_embeddings": true,
+    "torch_dtype": "bfloat16",
+    "use_cache": false,
+    "use_sliding_window": false,
+    "vocab_size": 151680
+  },
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": null,
+  "use_backbone_lora": 0,
+  "use_llm_lora": 0,
+  "use_pixel_shuffle": false,
+  "use_thumbnail": true,
+  "vision_config": {
+    "attention_dropout": 0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 1152,
+    "image_size": 224,
+    "intermediate_size": 4304,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 27,
+    "patch_size": 14,
+    "torch_dtype": "bfloat16"
+  }
+}

gr00t_eagle_3b_libero_object_full_finetune_bs64/checkpoints/step-018846-epoch-18-loss=0.0701.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e63051195907f4e25fc0a6dc317a1306643faaa4cbfd8677c8d4ecf3f627b8c6
+size 10896783888

gr00t_eagle_3b_libero_object_full_finetune_bs64/config.json ADDED Viewed

	@@ -0,0 +1,302 @@

+{
+  "eval": {
+    "dataset": {
+      "transforms": [
+        {
+          "embodiment_id": 2,
+          "img_keys": [
+            "agentview_image",
+            "robot0_eye_in_hand_image"
+          ],
+          "type": "ProcessLiberoEvalInputs"
+        },
+        {
+          "image_resize_strategy": "resize-naive",
+          "input_sizes": [
+            [
+              3,
+              224,
+              224
+            ],
+            [
+              3,
+              224,
+              224
+            ]
+          ],
+          "means": [
+            [
+              123.515625,
+              116.04492188,
+              103.59375
+            ],
+            [
+              123.515625,
+              116.04492188,
+              103.59375
+            ]
+          ],
+          "stds": [
+            [
+              58.27148438,
+              57.02636719,
+              57.27539062
+            ],
+            [
+              58.27148438,
+              57.02636719,
+              57.27539062
+            ]
+          ],
+          "type": "TransformImage"
+        },
+        {
+          "max_len": 600,
+          "num_images": 2,
+          "tokenizer": {
+            "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+            "type": "PretrainedTokenizer"
+          },
+          "type": "ProcessPromptsWithImage"
+        },
+        {
+          "gripper_key": "robot0_gripper_qpos",
+          "norm_type": "mean_std",
+          "out_key": "states",
+          "pos_key": "robot0_eef_pos",
+          "quat_key": "robot0_eef_quat",
+          "state_dim": 64,
+          "type": "LiberoProprioFromInputs"
+        }
+      ],
+      "type": "LiberoParquetEvalDataset"
+    },
+    "denormalize_action": {
+      "norm_type": "mean_std",
+      "type": "DenormalizeLiberoAction"
+    },
+    "eval_chunk_size": 10,
+    "model_family": "pi0",
+    "num_steps_wait": 10,
+    "num_trials_per_task": 50,
+    "resize_size": 224,
+    "seed": 7,
+    "task_suite_name": "libero_object",
+    "type": "LiberoEvalRunner"
+  },
+  "inference_model": {
+    "pretrained_name_or_path": "./checkpoints/GR00T-N1.5-3B",
+    "type": "LlavaVLA",
+    "vla_head": {
+      "action_dim": 32,
+      "diffusion_model_cfg": {
+        "attention_head_dim": 48,
+        "cross_attention_dim": 2048,
+        "dropout": 0.2,
+        "final_dropout": true,
+        "interleave_self_attention": true,
+        "norm_type": "ada_norm",
+        "num_attention_heads": 32,
+        "num_layers": 16,
+        "output_dim": 1024,
+        "positional_embeddings": null
+      },
+      "hidden_size": 1024,
+      "input_embedding_dim": 1536,
+      "num_heads": 4,
+      "num_inference_timesteps": 4,
+      "num_layers": 1,
+      "ori_action_dim": 7,
+      "state_dim": 64,
+      "traj_length": 10,
+      "type": "FlowMatchingInferenceHead"
+    },
+    "vlm_backbone": {
+      "type": "EagleInferenceBackbone",
+      "vlm_path": "fluxvla/models/third_party_models/eagle2_hg_model"
+    }
+  },
+  "model": {
+    "freeze_projector": false,
+    "freeze_vlm_backbone": false,
+    "name_mapping": {
+      "vla_head": "action_head",
+      "vlm_backbone.vlm": "backbone.eagle_model"
+    },
+    "pretrained_name_or_path": "./checkpoints/GR00T-N1.5-3B",
+    "type": "LlavaVLA",
+    "vla_head": {
+      "action_dim": 32,
+      "hidden_size": 1024,
+      "input_embedding_dim": 1536,
+      "num_heads": 4,
+      "num_inference_timesteps": 4,
+      "num_layers": 1,
+      "ori_action_dim": 7,
+      "state_dim": 64,
+      "traj_length": 10,
+      "type": "FlowMatchingHead"
+    },
+    "vlm_backbone": {
+      "type": "EagleBackbone",
+      "vlm_path": "fluxvla/models/third_party_models/eagle2_hg_model"
+    }
+  },
+  "runner": {
+    "change_key_name": false,
+    "collator": {
+      "keys": [
+        "states",
+        "observation.eepose",
+        "timestamp",
+        "images",
+        "img_masks",
+        "lang_tokens",
+        "lang_masks",
+        "actions",
+        "action_masks",
+        "embodiment_ids"
+      ],
+      "meta_keys": [
+        "task_description",
+        "prompt",
+        "info",
+        "stats"
+      ],
+      "type": "DictCollator"
+    },
+    "enable_gradient_checkpointing": false,
+    "enable_mixed_precision_training": true,
+    "learning_rate": 1.5e-05,
+    "lr_scheduler_type": "linear-warmup+cosine-decay",
+    "max_epochs": 18,
+    "max_grad_norm": 1.0,
+    "metric": {
+      "active_trackers": [
+        "jsonl",
+        "tensorboard"
+      ],
+      "grad_accumulation_steps": 1,
+      "run_dir": "work_dirs",
+      "type": "VLAMetric",
+      "window_size": 1
+    },
+    "mixed_precision_dtype": "bf16",
+    "sampler": null,
+    "tokenizer": {
+      "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+      "type": "PretrainedTokenizer"
+    },
+    "type": "FSDPTrainRunner",
+    "warmup_ratio": 0.03,
+    "weight_decay": 0.0
+  },
+  "train_dataloader": {
+    "dataset": {
+      "datasets": {
+        "action_key": "action",
+        "action_window_size": 10,
+        "data_root_path": "datasets/libero_object_no_noops_lerobotv2.1",
+        "statistic_name": "libero_object_no_noops",
+        "transforms": [
+          {
+            "embodiment_id": 2,
+            "name_mappings": {
+              "actions": [
+                "actions"
+              ],
+              "observation.state": [
+                "states"
+              ]
+            },
+            "parquet_keys": [
+              "observation.state",
+              "timestamp",
+              "actions",
+              "info",
+              "stats",
+              "action_masks"
+            ],
+            "type": "ProcessParquetInputs",
+            "video_keys": [
+              "observation.images.image",
+              "observation.images.wrist_image"
+            ]
+          },
+          {
+            "type": "ParquetPrompter"
+          },
+          {
+            "max_len": 600,
+            "num_images": 2,
+            "tokenizer": {
+              "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+              "type": "PretrainedTokenizer"
+            },
+            "type": "ProcessPromptsWithImage"
+          },
+          {
+            "height": 224,
+            "type": "ResizeImages",
+            "width": 224
+          },
+          {
+            "means": [
+              [
+                123.515625,
+                116.04492188,
+                103.59375
+              ],
+              [
+                123.515625,
+                116.04492188,
+                103.59375
+              ]
+            ],
+            "stds": [
+              [
+                58.27148438,
+                57.02636719,
+                57.27539062
+              ],
+              [
+                58.27148438,
+                57.02636719,
+                57.27539062
+              ]
+            ],
+            "type": "NormalizeImages"
+          },
+          {
+            "action_dim": 32,
+            "action_key": "action",
+            "norm_type": "mean_std",
+            "state_dim": 64,
+            "state_key": "proprio",
+            "type": "NormalizeStatesAndActions"
+          }
+        ],
+        "type": "ParquetDataset",
+        "use_delta": false,
+        "window_start_idx": 0
+      },
+      "name_mappings": {
+        "action": [
+          "action"
+        ],
+        "observation.state": [
+          "proprio"
+        ]
+      },
+      "statistic_keys": [
+        "observation.state",
+        "timestamp",
+        "action"
+      ],
+      "statistic_name": "libero_object_no_noops",
+      "type": "DistributedRepeatingDataset"
+    },
+    "per_device_batch_size": 8,
+    "per_device_num_workers": 4
+  }
+}

gr00t_eagle_3b_libero_object_full_finetune_bs64/config.yaml ADDED Viewed

	@@ -0,0 +1,220 @@

+eval:
+  dataset:
+    transforms:
+    - embodiment_id: 2
+      img_keys:
+      - agentview_image
+      - robot0_eye_in_hand_image
+      type: ProcessLiberoEvalInputs
+    - image_resize_strategy: resize-naive
+      input_sizes:
+      - - 3
+        - 224
+        - 224
+      - - 3
+        - 224
+        - 224
+      means:
+      - - 123.515625
+        - 116.04492188
+        - 103.59375
+      - - 123.515625
+        - 116.04492188
+        - 103.59375
+      stds:
+      - - 58.27148438
+        - 57.02636719
+        - 57.27539062
+      - - 58.27148438
+        - 57.02636719
+        - 57.27539062
+      type: TransformImage
+    - max_len: 600
+      num_images: 2
+      tokenizer:
+        model_path: fluxvla/models/third_party_models/eagle2_hg_model
+        type: PretrainedTokenizer
+      type: ProcessPromptsWithImage
+    - gripper_key: robot0_gripper_qpos
+      norm_type: mean_std
+      out_key: states
+      pos_key: robot0_eef_pos
+      quat_key: robot0_eef_quat
+      state_dim: 64
+      type: LiberoProprioFromInputs
+    type: LiberoParquetEvalDataset
+  denormalize_action:
+    norm_type: mean_std
+    type: DenormalizeLiberoAction
+  eval_chunk_size: 10
+  model_family: pi0
+  num_steps_wait: 10
+  num_trials_per_task: 50
+  resize_size: 224
+  seed: 7
+  task_suite_name: libero_object
+  type: LiberoEvalRunner
+inference_model:
+  pretrained_name_or_path: ./checkpoints/GR00T-N1.5-3B
+  type: LlavaVLA
+  vla_head:
+    action_dim: 32
+    diffusion_model_cfg:
+      attention_head_dim: 48
+      cross_attention_dim: 2048
+      dropout: 0.2
+      final_dropout: true
+      interleave_self_attention: true
+      norm_type: ada_norm
+      num_attention_heads: 32
+      num_layers: 16
+      output_dim: 1024
+      positional_embeddings: null
+    hidden_size: 1024
+    input_embedding_dim: 1536
+    num_heads: 4
+    num_inference_timesteps: 4
+    num_layers: 1
+    ori_action_dim: 7
+    state_dim: 64
+    traj_length: 10
+    type: FlowMatchingInferenceHead
+  vlm_backbone:
+    type: EagleInferenceBackbone
+    vlm_path: fluxvla/models/third_party_models/eagle2_hg_model
+model:
+  freeze_projector: false
+  freeze_vlm_backbone: false
+  name_mapping:
+    vla_head: action_head
+    vlm_backbone.vlm: backbone.eagle_model
+  pretrained_name_or_path: ./checkpoints/GR00T-N1.5-3B
+  type: LlavaVLA
+  vla_head:
+    action_dim: 32
+    hidden_size: 1024
+    input_embedding_dim: 1536
+    num_heads: 4
+    num_inference_timesteps: 4
+    num_layers: 1
+    ori_action_dim: 7
+    state_dim: 64
+    traj_length: 10
+    type: FlowMatchingHead
+  vlm_backbone:
+    type: EagleBackbone
+    vlm_path: fluxvla/models/third_party_models/eagle2_hg_model
+runner:
+  change_key_name: false
+  collator:
+    keys:
+    - states
+    - observation.eepose
+    - timestamp
+    - images
+    - img_masks
+    - lang_tokens
+    - lang_masks
+    - actions
+    - action_masks
+    - embodiment_ids
+    meta_keys:
+    - task_description
+    - prompt
+    - info
+    - stats
+    type: DictCollator
+  enable_gradient_checkpointing: false
+  enable_mixed_precision_training: true
+  learning_rate: 1.5e-05
+  lr_scheduler_type: linear-warmup+cosine-decay
+  max_epochs: 18
+  max_grad_norm: 1.0
+  metric:
+    active_trackers:
+    - jsonl
+    - tensorboard
+    grad_accumulation_steps: 1
+    run_dir: work_dirs
+    type: VLAMetric
+    window_size: 1
+  mixed_precision_dtype: bf16
+  sampler: null
+  tokenizer:
+    model_path: fluxvla/models/third_party_models/eagle2_hg_model
+    type: PretrainedTokenizer
+  type: FSDPTrainRunner
+  warmup_ratio: 0.03
+  weight_decay: 0.0
+train_dataloader:
+  dataset:
+    datasets:
+      action_key: action
+      action_window_size: 10
+      data_root_path: datasets/libero_object_no_noops_lerobotv2.1
+      statistic_name: libero_object_no_noops
+      transforms:
+      - embodiment_id: 2
+        name_mappings:
+          actions:
+          - actions
+          observation.state:
+          - states
+        parquet_keys:
+        - observation.state
+        - timestamp
+        - actions
+        - info
+        - stats
+        - action_masks
+        type: ProcessParquetInputs
+        video_keys:
+        - observation.images.image
+        - observation.images.wrist_image
+      - type: ParquetPrompter
+      - max_len: 600
+        num_images: 2
+        tokenizer:
+          model_path: fluxvla/models/third_party_models/eagle2_hg_model
+          type: PretrainedTokenizer
+        type: ProcessPromptsWithImage
+      - height: 224
+        type: ResizeImages
+        width: 224
+      - means:
+        - - 123.515625
+          - 116.04492188
+          - 103.59375
+        - - 123.515625
+          - 116.04492188
+          - 103.59375
+        stds:
+        - - 58.27148438
+          - 57.02636719
+          - 57.27539062
+        - - 58.27148438
+          - 57.02636719
+          - 57.27539062
+        type: NormalizeImages
+      - action_dim: 32
+        action_key: action
+        norm_type: mean_std
+        state_dim: 64
+        state_key: proprio
+        type: NormalizeStatesAndActions
+      type: ParquetDataset
+      use_delta: false
+      window_start_idx: 0
+    name_mappings:
+      action:
+      - action
+      observation.state:
+      - proprio
+    statistic_keys:
+    - observation.state
+    - timestamp
+    - action
+    statistic_name: libero_object_no_noops
+    type: DistributedRepeatingDataset
+  per_device_batch_size: 8
+  per_device_num_workers: 4

gr00t_eagle_3b_libero_object_full_finetune_bs64/dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,104 @@

+{
+  "libero_object_no_noops": {
+    "proprio": {
+      "mean": [
+        -0.029990377887890714,
+        -0.007947119348036638,
+        0.20293400450543442,
+        3.108609864126749,
+        -0.2140478258736818,
+        -0.11307033080181891,
+        0.02938040086729137,
+        -0.03055662046031239
+      ],
+      "std": [
+        0.023670072817660013,
+        0.06225550550101929,
+        0.027602195887468282,
+        0.030705662709939595,
+        0.11858388544011475,
+        0.0732862116780689,
+        0.0033820150919409114,
+        0.003251806898346789
+      ],
+      "min": [
+        -0.1765444278717041,
+        -0.29457300901412964,
+        0.008128180168569088,
+        2.2890501022338867,
+        -1.883241891860962,
+        -1.0600427389144897,
+        0.0006495157140307128,
+        -0.041782498359680176
+      ],
+      "max": [
+        0.14580604434013367,
+        0.33216384053230286,
+        0.3857804834842682,
+        3.4003844261169434,
+        0.7954911589622498,
+        0.6642207503318787,
+        0.04104341194033623,
+        -0.00018117300351150334
+      ],
+      "q01": null,
+      "q99": null
+    },
+    "timestamp": {
+      "mean": [
+        3.721695479517497
+      ],
+      "std": [
+        2.237081841546431
+      ],
+      "min": [
+        0.0
+      ],
+      "max": [
+        12.65
+      ],
+      "q01": null,
+      "q99": null
+    },
+    "action": {
+      "mean": [
+        0.07096490746267721,
+        0.13498889685796536,
+        -0.046013733641776924,
+        0.0012352044345171392,
+        0.006998803721298765,
+        -0.015027527802288103,
+        0.46428998075465666
+      ],
+      "std": [
+        0.10133946158044306,
+        0.165716399861371,
+        0.16914353294024564,
+        0.009240558533809633,
+        0.018657116474914717,
+        0.015913625946349673,
+        0.18849963395480163
+      ],
+      "min": [
+        -0.8839285969734192,
+        -0.9375,
+        -0.9375,
+        -0.15000000596046448,
+        -0.29035714268684387,
+        -0.32892856001853943,
+        0.0
+      ],
+      "max": [
+        0.9375,
+        0.8919642567634583,
+        0.9375,
+        0.17678570747375488,
+        0.35035714507102966,
+        0.1810714304447174,
+        1.0
+      ],
+      "q01": null,
+      "q99": null
+    }
+  }
+}

gr00t_eagle_3b_libero_object_full_finetune_bs64/gr00t_eagle_3b_libero_object_full_finetune_2026_05_14_02_40_05.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_object_full_finetune_bs64/run-metrics.jsonl ADDED Viewed

	@@ -0,0 +1 @@

+ {"hparams": "{'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_object_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_object_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': 
{'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_object_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_object', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 
'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_object_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_object_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], 
[123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_object_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_object', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_object_full_finetune_2026_05_14_02_40_05'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_object', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_object', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 
'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'run_id': 'gr00t_eagle_3b_libero_object_full_finetune_2026_05_14_02_40_05'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 
'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_object_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_object_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 
'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_object_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_object', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_object_full_finetune_2026_05_14_02_40_05'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_object', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_object', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 
'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_object_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_object', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_object', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 
'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}", "run_id": "gr00t_eagle_3b_libero_object_full_finetune_2026_05_14_02_40_05"}

gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/added_tokens.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "</box>": 151673,
+  "</img>": 151671,
+  "</interval>": 151679,
+  "</quad>": 151675,
+  "</ref>": 151677,
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<IMG_CONTEXT>": 151669,
+  "<box>": 151672,
+  "<img>": 151670,
+  "<interval>": 151678,
+  "<quad>": 151674,
+  "<ref>": 151676,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>",
+    "<IMG_CONTEXT>",
+    "<img>",
+    "</img>",
+    "<box>",
+    "</box>",
+    "<quad>",
+    "</quad>",
+    "<ref>",
+    "</ref>",
+    "<interval>",
+    "</interval>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,344 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151669": {
+      "content": "<IMG_CONTEXT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151670": {
+      "content": "<img>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151671": {
+      "content": "</img>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151672": {
+      "content": "<box>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151673": {
+      "content": "</box>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151674": {
+      "content": "<quad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151675": {
+      "content": "</quad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151676": {
+      "content": "<ref>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151677": {
+      "content": "</ref>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151678": {
+      "content": "<interval>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151679": {
+      "content": "</interval>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>",
+    "<IMG_CONTEXT>",
+    "<img>",
+    "</img>",
+    "<box>",
+    "</box>",
+    "<quad>",
+    "</quad>",
+    "<ref>",
+    "</ref>",
+    "<interval>",
+    "</interval>"
+  ],
+  "auto_map": {
+    "AutoProcessor": "processing_eagle2_5_vl.Eagle2_5_VLProcessor"
+  },
+  "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set content = message.content %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in message.content %}\n               
 {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n          
  {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 16384,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Eagle2_5_VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

gr00t_eagle_3b_libero_object_full_finetune_bs64/tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_object_full_finetune_bs64/vlm_backbone_config.json ADDED Viewed

	@@ -0,0 +1,106 @@

+{
+  "_attn_implementation": "flash_attention_2",
+  "architectures": [
+    "Eagle2_5_VLForConditionalGeneration"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_eagle2_5_vl.Eagle2_5_VLConfig",
+    "AutoModel": "modeling_eagle2_5_vl.Eagle2_5_VLForConditionalGeneration"
+  },
+  "downsample_ratio": 0.5,
+  "dynamic_image_size": true,
+  "force_image_size": 224,
+  "image_token_index": 151669,
+  "initializer_range": 0.02,
+  "loss_version": "efficient_v2_cp_head",
+  "max_dynamic_tiles": 12,
+  "min_dynamic_tiles": 1,
+  "mlp_checkpoint": false,
+  "mlp_connector_layers": 1,
+  "model_type": "eagle_2_5_vl",
+  "output_attentions": false,
+  "pad2square": false,
+  "select_layer": -1,
+  "template": "qwen3-chat",
+  "text_config": {
+    "_name_or_path": "Qwen/Qwen3-1.7B",
+    "architectures": [
+      "Qwen3ForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0,
+    "bos_token_id": 151643,
+    "eos_token_id": 151645,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 2048,
+    "initializer_range": 0.02,
+    "intermediate_size": 6144,
+    "layer_types": [
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 40960,
+    "max_window_layers": 28,
+    "model_type": "qwen3",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 12,
+    "num_key_value_heads": 8,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 1000000,
+    "sliding_window": null,
+    "tie_word_embeddings": true,
+    "torch_dtype": "bfloat16",
+    "use_cache": false,
+    "use_sliding_window": false,
+    "vocab_size": 151680
+  },
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": null,
+  "use_backbone_lora": 0,
+  "use_llm_lora": 0,
+  "use_pixel_shuffle": false,
+  "use_thumbnail": true,
+  "vision_config": {
+    "attention_dropout": 0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 1152,
+    "image_size": 224,
+    "intermediate_size": 4304,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 27,
+    "patch_size": 14,
+    "torch_dtype": "bfloat16"
+  }
+}

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/checkpoints/step-014904-epoch-18-loss=0.0780.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a624cec26c0a9d2179f84c668cd2a4eb34f9f5b34e78b67c4d7c5a93676671e
+size 10896783888

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/config.json ADDED Viewed

	@@ -0,0 +1,302 @@

+{
+  "eval": {
+    "dataset": {
+      "transforms": [
+        {
+          "embodiment_id": 2,
+          "img_keys": [
+            "agentview_image",
+            "robot0_eye_in_hand_image"
+          ],
+          "type": "ProcessLiberoEvalInputs"
+        },
+        {
+          "image_resize_strategy": "resize-naive",
+          "input_sizes": [
+            [
+              3,
+              224,
+              224
+            ],
+            [
+              3,
+              224,
+              224
+            ]
+          ],
+          "means": [
+            [
+              123.515625,
+              116.04492188,
+              103.59375
+            ],
+            [
+              123.515625,
+              116.04492188,
+              103.59375
+            ]
+          ],
+          "stds": [
+            [
+              58.27148438,
+              57.02636719,
+              57.27539062
+            ],
+            [
+              58.27148438,
+              57.02636719,
+              57.27539062
+            ]
+          ],
+          "type": "TransformImage"
+        },
+        {
+          "max_len": 600,
+          "num_images": 2,
+          "tokenizer": {
+            "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+            "type": "PretrainedTokenizer"
+          },
+          "type": "ProcessPromptsWithImage"
+        },
+        {
+          "gripper_key": "robot0_gripper_qpos",
+          "norm_type": "mean_std",
+          "out_key": "states",
+          "pos_key": "robot0_eef_pos",
+          "quat_key": "robot0_eef_quat",
+          "state_dim": 64,
+          "type": "LiberoProprioFromInputs"
+        }
+      ],
+      "type": "LiberoParquetEvalDataset"
+    },
+    "denormalize_action": {
+      "norm_type": "mean_std",
+      "type": "DenormalizeLiberoAction"
+    },
+    "eval_chunk_size": 10,
+    "model_family": "pi0",
+    "num_steps_wait": 10,
+    "num_trials_per_task": 50,
+    "resize_size": 224,
+    "seed": 7,
+    "task_suite_name": "libero_spatial",
+    "type": "LiberoEvalRunner"
+  },
+  "inference_model": {
+    "pretrained_name_or_path": "./checkpoints/GR00T-N1.5-3B",
+    "type": "LlavaVLA",
+    "vla_head": {
+      "action_dim": 32,
+      "diffusion_model_cfg": {
+        "attention_head_dim": 48,
+        "cross_attention_dim": 2048,
+        "dropout": 0.2,
+        "final_dropout": true,
+        "interleave_self_attention": true,
+        "norm_type": "ada_norm",
+        "num_attention_heads": 32,
+        "num_layers": 16,
+        "output_dim": 1024,
+        "positional_embeddings": null
+      },
+      "hidden_size": 1024,
+      "input_embedding_dim": 1536,
+      "num_heads": 4,
+      "num_inference_timesteps": 4,
+      "num_layers": 1,
+      "ori_action_dim": 7,
+      "state_dim": 64,
+      "traj_length": 10,
+      "type": "FlowMatchingInferenceHead"
+    },
+    "vlm_backbone": {
+      "type": "EagleInferenceBackbone",
+      "vlm_path": "fluxvla/models/third_party_models/eagle2_hg_model"
+    }
+  },
+  "model": {
+    "freeze_projector": false,
+    "freeze_vlm_backbone": false,
+    "name_mapping": {
+      "vla_head": "action_head",
+      "vlm_backbone.vlm": "backbone.eagle_model"
+    },
+    "pretrained_name_or_path": "./checkpoints/GR00T-N1.5-3B",
+    "type": "LlavaVLA",
+    "vla_head": {
+      "action_dim": 32,
+      "hidden_size": 1024,
+      "input_embedding_dim": 1536,
+      "num_heads": 4,
+      "num_inference_timesteps": 4,
+      "num_layers": 1,
+      "ori_action_dim": 7,
+      "state_dim": 64,
+      "traj_length": 10,
+      "type": "FlowMatchingHead"
+    },
+    "vlm_backbone": {
+      "type": "EagleBackbone",
+      "vlm_path": "fluxvla/models/third_party_models/eagle2_hg_model"
+    }
+  },
+  "runner": {
+    "change_key_name": false,
+    "collator": {
+      "keys": [
+        "states",
+        "observation.eepose",
+        "timestamp",
+        "images",
+        "img_masks",
+        "lang_tokens",
+        "lang_masks",
+        "actions",
+        "action_masks",
+        "embodiment_ids"
+      ],
+      "meta_keys": [
+        "task_description",
+        "prompt",
+        "info",
+        "stats"
+      ],
+      "type": "DictCollator"
+    },
+    "enable_gradient_checkpointing": false,
+    "enable_mixed_precision_training": true,
+    "learning_rate": 1.5e-05,
+    "lr_scheduler_type": "linear-warmup+cosine-decay",
+    "max_epochs": 18,
+    "max_grad_norm": 1.0,
+    "metric": {
+      "active_trackers": [
+        "jsonl",
+        "tensorboard"
+      ],
+      "grad_accumulation_steps": 1,
+      "run_dir": "work_dirs",
+      "type": "VLAMetric",
+      "window_size": 1
+    },
+    "mixed_precision_dtype": "bf16",
+    "sampler": null,
+    "tokenizer": {
+      "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+      "type": "PretrainedTokenizer"
+    },
+    "type": "FSDPTrainRunner",
+    "warmup_ratio": 0.03,
+    "weight_decay": 0.0
+  },
+  "train_dataloader": {
+    "dataset": {
+      "datasets": {
+        "action_key": "action",
+        "action_window_size": 10,
+        "data_root_path": "datasets/libero_spatial_no_noops_lerobotv2.1",
+        "statistic_name": "libero_spatial_no_noops",
+        "transforms": [
+          {
+            "embodiment_id": 2,
+            "name_mappings": {
+              "actions": [
+                "actions"
+              ],
+              "observation.state": [
+                "states"
+              ]
+            },
+            "parquet_keys": [
+              "observation.state",
+              "timestamp",
+              "actions",
+              "info",
+              "stats",
+              "action_masks"
+            ],
+            "type": "ProcessParquetInputs",
+            "video_keys": [
+              "observation.images.image",
+              "observation.images.wrist_image"
+            ]
+          },
+          {
+            "type": "ParquetPrompter"
+          },
+          {
+            "max_len": 600,
+            "num_images": 2,
+            "tokenizer": {
+              "model_path": "fluxvla/models/third_party_models/eagle2_hg_model",
+              "type": "PretrainedTokenizer"
+            },
+            "type": "ProcessPromptsWithImage"
+          },
+          {
+            "height": 224,
+            "type": "ResizeImages",
+            "width": 224
+          },
+          {
+            "means": [
+              [
+                123.515625,
+                116.04492188,
+                103.59375
+              ],
+              [
+                123.515625,
+                116.04492188,
+                103.59375
+              ]
+            ],
+            "stds": [
+              [
+                58.27148438,
+                57.02636719,
+                57.27539062
+              ],
+              [
+                58.27148438,
+                57.02636719,
+                57.27539062
+              ]
+            ],
+            "type": "NormalizeImages"
+          },
+          {
+            "action_dim": 32,
+            "action_key": "action",
+            "norm_type": "mean_std",
+            "state_dim": 64,
+            "state_key": "proprio",
+            "type": "NormalizeStatesAndActions"
+          }
+        ],
+        "type": "ParquetDataset",
+        "use_delta": false,
+        "window_start_idx": 0
+      },
+      "name_mappings": {
+        "action": [
+          "action"
+        ],
+        "observation.state": [
+          "proprio"
+        ]
+      },
+      "statistic_keys": [
+        "observation.state",
+        "timestamp",
+        "action"
+      ],
+      "statistic_name": "libero_spatial_no_noops",
+      "type": "DistributedRepeatingDataset"
+    },
+    "per_device_batch_size": 8,
+    "per_device_num_workers": 4
+  }
+}

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/config.yaml ADDED Viewed

	@@ -0,0 +1,220 @@

+eval:
+  dataset:
+    transforms:
+    - embodiment_id: 2
+      img_keys:
+      - agentview_image
+      - robot0_eye_in_hand_image
+      type: ProcessLiberoEvalInputs
+    - image_resize_strategy: resize-naive
+      input_sizes:
+      - - 3
+        - 224
+        - 224
+      - - 3
+        - 224
+        - 224
+      means:
+      - - 123.515625
+        - 116.04492188
+        - 103.59375
+      - - 123.515625
+        - 116.04492188
+        - 103.59375
+      stds:
+      - - 58.27148438
+        - 57.02636719
+        - 57.27539062
+      - - 58.27148438
+        - 57.02636719
+        - 57.27539062
+      type: TransformImage
+    - max_len: 600
+      num_images: 2
+      tokenizer:
+        model_path: fluxvla/models/third_party_models/eagle2_hg_model
+        type: PretrainedTokenizer
+      type: ProcessPromptsWithImage
+    - gripper_key: robot0_gripper_qpos
+      norm_type: mean_std
+      out_key: states
+      pos_key: robot0_eef_pos
+      quat_key: robot0_eef_quat
+      state_dim: 64
+      type: LiberoProprioFromInputs
+    type: LiberoParquetEvalDataset
+  denormalize_action:
+    norm_type: mean_std
+    type: DenormalizeLiberoAction
+  eval_chunk_size: 10
+  model_family: pi0
+  num_steps_wait: 10
+  num_trials_per_task: 50
+  resize_size: 224
+  seed: 7
+  task_suite_name: libero_spatial
+  type: LiberoEvalRunner
+inference_model:
+  pretrained_name_or_path: ./checkpoints/GR00T-N1.5-3B
+  type: LlavaVLA
+  vla_head:
+    action_dim: 32
+    diffusion_model_cfg:
+      attention_head_dim: 48
+      cross_attention_dim: 2048
+      dropout: 0.2
+      final_dropout: true
+      interleave_self_attention: true
+      norm_type: ada_norm
+      num_attention_heads: 32
+      num_layers: 16
+      output_dim: 1024
+      positional_embeddings: null
+    hidden_size: 1024
+    input_embedding_dim: 1536
+    num_heads: 4
+    num_inference_timesteps: 4
+    num_layers: 1
+    ori_action_dim: 7
+    state_dim: 64
+    traj_length: 10
+    type: FlowMatchingInferenceHead
+  vlm_backbone:
+    type: EagleInferenceBackbone
+    vlm_path: fluxvla/models/third_party_models/eagle2_hg_model
+model:
+  freeze_projector: false
+  freeze_vlm_backbone: false
+  name_mapping:
+    vla_head: action_head
+    vlm_backbone.vlm: backbone.eagle_model
+  pretrained_name_or_path: ./checkpoints/GR00T-N1.5-3B
+  type: LlavaVLA
+  vla_head:
+    action_dim: 32
+    hidden_size: 1024
+    input_embedding_dim: 1536
+    num_heads: 4
+    num_inference_timesteps: 4
+    num_layers: 1
+    ori_action_dim: 7
+    state_dim: 64
+    traj_length: 10
+    type: FlowMatchingHead
+  vlm_backbone:
+    type: EagleBackbone
+    vlm_path: fluxvla/models/third_party_models/eagle2_hg_model
+runner:
+  change_key_name: false
+  collator:
+    keys:
+    - states
+    - observation.eepose
+    - timestamp
+    - images
+    - img_masks
+    - lang_tokens
+    - lang_masks
+    - actions
+    - action_masks
+    - embodiment_ids
+    meta_keys:
+    - task_description
+    - prompt
+    - info
+    - stats
+    type: DictCollator
+  enable_gradient_checkpointing: false
+  enable_mixed_precision_training: true
+  learning_rate: 1.5e-05
+  lr_scheduler_type: linear-warmup+cosine-decay
+  max_epochs: 18
+  max_grad_norm: 1.0
+  metric:
+    active_trackers:
+    - jsonl
+    - tensorboard
+    grad_accumulation_steps: 1
+    run_dir: work_dirs
+    type: VLAMetric
+    window_size: 1
+  mixed_precision_dtype: bf16
+  sampler: null
+  tokenizer:
+    model_path: fluxvla/models/third_party_models/eagle2_hg_model
+    type: PretrainedTokenizer
+  type: FSDPTrainRunner
+  warmup_ratio: 0.03
+  weight_decay: 0.0
+train_dataloader:
+  dataset:
+    datasets:
+      action_key: action
+      action_window_size: 10
+      data_root_path: datasets/libero_spatial_no_noops_lerobotv2.1
+      statistic_name: libero_spatial_no_noops
+      transforms:
+      - embodiment_id: 2
+        name_mappings:
+          actions:
+          - actions
+          observation.state:
+          - states
+        parquet_keys:
+        - observation.state
+        - timestamp
+        - actions
+        - info
+        - stats
+        - action_masks
+        type: ProcessParquetInputs
+        video_keys:
+        - observation.images.image
+        - observation.images.wrist_image
+      - type: ParquetPrompter
+      - max_len: 600
+        num_images: 2
+        tokenizer:
+          model_path: fluxvla/models/third_party_models/eagle2_hg_model
+          type: PretrainedTokenizer
+        type: ProcessPromptsWithImage
+      - height: 224
+        type: ResizeImages
+        width: 224
+      - means:
+        - - 123.515625
+          - 116.04492188
+          - 103.59375
+        - - 123.515625
+          - 116.04492188
+          - 103.59375
+        stds:
+        - - 58.27148438
+          - 57.02636719
+          - 57.27539062
+        - - 58.27148438
+          - 57.02636719
+          - 57.27539062
+        type: NormalizeImages
+      - action_dim: 32
+        action_key: action
+        norm_type: mean_std
+        state_dim: 64
+        state_key: proprio
+        type: NormalizeStatesAndActions
+      type: ParquetDataset
+      use_delta: false
+      window_start_idx: 0
+    name_mappings:
+      action:
+      - action
+      observation.state:
+      - proprio
+    statistic_keys:
+    - observation.state
+    - timestamp
+    - action
+    statistic_name: libero_spatial_no_noops
+    type: DistributedRepeatingDataset
+  per_device_batch_size: 8
+  per_device_num_workers: 4

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,104 @@

+{
+  "libero_spatial_no_noops": {
+    "proprio": {
+      "mean": [
+        -0.024462566693947342,
+        0.10653030478388664,
+        1.0580495716300307,
+        3.062855007870368,
+        -0.10464045916348884,
+        0.08307320236969534,
+        0.019954609054627596,
+        -0.02016269208612657
+      ],
+      "std": [
+        0.03894316411915835,
+        0.04873628070932603,
+        0.03692094784082842,
+        0.0369502396792774,
+        0.14538513627309543,
+        0.07695788947742314,
+        0.006102641532497049,
+        0.006049884044502419
+      ],
+      "min": [
+        -0.3095473051071167,
+        -0.29250794649124146,
+        0.9095591306686401,
+        2.497488260269165,
+        -1.8006486892700195,
+        -0.7207611203193665,
+        -0.0004703797458205372,
+        -0.041536275297403336
+      ],
+      "max": [
+        0.1759040206670761,
+        0.3904820382595062,
+        1.3290715217590332,
+        3.4566118717193604,
+        1.2268599271774292,
+        1.0429412126541138,
+        0.041053611785173416,
+        0.000775813648942858
+      ],
+      "q01": null,
+      "q99": null
+    },
+    "timestamp": {
+      "mean": [
+        3.1281914291108173
+      ],
+      "std": [
+        1.9190018719668336
+      ],
+      "min": [
+        0.0
+      ],
+      "max": [
+        9.6
+      ],
+      "q01": null,
+      "q99": null
+    },
+    "action": {
+      "mean": [
+        0.15312488430795423,
+        0.13707241597825376,
+        -0.15526779033841448,
+        -0.005176474488725037,
+        -0.011208756940533639,
+        -0.02019425420384803,
+        0.4578818200364616
+      ],
+      "std": [
+        0.15599777645651164,
+        0.13125442554438385,
+        0.19226097301543327,
+        0.014084604392168992,
+        0.02738322007326005,
+        0.021779582921450876,
+        0.18831055119433956
+      ],
+      "min": [
+        -0.9375,
+        -0.9375,
+        -0.9375,
+        -0.1875,
+        -0.3675000071525574,
+        -0.36000001430511475,
+        0.0
+      ],
+      "max": [
+        0.9375,
+        0.9375,
+        0.9375,
+        0.1971428543329239,
+        0.33642858266830444,
+        0.375,
+        1.0
+      ],
+      "q01": null,
+      "q99": null
+    }
+  }
+}

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/gr00t_eagle_3b_libero_spatial_full_finetune_2026_05_14_02_41_01.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/run-metrics.jsonl ADDED Viewed

	@@ -0,0 +1 @@

+ {"hparams": "{'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_spatial_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_spatial_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': 
{'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_spatial_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_spatial', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 
'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_spatial_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_spatial_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], 
[123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_spatial_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_spatial', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_spatial_full_finetune_2026_05_14_02_41_01'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_spatial', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_spatial', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 
224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'run_id': 'gr00t_eagle_3b_libero_spatial_full_finetune_2026_05_14_02_41_01'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py): {'model': {'type': 'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7}, 'freeze_vlm_backbone': False, 'name_mapping': {'vlm_backbone.vlm': 'backbone.eagle_model', 'vla_head': 'action_head'}, 'freeze_projector': False}, 'inference_model': {'type': 
'LlavaVLA', 'pretrained_name_or_path': './checkpoints/GR00T-N1.5-3B', 'vlm_backbone': {'type': 'EagleInferenceBackbone', 'vlm_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'vla_head': {'type': 'FlowMatchingInferenceHead', 'state_dim': 64, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'num_layers': 1, 'num_heads': 4, 'num_inference_timesteps': 4, 'traj_length': 10, 'action_dim': 32, 'ori_action_dim': 7, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 'positional_embeddings': None}}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_spatial_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': 'datasets/libero_spatial_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'embodiment_id': 2, 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter'}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 64, 
'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_spatial_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 1.5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sampler': None, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks', 'embodiment_ids'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/gr00t/libero_spatial', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py): {...}, 'run_id': 'gr00t_eagle_3b_libero_spatial_full_finetune_2026_05_14_02_41_01'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': False, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py): {...}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_spatial', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_spatial', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': 
['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}, 'args': Namespace(config='configs/gr00t/gr00t_eagle_3b_libero_spatial_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/gr00t/libero_spatial', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_spatial', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'embodiment_id': 2, 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'ProcessPromptsWithImage', 'max_len': 600, 'num_images': 2, 'tokenizer': {'type': 'PretrainedTokenizer', 'model_path': 'fluxvla/models/third_party_models/eagle2_hg_model'}}, {'type': 'LiberoProprioFromInputs', 'state_dim': 64, 
'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std'}}}", "run_id": "gr00t_eagle_3b_libero_spatial_full_finetune_2026_05_14_02_41_01"}

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/added_tokens.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "</box>": 151673,
+  "</img>": 151671,
+  "</interval>": 151679,
+  "</quad>": 151675,
+  "</ref>": 151677,
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<IMG_CONTEXT>": 151669,
+  "<box>": 151672,
+  "<img>": 151670,
+  "<interval>": 151678,
+  "<quad>": 151674,
+  "<ref>": 151676,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>",
+    "<IMG_CONTEXT>",
+    "<img>",
+    "</img>",
+    "<box>",
+    "</box>",
+    "<quad>",
+    "</quad>",
+    "<ref>",
+    "</ref>",
+    "<interval>",
+    "</interval>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,344 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151669": {
+      "content": "<IMG_CONTEXT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151670": {
+      "content": "<img>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151671": {
+      "content": "</img>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151672": {
+      "content": "<box>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151673": {
+      "content": "</box>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151674": {
+      "content": "<quad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151675": {
+      "content": "</quad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151676": {
+      "content": "<ref>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151677": {
+      "content": "</ref>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151678": {
+      "content": "<interval>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151679": {
+      "content": "</interval>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>",
+    "<IMG_CONTEXT>",
+    "<img>",
+    "</img>",
+    "<box>",
+    "</box>",
+    "<quad>",
+    "</quad>",
+    "<ref>",
+    "</ref>",
+    "<interval>",
+    "</interval>"
+  ],
+  "auto_map": {
+    "AutoProcessor": "processing_eagle2_5_vl.Eagle2_5_VLProcessor"
+  },
+  "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set content = message.content %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in message.content %}\n               
 {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n          
  {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 16384,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Eagle2_5_VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gr00t_eagle_3b_libero_spatial_full_finetune_bs64/vlm_backbone_config.json ADDED Viewed

	@@ -0,0 +1,106 @@

+{
+  "_attn_implementation": "flash_attention_2",
+  "architectures": [
+    "Eagle2_5_VLForConditionalGeneration"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_eagle2_5_vl.Eagle2_5_VLConfig",
+    "AutoModel": "modeling_eagle2_5_vl.Eagle2_5_VLForConditionalGeneration"
+  },
+  "downsample_ratio": 0.5,
+  "dynamic_image_size": true,
+  "force_image_size": 224,
+  "image_token_index": 151669,
+  "initializer_range": 0.02,
+  "loss_version": "efficient_v2_cp_head",
+  "max_dynamic_tiles": 12,
+  "min_dynamic_tiles": 1,
+  "mlp_checkpoint": false,
+  "mlp_connector_layers": 1,
+  "model_type": "eagle_2_5_vl",
+  "output_attentions": false,
+  "pad2square": false,
+  "select_layer": -1,
+  "template": "qwen3-chat",
+  "text_config": {
+    "_name_or_path": "Qwen/Qwen3-1.7B",
+    "architectures": [
+      "Qwen3ForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0,
+    "bos_token_id": 151643,
+    "eos_token_id": 151645,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 2048,
+    "initializer_range": 0.02,
+    "intermediate_size": 6144,
+    "layer_types": [
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 40960,
+    "max_window_layers": 28,
+    "model_type": "qwen3",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 12,
+    "num_key_value_heads": 8,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 1000000,
+    "sliding_window": null,
+    "tie_word_embeddings": true,
+    "torch_dtype": "bfloat16",
+    "use_cache": false,
+    "use_sliding_window": false,
+    "vocab_size": 151680
+  },
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": null,
+  "use_backbone_lora": 0,
+  "use_llm_lora": 0,
+  "use_pixel_shuffle": false,
+  "use_thumbnail": true,
+  "vision_config": {
+    "attention_dropout": 0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 1152,
+    "image_size": 224,
+    "intermediate_size": 4304,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 27,
+    "patch_size": 14,
+    "torch_dtype": "bfloat16"
+  }
+}

pi05_paligemma_libero_10_full_finetune_bs64/checkpoints/step-038064-epoch-24-loss=0.0170.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a79542cbb79b75deea3804d91f87f9c2180ff704f0cd1988469ee72655fbe15
+size 7233625688

pi05_paligemma_libero_10_full_finetune_bs64/config.json CHANGED Viewed

@@ -225,14 +225,12 @@
     "enable_mixed_precision_training": true,
     "learning_rate": 5e-05,
     "lr_scheduler_type": "linear-warmup+cosine-decay",
-    "max_epochs": 18,
     "max_grad_norm": 1.0,
-    "max_keep_ckpts": 1,
-    "max_steps": null,
     "metric": {
       "active_trackers": [
         "jsonl",
-        "wandb"
       ],
       "grad_accumulation_steps": 1,
       "run_dir": "work_dirs",
@@ -241,6 +239,7 @@
     },
     "mixed_precision_dtype": "bf16",
     "sampler": null,
     "tokenizer": {
       "type": "PaligemmaTokenizer"
     },

     "enable_mixed_precision_training": true,
     "learning_rate": 5e-05,
     "lr_scheduler_type": "linear-warmup+cosine-decay",
+    "max_epochs": 24,
     "max_grad_norm": 1.0,
     "metric": {
       "active_trackers": [
         "jsonl",
+        "tensorboard"
       ],
       "grad_accumulation_steps": 1,
       "run_dir": "work_dirs",
     },
     "mixed_precision_dtype": "bf16",
     "sampler": null,
+    "sharding_strategy": "no-shard",
     "tokenizer": {
       "type": "PaligemmaTokenizer"
     },

pi05_paligemma_libero_10_full_finetune_bs64/config.yaml CHANGED Viewed

@@ -181,20 +181,19 @@ runner:
   enable_mixed_precision_training: true
   learning_rate: 5.0e-05
   lr_scheduler_type: linear-warmup+cosine-decay
-  max_epochs: 18
   max_grad_norm: 1.0
-  max_keep_ckpts: 1
-  max_steps: null
   metric:
     active_trackers:
     - jsonl
-    - wandb
     grad_accumulation_steps: 1
     run_dir: work_dirs
     type: VLAMetric
     window_size: 1
   mixed_precision_dtype: bf16
   sampler: null
   tokenizer:
     type: PaligemmaTokenizer
   type: FSDPTrainRunner

   enable_mixed_precision_training: true
   learning_rate: 5.0e-05
   lr_scheduler_type: linear-warmup+cosine-decay
+  max_epochs: 24
   max_grad_norm: 1.0
   metric:
     active_trackers:
     - jsonl
+    - tensorboard
     grad_accumulation_steps: 1
     run_dir: work_dirs
     type: VLAMetric
     window_size: 1
   mixed_precision_dtype: bf16
   sampler: null
+  sharding_strategy: no-shard
   tokenizer:
     type: PaligemmaTokenizer
   type: FSDPTrainRunner

pi05_paligemma_libero_10_full_finetune_bs64/pi05_paligemma_libero_10_full_finetune_2026_05_15_09_15_10.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6b55f3eac24c09f0b32008eb10d130f9769ce38528f7b110d62789bbd20826e
+size 10747290

pi05_paligemma_libero_10_full_finetune_bs64/run-metrics.jsonl CHANGED Viewed

@@ -1 +1 @@

- {"hparams": "{'model': {'type': 'PI05FlowMatching', 'llm_backbone': {'type': 'ConditionGemmaModel', 'adarms_cond_dim': None, 'attention_bias': False, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 2048, 'initializer_range': 0.02, 'intermediate_size': 16384, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'use_cache': True, 'vocab_size': 257152}, 'vision_backbone': {'type': 'SigLIPViTBackbone', 'vision_backbone_id': 'siglip_224', 'vision_config': {'attention_dropout': 0.0, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_size': 1152, 'image_size': 224, 'intermediate_size': 4304, 'layer_norm_eps': 1e-06, 'model_type': 'siglip_vision_model', 'num_attention_heads': 16, 'num_channels': 3, 'num_hidden_layers': 27, 'patch_size': 14, 'projection_dim': 2048, 'projector_hidden_act': 'gelu_fast', 'torch_dtype': 'float32', 'vision_use_head': False}}, 'projector': {'type': 'LinearProjector', 'in_dim': 1152, 'out_dim': 2048}, 'proj_width': 1024, 'n_action_steps': 10, 'action_in_proj': {'type': 'LinearProjector', 'in_dim': 32, 'out_dim': 1024}, 'action_out_proj': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 32}, 'time_mlp_in': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'time_mlp_out': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'max_action_dim': 32, 'llm_expert': {'type': 'ConditionGemmaModel', 'attention_bias': False, 'adarms_cond_dim': 1024, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 1024, 'initializer_range': 0.02, 'intermediate_size': 4096, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 
'num_hidden_layers': 18, 'num_key_value_heads': 1, 'pad_token_id': 0, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'transformers_version': '4.48.1', 'use_adarms': True, 'use_cache': True, 'vocab_size': 257152}, 'freeze_llm_backbone': False, 'freeze_vision_backbone': False, 'pretrained_name_or_path': './checkpoints/pi05_libero/model.safetensors', 'name_mapping': {'llm_backbone': 'paligemma_with_expert.paligemma.model.language_model', 'vision_backbone.vision': 'paligemma_with_expert.paligemma.model.vision_tower', 'projector.projector': 'paligemma_with_expert.paligemma.model.multi_modal_projector.linear', 'llm_expert': 'paligemma_with_expert.gemma_expert.model', 'time_mlp_in.projector': 'time_mlp_in', 'time_mlp_out.projector': 'time_mlp_out', 'action_in_proj.projector': 'action_in_proj', 'action_out_proj.projector': 'action_out_proj', 'llm_backbone.embed_tokens': 'paligemma_with_expert.paligemma.lm_head'}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': './datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter', 'use_conversation': False}, {'type': 'ProcessPrompts', 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 
57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 32, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'sampler': None, 'tokenizer': {'type': 'PaligemmaTokenizer'}, 'metric': {'type': 'VLAMetric', 'active_trackers': ('jsonl', 'wandb'), 'run_dir': './work_dirs/pi05_paligemma_libero_10_full_finetune_bs64', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {'model': {'type': 'PI05FlowMatching', 'llm_backbone': {'type': 'ConditionGemmaModel', 'adarms_cond_dim': None, 'attention_bias': False, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 2048, 'initializer_range': 0.02, 'intermediate_size': 16384, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'use_cache': True, 'vocab_size': 257152}, 'vision_backbone': {'type': 'SigLIPViTBackbone', 'vision_backbone_id': 'siglip_224', 'vision_config': {'attention_dropout': 0.0, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_size': 1152, 'image_size': 224, 'intermediate_size': 4304, 'layer_norm_eps': 1e-06, 'model_type': 'siglip_vision_model', 'num_attention_heads': 16, 'num_channels': 3, 
'num_hidden_layers': 27, 'patch_size': 14, 'projection_dim': 2048, 'projector_hidden_act': 'gelu_fast', 'torch_dtype': 'float32', 'vision_use_head': False}}, 'projector': {'type': 'LinearProjector', 'in_dim': 1152, 'out_dim': 2048}, 'proj_width': 1024, 'n_action_steps': 10, 'action_in_proj': {'type': 'LinearProjector', 'in_dim': 32, 'out_dim': 1024}, 'action_out_proj': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 32}, 'time_mlp_in': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'time_mlp_out': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'max_action_dim': 32, 'llm_expert': {'type': 'ConditionGemmaModel', 'attention_bias': False, 'adarms_cond_dim': 1024, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 1024, 'initializer_range': 0.02, 'intermediate_size': 4096, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'pad_token_id': 0, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'transformers_version': '4.48.1', 'use_adarms': True, 'use_cache': True, 'vocab_size': 257152}, 'freeze_llm_backbone': False, 'freeze_vision_backbone': False, 'pretrained_name_or_path': './checkpoints/pi05_libero/model.safetensors', 'name_mapping': {'llm_backbone': 'paligemma_with_expert.paligemma.model.language_model', 'vision_backbone.vision': 'paligemma_with_expert.paligemma.model.vision_tower', 'projector.projector': 'paligemma_with_expert.paligemma.model.multi_modal_projector.linear', 'llm_expert': 'paligemma_with_expert.gemma_expert.model', 'time_mlp_in.projector': 'time_mlp_in', 'time_mlp_out.projector': 'time_mlp_out', 'action_in_proj.projector': 'action_in_proj', 'action_out_proj.projector': 'action_out_proj', 'llm_backbone.embed_tokens': 'paligemma_with_expert.paligemma.lm_head'}}, 'train_dataloader': 
{'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': './datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter', 'use_conversation': False}, {'type': 'ProcessPrompts', 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 32, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'sampler': None, 'tokenizer': {'type': 'PaligemmaTokenizer'}, 'metric': {'type': 'VLAMetric', 'active_trackers': ('jsonl', 'wandb'), 'run_dir': './work_dirs/pi05_paligemma_libero_10_full_finetune_bs64', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: 
configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {...}, 'run_id': 'pi05_paligemma_libero_10_full_finetune_2026_03_12_06_33_02'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'max_steps': None, 'max_keep_ckpts': 1, 'cfg': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {...}, 'args': Namespace(config='configs/pi05/pi05_paligemma_libero_10_full_finetune.py', work_dir='./work_dirs/pi05_paligemma_libero_10_full_finetune_bs64', cfg_options={'train_dataloader.per_device_batch_size': 8, 'runner.max_epochs': 18, 'runner.max_steps': None, 'runner.max_keep_ckpts': 1}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'LiberoPromptFromInputs', 'use_conversation': False, 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'LiberoProprioFromInputs', 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'state_dim': 32, 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std', 'action_dim': 7}}}, 'run_id': 'pi05_paligemma_libero_10_full_finetune_2026_03_12_06_33_02'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 
'warmup_ratio': 0.03, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'max_steps': None, 'max_keep_ckpts': 1, 'cfg': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {'model': {'type': 'PI05FlowMatching', 'llm_backbone': {'type': 'ConditionGemmaModel', 'adarms_cond_dim': None, 'attention_bias': False, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 2048, 'initializer_range': 0.02, 'intermediate_size': 16384, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'use_cache': True, 'vocab_size': 257152}, 'vision_backbone': {'type': 'SigLIPViTBackbone', 'vision_backbone_id': 'siglip_224', 'vision_config': {'attention_dropout': 0.0, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_size': 1152, 'image_size': 224, 'intermediate_size': 4304, 'layer_norm_eps': 1e-06, 'model_type': 'siglip_vision_model', 'num_attention_heads': 16, 'num_channels': 3, 'num_hidden_layers': 27, 'patch_size': 14, 'projection_dim': 2048, 'projector_hidden_act': 'gelu_fast', 'torch_dtype': 'float32', 'vision_use_head': False}}, 'projector': {'type': 'LinearProjector', 'in_dim': 1152, 'out_dim': 2048}, 'proj_width': 1024, 'n_action_steps': 10, 'action_in_proj': {'type': 'LinearProjector', 'in_dim': 32, 'out_dim': 1024}, 'action_out_proj': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 32}, 'time_mlp_in': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'time_mlp_out': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'max_action_dim': 32, 'llm_expert': {'type': 'ConditionGemmaModel', 'attention_bias': False, 'adarms_cond_dim': 1024, 'attention_dropout': 0.0, 'bos_token_id': 2, 
'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 1024, 'initializer_range': 0.02, 'intermediate_size': 4096, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'pad_token_id': 0, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'transformers_version': '4.48.1', 'use_adarms': True, 'use_cache': True, 'vocab_size': 257152}, 'freeze_llm_backbone': False, 'freeze_vision_backbone': False, 'pretrained_name_or_path': './checkpoints/pi05_libero/model.safetensors', 'name_mapping': {'llm_backbone': 'paligemma_with_expert.paligemma.model.language_model', 'vision_backbone.vision': 'paligemma_with_expert.paligemma.model.vision_tower', 'projector.projector': 'paligemma_with_expert.paligemma.model.multi_modal_projector.linear', 'llm_expert': 'paligemma_with_expert.gemma_expert.model', 'time_mlp_in.projector': 'time_mlp_in', 'time_mlp_out.projector': 'time_mlp_out', 'action_in_proj.projector': 'action_in_proj', 'action_out_proj.projector': 'action_out_proj', 'llm_backbone.embed_tokens': 'paligemma_with_expert.paligemma.lm_head'}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': './datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter', 'use_conversation': False}, {'type': 
'ProcessPrompts', 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 32, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 18, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'sampler': None, 'tokenizer': {'type': 'PaligemmaTokenizer'}, 'metric': {'type': 'VLAMetric', 'active_trackers': ('jsonl', 'wandb'), 'run_dir': './work_dirs/pi05_paligemma_libero_10_full_finetune_bs64', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {...}, 'run_id': 'pi05_paligemma_libero_10_full_finetune_2026_03_12_06_33_02'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'max_steps': None, 'max_keep_ckpts': 1, 'cfg': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {...}, 'args': Namespace(config='configs/pi05/pi05_paligemma_libero_10_full_finetune.py', work_dir='./work_dirs/pi05_paligemma_libero_10_full_finetune_bs64', cfg_options={'train_dataloader.per_device_batch_size': 8, 'runner.max_epochs': 18, 'runner.max_steps': None, 
'runner.max_keep_ckpts': 1}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'LiberoPromptFromInputs', 'use_conversation': False, 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'LiberoProprioFromInputs', 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'state_dim': 32, 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std', 'action_dim': 7}}}, 'args': Namespace(config='configs/pi05/pi05_paligemma_libero_10_full_finetune.py', work_dir='./work_dirs/pi05_paligemma_libero_10_full_finetune_bs64', cfg_options={'train_dataloader.per_device_batch_size': 8, 'runner.max_epochs': 18, 'runner.max_steps': None, 'runner.max_keep_ckpts': 1}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], 
[123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'LiberoPromptFromInputs', 'use_conversation': False, 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'LiberoProprioFromInputs', 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'state_dim': 32, 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std', 'action_dim': 7}}}", "run_id": "pi05_paligemma_libero_10_full_finetune_2026_03_12_06_33_02"}

+ {"hparams": "{'model': {'type': 'PI05FlowMatching', 'llm_backbone': {'type': 'ConditionGemmaModel', 'adarms_cond_dim': None, 'attention_bias': False, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 2048, 'initializer_range': 0.02, 'intermediate_size': 16384, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'use_cache': True, 'vocab_size': 257152}, 'vision_backbone': {'type': 'SigLIPViTBackbone', 'vision_backbone_id': 'siglip_224', 'vision_config': {'attention_dropout': 0.0, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_size': 1152, 'image_size': 224, 'intermediate_size': 4304, 'layer_norm_eps': 1e-06, 'model_type': 'siglip_vision_model', 'num_attention_heads': 16, 'num_channels': 3, 'num_hidden_layers': 27, 'patch_size': 14, 'projection_dim': 2048, 'projector_hidden_act': 'gelu_fast', 'torch_dtype': 'float32', 'vision_use_head': False}}, 'projector': {'type': 'LinearProjector', 'in_dim': 1152, 'out_dim': 2048}, 'proj_width': 1024, 'n_action_steps': 10, 'action_in_proj': {'type': 'LinearProjector', 'in_dim': 32, 'out_dim': 1024}, 'action_out_proj': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 32}, 'time_mlp_in': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'time_mlp_out': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'max_action_dim': 32, 'llm_expert': {'type': 'ConditionGemmaModel', 'attention_bias': False, 'adarms_cond_dim': 1024, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 1024, 'initializer_range': 0.02, 'intermediate_size': 4096, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 
'num_hidden_layers': 18, 'num_key_value_heads': 1, 'pad_token_id': 0, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'transformers_version': '4.48.1', 'use_adarms': True, 'use_cache': True, 'vocab_size': 257152}, 'freeze_llm_backbone': False, 'freeze_vision_backbone': False, 'pretrained_name_or_path': './checkpoints/pi05_libero/model.safetensors', 'name_mapping': {'llm_backbone': 'paligemma_with_expert.paligemma.model.language_model', 'vision_backbone.vision': 'paligemma_with_expert.paligemma.model.vision_tower', 'projector.projector': 'paligemma_with_expert.paligemma.model.multi_modal_projector.linear', 'llm_expert': 'paligemma_with_expert.gemma_expert.model', 'time_mlp_in.projector': 'time_mlp_in', 'time_mlp_out.projector': 'time_mlp_out', 'action_in_proj.projector': 'action_in_proj', 'action_out_proj.projector': 'action_out_proj', 'llm_backbone.embed_tokens': 'paligemma_with_expert.paligemma.lm_head'}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': './datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter', 'use_conversation': False}, {'type': 'ProcessPrompts', 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 
57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 32, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 24, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sharding_strategy': 'no-shard', 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'sampler': None, 'tokenizer': {'type': 'PaligemmaTokenizer'}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/pi05/libero_10', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {'model': {'type': 'PI05FlowMatching', 'llm_backbone': {'type': 'ConditionGemmaModel', 'adarms_cond_dim': None, 'attention_bias': False, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 2048, 'initializer_range': 0.02, 'intermediate_size': 16384, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'use_cache': True, 'vocab_size': 257152}, 'vision_backbone': {'type': 'SigLIPViTBackbone', 'vision_backbone_id': 'siglip_224', 'vision_config': {'attention_dropout': 0.0, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_size': 1152, 'image_size': 224, 'intermediate_size': 4304, 'layer_norm_eps': 1e-06, 'model_type': 'siglip_vision_model', 'num_attention_heads': 16, 
'num_channels': 3, 'num_hidden_layers': 27, 'patch_size': 14, 'projection_dim': 2048, 'projector_hidden_act': 'gelu_fast', 'torch_dtype': 'float32', 'vision_use_head': False}}, 'projector': {'type': 'LinearProjector', 'in_dim': 1152, 'out_dim': 2048}, 'proj_width': 1024, 'n_action_steps': 10, 'action_in_proj': {'type': 'LinearProjector', 'in_dim': 32, 'out_dim': 1024}, 'action_out_proj': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 32}, 'time_mlp_in': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'time_mlp_out': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'max_action_dim': 32, 'llm_expert': {'type': 'ConditionGemmaModel', 'attention_bias': False, 'adarms_cond_dim': 1024, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 1024, 'initializer_range': 0.02, 'intermediate_size': 4096, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'pad_token_id': 0, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'transformers_version': '4.48.1', 'use_adarms': True, 'use_cache': True, 'vocab_size': 257152}, 'freeze_llm_backbone': False, 'freeze_vision_backbone': False, 'pretrained_name_or_path': './checkpoints/pi05_libero/model.safetensors', 'name_mapping': {'llm_backbone': 'paligemma_with_expert.paligemma.model.language_model', 'vision_backbone.vision': 'paligemma_with_expert.paligemma.model.vision_tower', 'projector.projector': 'paligemma_with_expert.paligemma.model.multi_modal_projector.linear', 'llm_expert': 'paligemma_with_expert.gemma_expert.model', 'time_mlp_in.projector': 'time_mlp_in', 'time_mlp_out.projector': 'time_mlp_out', 'action_in_proj.projector': 'action_in_proj', 'action_out_proj.projector': 'action_out_proj', 'llm_backbone.embed_tokens': 'paligemma_with_expert.paligemma.lm_head'}}, 
'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': './datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter', 'use_conversation': False}, {'type': 'ProcessPrompts', 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 32, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 24, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sharding_strategy': 'no-shard', 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'sampler': None, 'tokenizer': {'type': 'PaligemmaTokenizer'}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/pi05/libero_10', 
'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {...}, 'run_id': 'pi05_paligemma_libero_10_full_finetune_2026_05_15_09_15_10'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {...}, 'args': Namespace(config='configs/pi05/pi05_paligemma_libero_10_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/pi05/libero_10', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'LiberoPromptFromInputs', 'use_conversation': False, 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'LiberoProprioFromInputs', 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'state_dim': 32, 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std', 'action_dim': 7}}}, 'run_id': 'pi05_paligemma_libero_10_full_finetune_2026_05_15_09_15_10'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 
'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {'model': {'type': 'PI05FlowMatching', 'llm_backbone': {'type': 'ConditionGemmaModel', 'adarms_cond_dim': None, 'attention_bias': False, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 2048, 'initializer_range': 0.02, 'intermediate_size': 16384, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'use_cache': True, 'vocab_size': 257152}, 'vision_backbone': {'type': 'SigLIPViTBackbone', 'vision_backbone_id': 'siglip_224', 'vision_config': {'attention_dropout': 0.0, 'hidden_act': 'gelu_pytorch_tanh', 'hidden_size': 1152, 'image_size': 224, 'intermediate_size': 4304, 'layer_norm_eps': 1e-06, 'model_type': 'siglip_vision_model', 'num_attention_heads': 16, 'num_channels': 3, 'num_hidden_layers': 27, 'patch_size': 14, 'projection_dim': 2048, 'projector_hidden_act': 'gelu_fast', 'torch_dtype': 'float32', 'vision_use_head': False}}, 'projector': {'type': 'LinearProjector', 'in_dim': 1152, 'out_dim': 2048}, 'proj_width': 1024, 'n_action_steps': 10, 'action_in_proj': {'type': 'LinearProjector', 'in_dim': 32, 'out_dim': 1024}, 'action_out_proj': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 32}, 'time_mlp_in': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'time_mlp_out': {'type': 'LinearProjector', 'in_dim': 1024, 'out_dim': 1024}, 'max_action_dim': 32, 'llm_expert': {'type': 'ConditionGemmaModel', 'attention_bias': False, 'adarms_cond_dim': 1024, 'attention_dropout': 0.0, 'bos_token_id': 2, 'eos_token_id': 1, 'head_dim': 256, 'hidden_act': 'gelu_pytorch_tanh', 
'hidden_activation': 'gelu_pytorch_tanh', 'hidden_size': 1024, 'initializer_range': 0.02, 'intermediate_size': 4096, 'max_position_embeddings': 8192, 'model_type': 'gemma', 'num_attention_heads': 8, 'num_hidden_layers': 18, 'num_key_value_heads': 1, 'pad_token_id': 0, 'rms_norm_eps': 1e-06, 'rope_theta': 10000.0, 'torch_dtype': 'float32', 'transformers_version': '4.48.1', 'use_adarms': True, 'use_cache': True, 'vocab_size': 257152}, 'freeze_llm_backbone': False, 'freeze_vision_backbone': False, 'pretrained_name_or_path': './checkpoints/pi05_libero/model.safetensors', 'name_mapping': {'llm_backbone': 'paligemma_with_expert.paligemma.model.language_model', 'vision_backbone.vision': 'paligemma_with_expert.paligemma.model.vision_tower', 'projector.projector': 'paligemma_with_expert.paligemma.model.multi_modal_projector.linear', 'llm_expert': 'paligemma_with_expert.gemma_expert.model', 'time_mlp_in.projector': 'time_mlp_in', 'time_mlp_out.projector': 'time_mlp_out', 'action_in_proj.projector': 'action_in_proj', 'action_out_proj.projector': 'action_out_proj', 'llm_backbone.embed_tokens': 'paligemma_with_expert.paligemma.lm_head'}}, 'train_dataloader': {'per_device_batch_size': 8, 'per_device_num_workers': 4, 'dataset': {'type': 'DistributedRepeatingDataset', 'name_mappings': {'observation.state': ['proprio'], 'action': ['action']}, 'statistic_keys': ['observation.state', 'timestamp', 'action'], 'statistic_name': 'libero_10_no_noops', 'datasets': {'type': 'ParquetDataset', 'data_root_path': './datasets/libero_10_no_noops_lerobotv2.1', 'transforms': [{'type': 'ProcessParquetInputs', 'parquet_keys': ['observation.state', 'timestamp', 'actions', 'info', 'stats', 'action_masks'], 'video_keys': ['observation.images.image', 'observation.images.wrist_image'], 'name_mappings': {'observation.state': ['states'], 'actions': ['actions']}}, {'type': 'ParquetPrompter', 'use_conversation': False}, {'type': 'ProcessPrompts', 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 
'ResizeImages', 'height': 224, 'width': 224}, {'type': 'NormalizeImages', 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'NormalizeStatesAndActions', 'action_dim': 32, 'state_dim': 32, 'state_key': 'proprio', 'action_key': 'action', 'norm_type': 'mean_std'}], 'action_window_size': 10, 'action_key': 'action', 'use_delta': False, 'statistic_name': 'libero_10_no_noops', 'window_start_idx': 0}}}, 'runner': {'type': 'FSDPTrainRunner', 'max_epochs': 24, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'sharding_strategy': 'no-shard', 'collator': {'type': 'DictCollator', 'keys': ['states', 'observation.eepose', 'timestamp', 'images', 'img_masks', 'lang_tokens', 'lang_masks', 'actions', 'action_masks'], 'meta_keys': ['task_description', 'prompt', 'info', 'stats']}, 'sampler': None, 'tokenizer': {'type': 'PaligemmaTokenizer'}, 'metric': {'type': 'VLAMetric', 'active_trackers': ['jsonl', 'tensorboard'], 'run_dir': '/limx/tos/users/jikun/wk_dir/pi05/libero_10', 'grad_accumulation_steps': 1, 'window_size': 1, 'hparams': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {...}, 'run_id': 'pi05_paligemma_libero_10_full_finetune_2026_05_15_09_15_10'}, 'lr_scheduler_type': 'linear-warmup+cosine-decay', 'warmup_ratio': 0.03, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'mixed_precision_dtype': 'bf16', 'change_key_name': False, 'cfg': Config (path: configs/pi05/pi05_paligemma_libero_10_full_finetune.py): {...}, 'args': Namespace(config='configs/pi05/pi05_paligemma_libero_10_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/pi05/libero_10', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 
10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'LiberoPromptFromInputs', 'use_conversation': False, 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 'LiberoProprioFromInputs', 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'state_dim': 32, 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std', 'action_dim': 7}}}, 'args': Namespace(config='configs/pi05/pi05_paligemma_libero_10_full_finetune.py', work_dir='/limx/tos/users/jikun/wk_dir/pi05/libero_10', cfg_options={'runner.metric.active_trackers': ['jsonl', 'tensorboard']}, eval_after_train=True, resume_from=None)}, 'eval': {'type': 'LiberoEvalRunner', 'task_suite_name': 'libero_10', 'model_family': 'pi0', 'eval_chunk_size': 10, 'resize_size': 224, 'num_trials_per_task': 50, 'num_steps_wait': 10, 'seed': 7, 'dataset': {'type': 'LiberoParquetEvalDataset', 'transforms': [{'type': 'ProcessLiberoEvalInputs', 'img_keys': ['agentview_image', 'robot0_eye_in_hand_image']}, {'type': 'TransformImage', 'image_resize_strategy': 'resize-naive', 'input_sizes': [[3, 224, 224], [3, 224, 224]], 'means': [[123.515625, 116.04492188, 103.59375], [123.515625, 116.04492188, 103.59375]], 'stds': [[58.27148438, 57.02636719, 57.27539062], [58.27148438, 57.02636719, 57.27539062]]}, {'type': 'LiberoPromptFromInputs', 'use_conversation': False, 'tokenizer': {'type': 'PaligemmaTokenizer'}}, {'type': 
'LiberoProprioFromInputs', 'norm_type': 'mean_std', 'pos_key': 'robot0_eef_pos', 'quat_key': 'robot0_eef_quat', 'gripper_key': 'robot0_gripper_qpos', 'state_dim': 32, 'out_key': 'states'}]}, 'denormalize_action': {'type': 'DenormalizeLiberoAction', 'norm_type': 'mean_std', 'action_dim': 7}}}", "run_id": "pi05_paligemma_libero_10_full_finetune_2026_05_15_09_15_10"}

pi05_paligemma_libero_goal_full_finetune_bs64/checkpoints/step-019848-epoch-24-loss=0.0145.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54ef053d4c29540dd2ec21154716b433c05333173528b3c0d2abd740835d42f9
+size 7233625688

pi05_paligemma_libero_goal_full_finetune_bs64/config.json ADDED Viewed

	@@ -0,0 +1,355 @@

+{
+  "eval": {
+    "dataset": {
+      "transforms": [
+        {
+          "img_keys": [
+            "agentview_image",
+            "robot0_eye_in_hand_image"
+          ],
+          "type": "ProcessLiberoEvalInputs"
+        },
+        {
+          "image_resize_strategy": "resize-naive",
+          "input_sizes": [
+            [
+              3,
+              224,
+              224
+            ],
+            [
+              3,
+              224,
+              224
+            ]
+          ],
+          "means": [
+            [
+              123.515625,
+              116.04492188,
+              103.59375
+            ],
+            [
+              123.515625,
+              116.04492188,
+              103.59375
+            ]
+          ],
+          "stds": [
+            [
+              58.27148438,
+              57.02636719,
+              57.27539062
+            ],
+            [
+              58.27148438,
+              57.02636719,
+              57.27539062
+            ]
+          ],
+          "type": "TransformImage"
+        },
+        {
+          "tokenizer": {
+            "type": "PaligemmaTokenizer"
+          },
+          "type": "LiberoPromptFromInputs",
+          "use_conversation": false
+        },
+        {
+          "gripper_key": "robot0_gripper_qpos",
+          "norm_type": "mean_std",
+          "out_key": "states",
+          "pos_key": "robot0_eef_pos",
+          "quat_key": "robot0_eef_quat",
+          "state_dim": 32,
+          "type": "LiberoProprioFromInputs"
+        }
+      ],
+      "type": "LiberoParquetEvalDataset"
+    },
+    "denormalize_action": {
+      "action_dim": 7,
+      "norm_type": "mean_std",
+      "type": "DenormalizeLiberoAction"
+    },
+    "eval_chunk_size": 10,
+    "model_family": "pi0",
+    "num_steps_wait": 10,
+    "num_trials_per_task": 50,
+    "resize_size": 224,
+    "seed": 7,
+    "task_suite_name": "libero_goal",
+    "type": "LiberoEvalRunner"
+  },
+  "model": {
+    "action_in_proj": {
+      "in_dim": 32,
+      "out_dim": 1024,
+      "type": "LinearProjector"
+    },
+    "action_out_proj": {
+      "in_dim": 1024,
+      "out_dim": 32,
+      "type": "LinearProjector"
+    },
+    "freeze_llm_backbone": false,
+    "freeze_vision_backbone": false,
+    "llm_backbone": {
+      "adarms_cond_dim": null,
+      "attention_bias": false,
+      "attention_dropout": 0.0,
+      "bos_token_id": 2,
+      "eos_token_id": 1,
+      "head_dim": 256,
+      "hidden_act": "gelu_pytorch_tanh",
+      "hidden_activation": "gelu_pytorch_tanh",
+      "hidden_size": 2048,
+      "initializer_range": 0.02,
+      "intermediate_size": 16384,
+      "max_position_embeddings": 8192,
+      "model_type": "gemma",
+      "num_attention_heads": 8,
+      "num_hidden_layers": 18,
+      "num_key_value_heads": 1,
+      "rms_norm_eps": 1e-06,
+      "rope_theta": 10000.0,
+      "torch_dtype": "float32",
+      "type": "ConditionGemmaModel",
+      "use_cache": true,
+      "vocab_size": 257152
+    },
+    "llm_expert": {
+      "adarms_cond_dim": 1024,
+      "attention_bias": false,
+      "attention_dropout": 0.0,
+      "bos_token_id": 2,
+      "eos_token_id": 1,
+      "head_dim": 256,
+      "hidden_act": "gelu_pytorch_tanh",
+      "hidden_activation": "gelu_pytorch_tanh",
+      "hidden_size": 1024,
+      "initializer_range": 0.02,
+      "intermediate_size": 4096,
+      "max_position_embeddings": 8192,
+      "model_type": "gemma",
+      "num_attention_heads": 8,
+      "num_hidden_layers": 18,
+      "num_key_value_heads": 1,
+      "pad_token_id": 0,
+      "rms_norm_eps": 1e-06,
+      "rope_theta": 10000.0,
+      "torch_dtype": "float32",
+      "transformers_version": "4.48.1",
+      "type": "ConditionGemmaModel",
+      "use_adarms": true,
+      "use_cache": true,
+      "vocab_size": 257152
+    },
+    "max_action_dim": 32,
+    "n_action_steps": 10,
+    "name_mapping": {
+      "action_in_proj.projector": "action_in_proj",
+      "action_out_proj.projector": "action_out_proj",
+      "llm_backbone": "paligemma_with_expert.paligemma.model.language_model",
+      "llm_backbone.embed_tokens": "paligemma_with_expert.paligemma.lm_head",
+      "llm_expert": "paligemma_with_expert.gemma_expert.model",
+      "projector.projector": "paligemma_with_expert.paligemma.model.multi_modal_projector.linear",
+      "time_mlp_in.projector": "time_mlp_in",
+      "time_mlp_out.projector": "time_mlp_out",
+      "vision_backbone.vision": "paligemma_with_expert.paligemma.model.vision_tower"
+    },
+    "pretrained_name_or_path": "./checkpoints/pi05_libero/model.safetensors",
+    "proj_width": 1024,
+    "projector": {
+      "in_dim": 1152,
+      "out_dim": 2048,
+      "type": "LinearProjector"
+    },
+    "time_mlp_in": {
+      "in_dim": 1024,
+      "out_dim": 1024,
+      "type": "LinearProjector"
+    },
+    "time_mlp_out": {
+      "in_dim": 1024,
+      "out_dim": 1024,
+      "type": "LinearProjector"
+    },
+    "type": "PI05FlowMatching",
+    "vision_backbone": {
+      "type": "SigLIPViTBackbone",
+      "vision_backbone_id": "siglip_224",
+      "vision_config": {
+        "attention_dropout": 0.0,
+        "hidden_act": "gelu_pytorch_tanh",
+        "hidden_size": 1152,
+        "image_size": 224,
+        "intermediate_size": 4304,
+        "layer_norm_eps": 1e-06,
+        "model_type": "siglip_vision_model",
+        "num_attention_heads": 16,
+        "num_channels": 3,
+        "num_hidden_layers": 27,
+        "patch_size": 14,
+        "projection_dim": 2048,
+        "projector_hidden_act": "gelu_fast",
+        "torch_dtype": "float32",
+        "vision_use_head": false
+      }
+    }
+  },
+  "runner": {
+    "change_key_name": false,
+    "collator": {
+      "keys": [
+        "states",
+        "observation.eepose",
+        "timestamp",
+        "images",
+        "img_masks",
+        "lang_tokens",
+        "lang_masks",
+        "actions",
+        "action_masks"
+      ],
+      "meta_keys": [
+        "task_description",
+        "prompt",
+        "info",
+        "stats"
+      ],
+      "type": "DictCollator"
+    },
+    "enable_gradient_checkpointing": true,
+    "enable_mixed_precision_training": true,
+    "learning_rate": 5e-05,
+    "lr_scheduler_type": "linear-warmup+cosine-decay",
+    "max_epochs": 24,
+    "max_grad_norm": 1.0,
+    "metric": {
+      "active_trackers": [
+        "jsonl",
+        "tensorboard"
+      ],
+      "grad_accumulation_steps": 1,
+      "run_dir": "work_dirs",
+      "type": "VLAMetric",
+      "window_size": 1
+    },
+    "mixed_precision_dtype": "bf16",
+    "sampler": null,
+    "sharding_strategy": "no-shard",
+    "tokenizer": {
+      "type": "PaligemmaTokenizer"
+    },
+    "type": "FSDPTrainRunner",
+    "warmup_ratio": 0.03,
+    "weight_decay": 0.0
+  },
+  "train_dataloader": {
+    "dataset": {
+      "datasets": {
+        "action_key": "action",
+        "action_window_size": 10,
+        "data_root_path": "./datasets/libero_goal_no_noops_lerobotv2.1",
+        "statistic_name": "libero_goal_no_noops",
+        "transforms": [
+          {
+            "name_mappings": {
+              "actions": [
+                "actions"
+              ],
+              "observation.state": [
+                "states"
+              ]
+            },
+            "parquet_keys": [
+              "observation.state",
+              "timestamp",
+              "actions",
+              "info",
+              "stats",
+              "action_masks"
+            ],
+            "type": "ProcessParquetInputs",
+            "video_keys": [
+              "observation.images.image",
+              "observation.images.wrist_image"
+            ]
+          },
+          {
+            "type": "ParquetPrompter",
+            "use_conversation": false
+          },
+          {
+            "tokenizer": {
+              "type": "PaligemmaTokenizer"
+            },
+            "type": "ProcessPrompts"
+          },
+          {
+            "height": 224,
+            "type": "ResizeImages",
+            "width": 224
+          },
+          {
+            "means": [
+              [
+                123.515625,
+                116.04492188,
+                103.59375
+              ],
+              [
+                123.515625,
+                116.04492188,
+                103.59375
+              ]
+            ],
+            "stds": [
+              [
+                58.27148438,
+                57.02636719,
+                57.27539062
+              ],
+              [
+                58.27148438,
+                57.02636719,
+                57.27539062
+              ]
+            ],
+            "type": "NormalizeImages"
+          },
+          {
+            "action_dim": 32,
+            "action_key": "action",
+            "norm_type": "mean_std",
+            "state_dim": 32,
+            "state_key": "proprio",
+            "type": "NormalizeStatesAndActions"
+          }
+        ],
+        "type": "ParquetDataset",
+        "use_delta": false,
+        "window_start_idx": 0
+      },
+      "name_mappings": {
+        "action": [
+          "action"
+        ],
+        "observation.state": [
+          "proprio"
+        ]
+      },
+      "statistic_keys": [
+        "observation.state",
+        "timestamp",
+        "action"
+      ],
+      "statistic_name": "libero_goal_no_noops",
+      "type": "DistributedRepeatingDataset"
+    },
+    "per_device_batch_size": 8,
+    "per_device_num_workers": 4
+  }
+}