Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

config.json +166 -0
model.safetensors +3 -0
policy_postprocessor.json +33 -0
policy_postprocessor_step_0_unnormalizer_processor.safetensors +3 -0
policy_preprocessor.json +88 -0
policy_preprocessor_step_2_normalizer_processor.safetensors +3 -0
train_config.json +476 -0

config.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+    "type": "pi05_rl",
+    "n_obs_steps": 1,
+    "input_features": {
+        "observation.images.side": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                224,
+                224
+            ]
+        },
+        "observation.images.top": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                224,
+                224
+            ]
+        },
+        "observation.state": {
+            "type": "STATE",
+            "shape": [
+                6
+            ]
+        }
+    },
+    "output_features": {
+        "action": {
+            "type": "ACTION",
+            "shape": [
+                6
+            ]
+        }
+    },
+    "device": "cuda",
+    "use_amp": false,
+    "use_peft": false,
+    "push_to_hub": true,
+    "repo_id": "cijerezg/multi-task-toys-merged-v2",
+    "private": null,
+    "tags": null,
+    "license": null,
+    "pretrained_path": null,
+    "paligemma_variant": "gemma_2b",
+    "action_expert_variant": "gemma_300m",
+    "dtype": "bfloat16",
+    "chunk_size": 50,
+    "n_action_steps": 50,
+    "max_state_dim": 6,
+    "max_action_dim": 32,
+    "num_inference_steps": 5,
+    "time_sampling_beta_alpha": 1.5,
+    "time_sampling_beta_beta": 1.0,
+    "time_sampling_scale": 0.999,
+    "time_sampling_offset": 0.001,
+    "min_period": 0.004,
+    "max_period": 4.0,
+    "rtc_config": {
+        "enabled": true,
+        "prefix_attention_schedule": "LINEAR",
+        "max_guidance_weight": 10.0,
+        "execution_horizon": 10,
+        "debug": false,
+        "debug_maxlen": 100
+    },
+    "image_resolution": [
+        224,
+        224
+    ],
+    "empty_cameras": 0,
+    "use_dataset_stats": false,
+    "normalization_mapping": {
+        "VISUAL": "IDENTITY",
+        "STATE": "MIN_MAX",
+        "ENV": "MIN_MAX",
+        "ACTION": "QUANTILES"
+    },
+    "action_tokenizer_name": "physical-intelligence/fast",
+    "text_tokenizer_name": "google/paligemma-3b-pt-224",
+    "max_action_tokens": 256,
+    "fast_skip_tokens": 128,
+    "max_decoding_steps": 200,
+    "temperature": 0.0,
+    "subtask_regeneration_interval": 1.5,
+    "gradient_checkpointing": true,
+    "compile_model": false,
+    "compile_mode": "max-autotune",
+    "freeze_vision_encoder": false,
+    "train_expert_only": false,
+    "knowledge_insulation": true,
+    "action_encoding": "anchor",
+    "loss_weight_flow": 1.0,
+    "loss_weight_action_ce": 1.0,
+    "loss_weight_subtask_ce": 1.0,
+    "optimizer_lr": 2.5e-05,
+    "optimizer_betas": [
+        0.9,
+        0.95
+    ],
+    "optimizer_eps": 1e-08,
+    "optimizer_weight_decay": 0.1,
+    "optimizer_grad_clip_norm": 1.0,
+    "scheduler_warmup_steps": 1000,
+    "scheduler_decay_steps": 30000,
+    "scheduler_decay_lr": 2.5e-06,
+    "tokenizer_max_length": 64,
+    "task": "Pick up the orange cube and place it on the black X marker",
+    "action_dim": 6,
+    "drop_n_last_frames": 2,
+    "critic_target_update_weight": 0.005,
+    "num_critics": 1,
+    "discount": 0.97,
+    "reward_normalization_constant": 5.0,
+    "terminal_failure_reward": -16.0,
+    "online_steps": 20000,
+    "online_buffer_capacity": 5000,
+    "offline_buffer_capacity": 50000,
+    "async_prefetch": false,
+    "online_step_before_learning": 10,
+    "policy_update_freq": 1,
+    "grad_clip_norm": 2.0,
+    "gradient_accumulation_steps": 16,
+    "critic_lr": 5e-05,
+    "actor_lr": 5e-05,
+    "utd_ratio": 1,
+    "actor_device": "cuda:0",
+    "learner_device": "cuda:0",
+    "use_separate_critic": true,
+    "critic_llm_depth": 6,
+    "critic_network_kwargs": {
+        "hidden_dims": [
+            256,
+            256
+        ],
+        "activate_final": true
+    },
+    "trainable_params": {
+        "vision_encoder_from_layer": {
+            "vision_tower": 5,
+            "multi_modal_projector": true
+        },
+        "language_from_layer": 0,
+        "critic_language_from_layer": 5
+    },
+    "offline_steps": 10000,
+    "inference_advantage": 1.0,
+    "advantage_scaling": 0.2,
+    "pi05_checkpoint": "outputs/pi05_base",
+    "action_encoding_stats_path": "outputs/stats_jack/action_stats_anchor_jack_cube.pt",
+    "dataset_stats": null,
+    "storage_device": "cpu",
+    "shared_encoder": false,
+    "num_discrete_actions": null,
+    "vision_encoder_name": null,
+    "actor_learner_config": {
+        "learner_host": "192.168.50.1",
+        "learner_port": 50051,
+        "policy_parameters_push_frequency": 180,
+        "queue_get_timeout": 2
+    },
+    "concurrency": {
+        "actor": "threads",
+        "learner": "threads"
+    }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcd85218fb14c4783d7347a974ca2ef3cc2bb7ef6c24b1b9d051282ac7bed6fe
+size 9371429196

policy_postprocessor.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "name": "policy_postprocessor",
+  "steps": [
+    {
+      "registry_name": "unnormalizer_processor",
+      "config": {
+        "eps": 1e-08,
+        "features": {
+          "action": {
+            "type": "ACTION",
+            "shape": [
+              6
+            ]
+          }
+        },
+        "norm_map": {
+          "VISUAL": "IDENTITY",
+          "STATE": "MIN_MAX",
+          "ENV": "MIN_MAX",
+          "ACTION": "QUANTILES"
+        }
+      },
+      "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors"
+    },
+    {
+      "registry_name": "device_processor",
+      "config": {
+        "device": "cpu",
+        "float_dtype": null
+      }
+    }
+  ]
+}

policy_postprocessor_step_0_unnormalizer_processor.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55c11e62ea79ad9d94d3940555548249042f3c145b7e37a78da5c1fba3d3af3d
+size 14436

policy_preprocessor.json ADDED Viewed

	@@ -0,0 +1,88 @@

+{
+  "name": "policy_preprocessor",
+  "steps": [
+    {
+      "registry_name": "rename_observations_processor",
+      "config": {
+        "rename_map": {}
+      }
+    },
+    {
+      "registry_name": "to_batch_processor",
+      "config": {}
+    },
+    {
+      "registry_name": "normalizer_processor",
+      "config": {
+        "eps": 1e-08,
+        "features": {
+          "observation.images.side": {
+            "type": "VISUAL",
+            "shape": [
+              3,
+              224,
+              224
+            ]
+          },
+          "observation.images.top": {
+            "type": "VISUAL",
+            "shape": [
+              3,
+              224,
+              224
+            ]
+          },
+          "observation.state": {
+            "type": "STATE",
+            "shape": [
+              6
+            ]
+          },
+          "action": {
+            "type": "ACTION",
+            "shape": [
+              6
+            ]
+          }
+        },
+        "norm_map": {
+          "VISUAL": "IDENTITY",
+          "STATE": "MIN_MAX",
+          "ENV": "MIN_MAX",
+          "ACTION": "QUANTILES"
+        }
+      },
+      "state_file": "policy_preprocessor_step_2_normalizer_processor.safetensors"
+    },
+    {
+      "registry_name": "pi05_full_prepare_state_tokenizer_processor_step",
+      "config": {}
+    },
+    {
+      "registry_name": "tokenizer_processor",
+      "config": {
+        "max_length": 64,
+        "task_key": "task",
+        "padding_side": "right",
+        "padding": "max_length",
+        "truncation": true,
+        "tokenizer_name": "google/paligemma-3b-pt-224"
+      }
+    },
+    {
+      "registry_name": "action_tokenizer_processor",
+      "config": {
+        "trust_remote_code": true,
+        "max_action_tokens": 256,
+        "action_tokenizer_name": "physical-intelligence/fast"
+      }
+    },
+    {
+      "registry_name": "device_processor",
+      "config": {
+        "device": "cuda",
+        "float_dtype": null
+      }
+    }
+  ]
+}

policy_preprocessor_step_2_normalizer_processor.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55c11e62ea79ad9d94d3940555548249042f3c145b7e37a78da5c1fba3d3af3d
+size 14436

train_config.json ADDED Viewed

	@@ -0,0 +1,476 @@

+{
+    "dataset": {
+        "repo_id": "cijerezg/yellow-car-offline-training-v2",
+        "root": "outputs/jack_cube",
+        "episodes": null,
+        "max_episodes": null,
+        "image_transforms": {
+            "enable": false,
+            "max_num_transforms": 3,
+            "random_order": false,
+            "tfs": {
+                "brightness": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "brightness": [
+                            0.8,
+                            1.2
+                        ]
+                    }
+                },
+                "contrast": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "contrast": [
+                            0.8,
+                            1.2
+                        ]
+                    }
+                },
+                "saturation": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "saturation": [
+                            0.5,
+                            1.5
+                        ]
+                    }
+                },
+                "hue": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "hue": [
+                            -0.05,
+                            0.05
+                        ]
+                    }
+                },
+                "sharpness": {
+                    "weight": 1.0,
+                    "type": "SharpnessJitter",
+                    "kwargs": {
+                        "sharpness": [
+                            0.5,
+                            1.5
+                        ]
+                    }
+                },
+                "affine": {
+                    "weight": 1.0,
+                    "type": "RandomAffine",
+                    "kwargs": {
+                        "degrees": [
+                            -5.0,
+                            5.0
+                        ],
+                        "translate": [
+                            0.05,
+                            0.05
+                        ]
+                    }
+                }
+            }
+        },
+        "revision": null,
+        "use_imagenet_stats": false,
+        "video_backend": "pyav",
+        "return_uint8": false,
+        "streaming": false,
+        "additional_offline_dataset_paths": []
+    },
+    "env": {
+        "type": "gym_manipulator",
+        "task": "Pick up the red truck and put it in the bowl",
+        "fps": 30,
+        "features": {
+            "observation.images.side": {
+                "type": "VISUAL",
+                "shape": [
+                    3,
+                    128,
+                    128
+                ]
+            },
+            "observation.images.top": {
+                "type": "VISUAL",
+                "shape": [
+                    3,
+                    128,
+                    128
+                ]
+            },
+            "observation.state": {
+                "type": "STATE",
+                "shape": [
+                    6
+                ]
+            },
+            "action": {
+                "type": "ACTION",
+                "shape": [
+                    6
+                ]
+            }
+        },
+        "features_map": {
+            "observation.images.side": "observation.images.side",
+            "observation.images.top": "observation.images.top",
+            "observation.state": "observation.state",
+            "action": "action"
+        },
+        "max_parallel_tasks": 1,
+        "disable_env_checker": true,
+        "robot": {
+            "type": "so100_follower",
+            "port": "/dev/ttyACM0",
+            "disable_torque_on_disconnect": true,
+            "max_relative_target": null,
+            "cameras": {
+                "side": {
+                    "type": "opencv",
+                    "fps": 30,
+                    "width": 640,
+                    "height": 480,
+                    "index_or_path": 0,
+                    "color_mode": "rgb",
+                    "rotation": 0,
+                    "warmup_s": 1,
+                    "fourcc": null,
+                    "backend": 0
+                },
+                "top": {
+                    "type": "opencv",
+                    "fps": 30,
+                    "width": 640,
+                    "height": 480,
+                    "index_or_path": 2,
+                    "color_mode": "rgb",
+                    "rotation": 0,
+                    "warmup_s": 1,
+                    "fourcc": null,
+                    "backend": 0
+                }
+            },
+            "use_degrees": true,
+            "id": "follower_arm_v2",
+            "calibration_dir": null
+        },
+        "teleop": {
+            "type": "so100_leader",
+            "port": "/dev/ttyACM1",
+            "use_degrees": true,
+            "id": "leader_arm_v2",
+            "calibration_dir": null
+        },
+        "processor": {
+            "control_mode": "leader",
+            "observation": {
+                "add_joint_velocity_to_observation": false,
+                "add_current_to_observation": false,
+                "add_ee_pose_to_observation": false,
+                "display_cameras": false
+            },
+            "image_preprocessing": {
+                "crop_params_dict": null,
+                "resize_size": [
+                    224,
+                    224
+                ]
+            },
+            "gripper": {
+                "use_gripper": true,
+                "gripper_penalty": 0.0
+            },
+            "reset": {
+                "fixed_reset_joint_positions": [
+                    0.54,
+                    -90.69,
+                    99.55,
+                    73.7,
+                    -50.23,
+                    42.71
+                ],
+                "reset_time_s": 10.0,
+                "control_time_s": 200.0,
+                "terminate_on_success": true
+            },
+            "inverse_kinematics": null,
+            "reward_classifier": {
+                "pretrained_path": null,
+                "success_threshold": 0.5,
+                "success_reward": 1.0
+            },
+            "max_gripper_pos": 30.0
+        },
+        "name": "real_robot"
+    },
+    "policy": {
+        "type": "pi05_rl",
+        "n_obs_steps": 1,
+        "input_features": {
+            "observation.images.side": {
+                "type": "VISUAL",
+                "shape": [
+                    3,
+                    224,
+                    224
+                ]
+            },
+            "observation.images.top": {
+                "type": "VISUAL",
+                "shape": [
+                    3,
+                    224,
+                    224
+                ]
+            },
+            "observation.state": {
+                "type": "STATE",
+                "shape": [
+                    6
+                ]
+            }
+        },
+        "output_features": {
+            "action": {
+                "type": "ACTION",
+                "shape": [
+                    6
+                ]
+            }
+        },
+        "device": "cuda",
+        "use_amp": false,
+        "use_peft": false,
+        "push_to_hub": true,
+        "repo_id": "cijerezg/multi-task-toys-merged-v2",
+        "private": null,
+        "tags": null,
+        "license": null,
+        "pretrained_path": null,
+        "paligemma_variant": "gemma_2b",
+        "action_expert_variant": "gemma_300m",
+        "dtype": "bfloat16",
+        "chunk_size": 50,
+        "n_action_steps": 50,
+        "max_state_dim": 6,
+        "max_action_dim": 32,
+        "num_inference_steps": 5,
+        "time_sampling_beta_alpha": 1.5,
+        "time_sampling_beta_beta": 1.0,
+        "time_sampling_scale": 0.999,
+        "time_sampling_offset": 0.001,
+        "min_period": 0.004,
+        "max_period": 4.0,
+        "rtc_config": {
+            "enabled": true,
+            "prefix_attention_schedule": "LINEAR",
+            "max_guidance_weight": 10.0,
+            "execution_horizon": 10,
+            "debug": false,
+            "debug_maxlen": 100
+        },
+        "image_resolution": [
+            224,
+            224
+        ],
+        "empty_cameras": 0,
+        "use_dataset_stats": false,
+        "normalization_mapping": {
+            "VISUAL": "IDENTITY",
+            "STATE": "MIN_MAX",
+            "ENV": "MIN_MAX",
+            "ACTION": "QUANTILES"
+        },
+        "action_tokenizer_name": "physical-intelligence/fast",
+        "text_tokenizer_name": "google/paligemma-3b-pt-224",
+        "max_action_tokens": 256,
+        "fast_skip_tokens": 128,
+        "max_decoding_steps": 200,
+        "temperature": 0.0,
+        "subtask_regeneration_interval": 1.5,
+        "gradient_checkpointing": true,
+        "compile_model": false,
+        "compile_mode": "max-autotune",
+        "freeze_vision_encoder": false,
+        "train_expert_only": false,
+        "knowledge_insulation": true,
+        "action_encoding": "anchor",
+        "loss_weight_flow": 1.0,
+        "loss_weight_action_ce": 1.0,
+        "loss_weight_subtask_ce": 1.0,
+        "optimizer_lr": 2.5e-05,
+        "optimizer_betas": [
+            0.9,
+            0.95
+        ],
+        "optimizer_eps": 1e-08,
+        "optimizer_weight_decay": 0.1,
+        "optimizer_grad_clip_norm": 1.0,
+        "scheduler_warmup_steps": 1000,
+        "scheduler_decay_steps": 30000,
+        "scheduler_decay_lr": 2.5e-06,
+        "tokenizer_max_length": 64,
+        "task": "Pick up the orange cube and place it on the black X marker",
+        "action_dim": 6,
+        "drop_n_last_frames": 2,
+        "critic_target_update_weight": 0.005,
+        "num_critics": 1,
+        "discount": 0.97,
+        "reward_normalization_constant": 5.0,
+        "terminal_failure_reward": -16.0,
+        "online_steps": 20000,
+        "online_buffer_capacity": 5000,
+        "offline_buffer_capacity": 50000,
+        "async_prefetch": false,
+        "online_step_before_learning": 10,
+        "policy_update_freq": 1,
+        "grad_clip_norm": 2.0,
+        "gradient_accumulation_steps": 16,
+        "critic_lr": 5e-05,
+        "actor_lr": 5e-05,
+        "utd_ratio": 1,
+        "actor_device": "cuda:0",
+        "learner_device": "cuda:0",
+        "use_separate_critic": true,
+        "critic_llm_depth": 6,
+        "critic_network_kwargs": {
+            "hidden_dims": [
+                256,
+                256
+            ],
+            "activate_final": true
+        },
+        "trainable_params": {
+            "vision_encoder_from_layer": {
+                "vision_tower": 5,
+                "multi_modal_projector": true
+            },
+            "language_from_layer": 0,
+            "critic_language_from_layer": 5
+        },
+        "offline_steps": 10000,
+        "inference_advantage": 1.0,
+        "advantage_scaling": 0.2,
+        "pi05_checkpoint": "outputs/pi05_base",
+        "action_encoding_stats_path": "outputs/stats_jack/action_stats_anchor_jack_cube.pt",
+        "dataset_stats": null,
+        "storage_device": "cpu",
+        "shared_encoder": false,
+        "num_discrete_actions": null,
+        "vision_encoder_name": null,
+        "actor_learner_config": {
+            "learner_host": "192.168.50.1",
+            "learner_port": 50051,
+            "policy_parameters_push_frequency": 180,
+            "queue_get_timeout": 2
+        },
+        "concurrency": {
+            "actor": "threads",
+            "learner": "threads"
+        }
+    },
+    "reward_model": null,
+    "output_dir": "outputs/jack_pi05_full_offline_training_val_anchor_action_v1",
+    "job_name": "default",
+    "resume": false,
+    "seed": 42,
+    "cudnn_deterministic": false,
+    "num_workers": 4,
+    "batch_size": 8,
+    "prefetch_factor": 4,
+    "persistent_workers": true,
+    "steps": 100000,
+    "eval_freq": 20000,
+    "log_freq": 20,
+    "tolerance_s": 0.0001,
+    "save_checkpoint": true,
+    "save_freq": 100,
+    "use_policy_training_preset": true,
+    "optimizer": {
+        "type": "multi_adam",
+        "lr": 0.001,
+        "weight_decay": 0.1,
+        "grad_clip_norm": 10.0,
+        "optimizer_groups": {
+            "actor": {
+                "lr": 5e-05
+            },
+            "critic": {
+                "lr": 5e-05
+            }
+        }
+    },
+    "scheduler": null,
+    "eval": {
+        "n_episodes": 50,
+        "batch_size": 22,
+        "use_async_envs": true
+    },
+    "wandb": {
+        "enable": true,
+        "disable_artifact": true,
+        "project": "so101_real_offline-v1",
+        "entity": null,
+        "notes": null,
+        "run_id": "bfi8fh35",
+        "mode": null,
+        "offline_project": "so101_real_offline-v1",
+        "add_tags": true
+    },
+    "peft": null,
+    "sample_weighting": null,
+    "rename_map": {},
+    "checkpoint_path": null,
+    "offline_output_dir": "outputs/jack_pi05_full_offline_training_val_anchor_action_v1",
+    "offline_save_freq": 400,
+    "buffer_cache_dir": null,
+    "use_rerun": true,
+    "video_logging_cameras": [
+        "top",
+        "wrist"
+    ],
+    "episode_logging_freq": 4,
+    "episode_save_freq": 10,
+    "probe_parameters": {
+        "enable_actions": true,
+        "enable_representations": true,
+        "enable_attention": true,
+        "enable_offline_inference": true,
+        "enable_spatial_memorization": true,
+        "enable_action_drift_jacobian": true,
+        "enable_spatial_memorization_jacobian": true,
+        "output_dir": "outputs/probe",
+        "mode": "all",
+        "max_episodes": 1,
+        "n_frames_per_episode": 128,
+        "offline_inference_n_frames": 5,
+        "random_seed": 42,
+        "timestep": 0.5,
+        "ref_max_episodes": 2,
+        "ref_n_frames_per_episode": 256,
+        "action_pca_dims": 50,
+        "repr_pca_dims": 100,
+        "umap_n_neighbors": 15,
+        "umap_min_dist": 0.1,
+        "umap_seed": 42,
+        "sites": "prefix,suffix",
+        "ep_3d_a": 0,
+        "ep_3d_b": 1,
+        "subtask_injection": false,
+        "validation_batch_size": 32,
+        "attn_eval_episodes": null,
+        "attn_eval_subsample": 2,
+        "spatial_layers": "0,9,17",
+        "spatial_n_frames": 32
+    },
+    "val_dataset_path": "outputs/annotated_dataset_validation",
+    "val_split": 0.0,
+    "val_freq": 400,
+    "val_on_start": false,
+    "skip_critic": true
+}