octo-small / config.json
rail-berkeley's picture
Upload folder using huggingface_hub
9ec05c6
{"seed": 42, "num_steps": 300000, "save_dir": null, "model": {"observation_tokenizers": {"primary": {"module": "octo.model.components.tokenizers", "name": "ImageTokenizer", "args": [], "kwargs": {"obs_stack_keys": ["image_primary"], "task_stack_keys": ["image_primary"], "encoder": {"module": "octo.model.components.vit_encoders", "name": "SmallStem16", "args": [], "kwargs": {}}}}, "wrist": {"module": "octo.model.components.tokenizers", "name": "ImageTokenizer", "args": [], "kwargs": {"obs_stack_keys": ["image_wrist"], "task_stack_keys": ["image_wrist"], "encoder": {"module": "octo.model.components.vit_encoders", "name": "SmallStem16", "args": [], "kwargs": {}}}}}, "task_tokenizers": {"language": {"module": "octo.model.components.tokenizers", "name": "LanguageTokenizer", "args": [], "kwargs": {"encoder": "t5-base", "finetune_encoder": false}}}, "heads": {"action": {"module": "octo.model.components.action_heads", "name": "DiffusionActionHead", "args": [], "kwargs": {"readout_key": "readout_action", "use_map": false, "pred_horizon": 4, "action_dim": 7}}}, "readouts": {"action": 1}, "token_embedding_size": 384, "transformer_kwargs": {"attention_dropout_rate": 0.0, "add_position_embedding": false, "num_layers": 12, "mlp_dim": 1536, "num_attention_heads": 6, "dropout_rate": 0.0}, "max_horizon": 10}, "window_size": 2, "dataset_kwargs": {"oxe_kwargs": {"data_mix": "oxe_magic_soup", "data_dir": "gs://rail-octo-central2/resize_256_256", "load_camera_views": ["primary", "wrist"], "load_depth": false}, "traj_transform_kwargs": {"window_size": 2, "future_action_window_size": 3, "goal_relabeling_strategy": "uniform", "subsample_length": 100, "task_augment_strategy": "delete_task_conditioning", "task_augment_kwargs": {"keep_image_prob": 0.5}}, "frame_transform_kwargs": {"num_parallel_calls": 200, "resize_size": {"primary": [256, 256], "wrist": [128, 128]}, "image_augment_kwargs": [{"random_resized_crop": {"scale": [0.8, 1.0], "ratio": [0.9, 1.1]}, "random_brightness": [0.1], "random_contrast": [0.9, 1.1], "random_saturation": [0.9, 1.1], "random_hue": [0.05], "augment_order": ["random_resized_crop", "random_brightness", "random_contrast", "random_saturation", "random_hue"]}, {"random_brightness": [0.1], "random_contrast": [0.9, 1.1], "random_saturation": [0.9, 1.1], "random_hue": [0.05], "augment_order": ["random_brightness", "random_contrast", "random_saturation", "random_hue"]}]}, "traj_transform_threads": 48, "traj_read_threads": 48, "shuffle_buffer_size": 500000, "batch_size": 128, "balance_weights": true}, "optimizer": {"learning_rate": {"name": "rsqrt", "init_value": 0.0, "peak_value": 0.0003, "warmup_steps": 2000, "timescale": 10000}, "weight_decay": 0.1, "clip_gradient": 1.0, "frozen_keys": ["*hf_model*"]}, "prefetch_num_batches": 0, "start_step": null, "log_interval": 100, "eval_interval": 5000, "viz_interval": 20000, "save_interval": 10000, "val_kwargs": {"val_shuffle_buffer_size": 1000, "num_val_batches": 16}, "viz_kwargs": {"eval_batch_size": 128, "trajs_for_metrics": 100, "trajs_for_viz": 8, "samples_per_state": 8}, "resume_path": null, "text_processor": {"module": "octo.data.utils.text_processing", "name": "HFTokenizer", "args": [], "kwargs": {"encode_with_model": false, "tokenizer_kwargs": {"max_length": 16, "padding": "max_length", "truncation": true, "return_tensors": "np"}, "tokenizer_name": "t5-base"}}, "pretrained_loaders": [{"module": "octo.utils.train_utils", "name": "hf_weights_loader", "args": [], "kwargs": {"hf_model": "t5-base"}}], "wandb": {"project": "octo", "group": null, "entity": null}, "wandb_resume_id": null, "eval_datasets": ["bridge_dataset"]}