notmahi committed on
Commit
6511525
1 Parent(s): f652c13

Upload folder using huggingface_hub

Browse files
pretrained_model/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - pytorch_model_hub_mixin
4
+ - model_hub_mixin
5
+ ---
6
+
7
+ This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
8
+ - Library: [More Information Needed]
9
+ - Docs: [More Information Needed]
pretrained_model/config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "beta_end": 0.02,
3
+ "beta_schedule": "squaredcos_cap_v2",
4
+ "beta_start": 0.0001,
5
+ "clip_sample": true,
6
+ "clip_sample_range": 1.0,
7
+ "crop_is_random": true,
8
+ "crop_shape": [
9
+ 224,
10
+ 224
11
+ ],
12
+ "diffusion_step_embed_dim": 128,
13
+ "do_mask_loss_for_padding": false,
14
+ "down_dims": [
15
+ 256,
16
+ 512,
17
+ 1024
18
+ ],
19
+ "ema_inv_gamma": 1.0,
20
+ "ema_max_alpha": 0.9999,
21
+ "ema_min_alpha": 0.0,
22
+ "ema_power": 0.75,
23
+ "ema_update_after_step": 0,
24
+ "horizon": 16,
25
+ "input_normalization_modes": {
26
+ "observation.images.scene_left_0": "mean_std",
27
+ "observation.images.scene_right_0": "mean_std",
28
+ "observation.images.wrist_left_plus": "mean_std",
29
+ "observation.images.wrist_right_minus": "mean_std",
30
+ "observation.state": "min_max"
31
+ },
32
+ "input_shapes": {
33
+ "observation.images.scene_left_0": [
34
+ 3,
35
+ 480,
36
+ 640
37
+ ],
38
+ "observation.images.scene_right_0": [
39
+ 3,
40
+ 480,
41
+ 640
42
+ ],
43
+ "observation.images.wrist_left_plus": [
44
+ 3,
45
+ 480,
46
+ 640
47
+ ],
48
+ "observation.images.wrist_right_minus": [
49
+ 3,
50
+ 480,
51
+ 640
52
+ ],
53
+ "observation.state": [
54
+ 20
55
+ ]
56
+ },
57
+ "kernel_size": 5,
58
+ "n_action_steps": 8,
59
+ "n_groups": 8,
60
+ "n_obs_steps": 2,
61
+ "noise_scheduler_type": "DDPM",
62
+ "num_inference_steps": null,
63
+ "num_train_timesteps": 100,
64
+ "output_normalization_modes": {
65
+ "action": "min_max"
66
+ },
67
+ "output_shapes": {
68
+ "action": [
69
+ 14
70
+ ]
71
+ },
72
+ "prediction_type": "epsilon",
73
+ "pretrained_backbone_weights": "IMAGENET1K_SWAG_LINEAR_V1",
74
+ "resize_shape": [
75
+ 256,
76
+ 342
77
+ ],
78
+ "spatial_softmax_num_keypoints": 32,
79
+ "use_ema": true,
80
+ "use_film_scale_modulation": true,
81
+ "use_group_norm": true,
82
+ "use_spatial_softmax": false,
83
+ "vision_backbone": "vit_b_16"
84
+ }
pretrained_model/config.yaml ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ resume: false
2
+ device: cuda
3
+ use_amp: false
4
+ seed: 100000
5
+ dataset_repo_id: notmahi/tutorial-tri-BimanualPutRedBellPepperInBin
6
+ video_backend: pyav
7
+ training:
8
+ offline_steps: 20000
9
+ online_steps: 0
10
+ online_steps_between_rollouts: 1
11
+ online_sampling_ratio: 0.5
12
+ online_env_seed: ???
13
+ eval_freq: 0
14
+ log_freq: 250
15
+ save_checkpoint: true
16
+ save_freq: 5000
17
+ num_workers: 4
18
+ batch_size: 32
19
+ image_transforms:
20
+ enable: false
21
+ max_num_transforms: 3
22
+ random_order: false
23
+ brightness:
24
+ weight: 1
25
+ min_max:
26
+ - 0.8
27
+ - 1.2
28
+ contrast:
29
+ weight: 1
30
+ min_max:
31
+ - 0.8
32
+ - 1.2
33
+ saturation:
34
+ weight: 1
35
+ min_max:
36
+ - 0.5
37
+ - 1.5
38
+ hue:
39
+ weight: 1
40
+ min_max:
41
+ - -0.05
42
+ - 0.05
43
+ sharpness:
44
+ weight: 1
45
+ min_max:
46
+ - 0.8
47
+ - 1.2
48
+ grad_clip_norm: 10
49
+ lr: 0.0001
50
+ lr_scheduler: cosine
51
+ lr_warmup_steps: 500
52
+ adam_betas:
53
+ - 0.95
54
+ - 0.999
55
+ adam_eps: 1.0e-08
56
+ adam_weight_decay: 1.0e-06
57
+ delta_timestamps:
58
+ observation.images.wrist_right_minus:
59
+ - -0.03333333333333333
60
+ - 0.0
61
+ observation.images.wrist_left_plus:
62
+ - -0.03333333333333333
63
+ - 0.0
64
+ observation.images.scene_right_0:
65
+ - -0.03333333333333333
66
+ - 0.0
67
+ observation.images.scene_left_0:
68
+ - -0.03333333333333333
69
+ - 0.0
70
+ observation.state:
71
+ - -0.03333333333333333
72
+ - 0.0
73
+ action:
74
+ - -0.03333333333333333
75
+ - 0.0
76
+ - 0.03333333333333333
77
+ - 0.06666666666666667
78
+ - 0.1
79
+ - 0.13333333333333333
80
+ - 0.16666666666666666
81
+ - 0.2
82
+ - 0.23333333333333334
83
+ - 0.26666666666666666
84
+ - 0.3
85
+ - 0.3333333333333333
86
+ - 0.36666666666666664
87
+ - 0.4
88
+ - 0.43333333333333335
89
+ - 0.4666666666666667
90
+ drop_n_last_frames: 7
91
+ eval:
92
+ n_episodes: 50
93
+ batch_size: 50
94
+ use_async_envs: false
95
+ wandb:
96
+ enable: false
97
+ disable_artifact: false
98
+ project: lerobot
99
+ notes: ''
100
+ fps: 30
101
+ env:
102
+ name: tri
103
+ task: PutSpatulaInUtensilCrock-v0
104
+ state_dim: 20
105
+ action_dim: 14
106
+ fps: ${fps}
107
+ episode_length: 400
108
+ gym:
109
+ fps: ${fps}
110
+ override_dataset_stats:
111
+ observation.images.wrist_right_minus:
112
+ mean:
113
+ - - - 0.485
114
+ - - - 0.456
115
+ - - - 0.406
116
+ std:
117
+ - - - 0.229
118
+ - - - 0.224
119
+ - - - 0.225
120
+ observation.images.wrist_left_plus:
121
+ mean:
122
+ - - - 0.485
123
+ - - - 0.456
124
+ - - - 0.406
125
+ std:
126
+ - - - 0.229
127
+ - - - 0.224
128
+ - - - 0.225
129
+ observation.images.scene_right_0:
130
+ mean:
131
+ - - - 0.485
132
+ - - - 0.456
133
+ - - - 0.406
134
+ std:
135
+ - - - 0.229
136
+ - - - 0.224
137
+ - - - 0.225
138
+ observation.images.scene_left_0:
139
+ mean:
140
+ - - - 0.485
141
+ - - - 0.456
142
+ - - - 0.406
143
+ std:
144
+ - - - 0.229
145
+ - - - 0.224
146
+ - - - 0.225
147
+ policy:
148
+ name: diffusion
149
+ n_obs_steps: 2
150
+ horizon: 16
151
+ n_action_steps: 8
152
+ input_shapes:
153
+ observation.images.wrist_right_minus:
154
+ - 3
155
+ - 480
156
+ - 640
157
+ observation.images.wrist_left_plus:
158
+ - 3
159
+ - 480
160
+ - 640
161
+ observation.images.scene_right_0:
162
+ - 3
163
+ - 480
164
+ - 640
165
+ observation.images.scene_left_0:
166
+ - 3
167
+ - 480
168
+ - 640
169
+ observation.state:
170
+ - ${env.state_dim}
171
+ output_shapes:
172
+ action:
173
+ - ${env.action_dim}
174
+ input_normalization_modes:
175
+ observation.images.wrist_right_minus: mean_std
176
+ observation.images.wrist_left_plus: mean_std
177
+ observation.images.scene_right_0: mean_std
178
+ observation.images.scene_left_0: mean_std
179
+ observation.state: min_max
180
+ output_normalization_modes:
181
+ action: min_max
182
+ vision_backbone: vit_b_16
183
+ pretrained_backbone_weights: IMAGENET1K_SWAG_LINEAR_V1
184
+ resize_shape:
185
+ - 256
186
+ - 342
187
+ crop_shape:
188
+ - 224
189
+ - 224
190
+ crop_is_random: true
191
+ use_spatial_softmax: false
192
+ use_group_norm: true
193
+ spatial_softmax_num_keypoints: 32
194
+ down_dims:
195
+ - 256
196
+ - 512
197
+ - 1024
198
+ kernel_size: 5
199
+ n_groups: 8
200
+ diffusion_step_embed_dim: 128
201
+ use_film_scale_modulation: true
202
+ noise_scheduler_type: DDPM
203
+ num_train_timesteps: 100
204
+ beta_schedule: squaredcos_cap_v2
205
+ beta_start: 0.0001
206
+ beta_end: 0.02
207
+ prediction_type: epsilon
208
+ clip_sample: true
209
+ clip_sample_range: 1.0
210
+ num_inference_steps: null
211
+ use_ema: true
212
+ ema_update_after_step: 0
213
+ ema_min_alpha: 0.0
214
+ ema_max_alpha: 0.9999
215
+ ema_inv_gamma: 1.0
216
+ ema_power: 0.75
217
+ do_mask_loss_for_padding: false
pretrained_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d5f0ba89c08511a9dc39d427ed15a1bd96fa6130f808af7f8c5bc2c419ee4d3
3
+ size 1899724320
training_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e54f2acf3aab55c24b9f25ece4a940f5aa48c47e9b649d7f0e5a27ebc97414a7
3
+ size 1899951950