snehal-allenai commited on
Commit
94e9157
·
verified ·
1 Parent(s): 5cfa5f3

Upload MolmoBot RBY1 DoorOpening weights (step56000 unsharded)

Browse files
Files changed (2) hide show
  1. config.yaml +683 -0
  2. model.pt +3 -0
config.yaml ADDED
@@ -0,0 +1,683 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
2
+ model:
3
+ model_name: molmoact
4
+ data_formatter:
5
+ prompt_templates: uber_model_v2
6
+ message_format: qwen3
7
+ system_prompt: demo_or_style_v2
8
+ always_start_with_space: false
9
+ default_inference_len: 65
10
+ select_answer: best
11
+ debug: false
12
+ image_last: false
13
+ format_message_list: null
14
+ p_one_message: 0.0
15
+ eval_system_prompt_mapping: null
16
+ p_choice_content_in_mc: 1.0
17
+ template_video_mc_questions: true
18
+ pointing_format: html-v2
19
+ points_decimal_places: 1
20
+ use_seperate_non_pointing_qa_style: false
21
+ timestamp_mode: 50-percent-seconds
22
+ output_timestamp_mode: seconds
23
+ seconds_decimal_places: 1
24
+ p_multi_point_all_image: 0.5
25
+ use_seperate_count_without_pointing_style: false
26
+ sample_random_initial_point: true
27
+ llm:
28
+ d_model: 2560
29
+ n_heads: 32
30
+ n_kv_heads: 8
31
+ head_dim: 128
32
+ qkv_bias: false
33
+ clip_qkv: null
34
+ n_layers: 36
35
+ mlp_ratio: 4
36
+ mlp_hidden_size: 19456
37
+ activation_type: swiglu
38
+ block_type: sequential
39
+ rope: true
40
+ rope_full_precision: true
41
+ rope_theta: 5000000.0
42
+ rope_type: default
43
+ rope_factor: null
44
+ rope_high_freq_factor: null
45
+ rope_low_freq_factor: null
46
+ rope_original_max_position_embeddings: null
47
+ rope_attention_factor: null
48
+ rope_beta_fast: null
49
+ rope_beta_slow: null
50
+ rope_mscale: null
51
+ rope_mscale_all_dim: null
52
+ rope_truncate: null
53
+ attention_type: sdpa
54
+ full_attention_layers: null
55
+ sliding_attention_rope_scaling: false
56
+ float32_attention: true
57
+ attention_dropout: 0.0
58
+ attention_layer_norm: true
59
+ attention_layer_norm_type: qwen3
60
+ residual_dropout: 0.1
61
+ response_residual_dropout: 0.0
62
+ layer_norm_type: rms
63
+ layer_norm_with_affine: true
64
+ layer_norm_eps: 1.0e-06
65
+ attention_layer_norm_with_affine: true
66
+ max_sequence_length: 8192
67
+ max_position_embeddings: null
68
+ include_bias: false
69
+ bias_for_layer_norm: null
70
+ norm_after: false
71
+ moe_num_experts: 8
72
+ moe_top_k: 2
73
+ moe_mlp_impl: sparse
74
+ moe_log_expert_assignment: false
75
+ moe_shared_expert: false
76
+ moe_lbl_in_fp32: false
77
+ moe_interleave: false
78
+ moe_loss_weight: 0.1
79
+ moe_zloss_weight: null
80
+ moe_dropless: true
81
+ moe_capacity_factor: 1.25
82
+ embedding_dropout: 0.0
83
+ scale_logits: false
84
+ vocab_size: 151936
85
+ additional_vocab_size: 128
86
+ weight_tying: true
87
+ embedding_size: 151936
88
+ use_position_ids: true
89
+ tokenizer:
90
+ identifier: Qwen/Qwen3-4B-Instruct-2507
91
+ tokenizer_dir: null
92
+ init_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen3-4b-instruct.pt
93
+ init_incremental: null
94
+ new_embedding_init_range: 0.02
95
+ initializer_range: 0.02
96
+ normalize_input_embeds: false
97
+ activation_checkpoint: whole_layer
98
+ compile: blocks
99
+ fix_pad_tokenizer: false
100
+ init_std: 0.02
101
+ init_fn: normal
102
+ init_cutoff_factor: null
103
+ vision_backbone:
104
+ vit:
105
+ image_model_type: siglip
106
+ image_default_input_size:
107
+ - 378
108
+ - 378
109
+ image_patch_size: 14
110
+ image_pos_patch_size: 14
111
+ image_emb_dim: 1152
112
+ image_num_heads: 16
113
+ image_num_key_value_heads: 16
114
+ image_num_layers: 27
115
+ image_head_dim: 72
116
+ image_mlp_dim: 4304
117
+ image_mlp_activations: gelu_pytorch_tanh
118
+ image_dropout_rate: 0.0
119
+ image_num_pos: 729
120
+ image_norm_eps: 1.0e-06
121
+ attention_dropout: 0.0
122
+ residual_dropout: 0.0
123
+ initializer_range: 0.02
124
+ float32_attention: true
125
+ attention_type: sdpa
126
+ sdpa_backend: all
127
+ activation_checkpointing: true
128
+ init_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
129
+ resize_mode: siglip
130
+ pad_value: 0.0
131
+ normalize: siglip
132
+ image_pooling_2d: attention_meanq
133
+ pooling_attention_mask: true
134
+ image_projector: mlp
135
+ image_padding_embed: null
136
+ vit_layers:
137
+ - -3
138
+ - -9
139
+ skip_unused_layers: true
140
+ use_deepstack: false
141
+ share_connector: false
142
+ image_feature_dropout: 0.0
143
+ connector_activation_checkpointing: true
144
+ compile_vit: blocks
145
+ pool_size_embeds: null
146
+ compile_connector: null
147
+ normalize_on_gpu: true
148
+ use_image_augmentation: true
149
+ use_resize_bottleneck: false
150
+ mm_preprocessor:
151
+ max_answer_len: null
152
+ last_message_loss_only: false
153
+ max_text_tokens: null
154
+ loss_token_weighting: root_subsegments_root_tokens
155
+ max_frames: 1
156
+ frame_sample_mode: uniform_last_frame
157
+ candidate_sampling_fps:
158
+ - 0.25
159
+ - 0.5
160
+ - 1.0
161
+ - 2.0
162
+ - 4.0
163
+ - 6.0
164
+ - 8.0
165
+ - 16.0
166
+ cache_videos: true
167
+ loading_method: torchcodec_exact
168
+ max_fps:
169
+ - 2.0
170
+ time_sampling: true
171
+ time_mode: per-frame-compact
172
+ subtitle_mode: frame_1
173
+ max_crops: 1
174
+ overlap_margins:
175
+ - 4.0
176
+ - 4.0
177
+ use_col_tokens: false
178
+ periodic_high_res_frame: null
179
+ high_low_train_mode: local_rnd
180
+ high_res_frame_sample_options: null
181
+ periodic_sample_rate_training:
182
+ 4:
183
+ - 0.9
184
+ - 0.03
185
+ - 0.03
186
+ - 0.04
187
+ 3:
188
+ - 0.6
189
+ - 0.2
190
+ - 0.2
191
+ skip_low_res_in_high_low: false
192
+ pooling_w: 3
193
+ pooling_h: 3
194
+ high_res_pooling_w: null
195
+ high_res_pooling_h: null
196
+ query_based_resolution_selection: false
197
+ max_queries_for_resolution_selection: 8
198
+ use_frame_special_tokens: true
199
+ frame_sel_clip_identifier: google/siglip2-so400m-patch14-384
200
+ image_padding_mask: false
201
+ max_subtitle_tokens: null
202
+ image:
203
+ crop_mode: resize
204
+ use_col_tokens: true
205
+ max_crops: 8
206
+ high_res_max_crops: 24
207
+ p_high_res: 0.0
208
+ pooling_w: 2
209
+ pooling_h: 2
210
+ overlap_margins:
211
+ - 4
212
+ - 4
213
+ max_images: 5
214
+ max_multi_image_crops: 8
215
+ multi_image_pooling_w: 2
216
+ multi_image_pooling_h: 2
217
+ use_single_crop_col_tokens: false
218
+ use_single_crop_start_token: true
219
+ topk: null
220
+ prune_from_frame: 0
221
+ bi_directional_attn: image_tokens
222
+ shared_low_high_embedding: true
223
+ debug: null
224
+ cp_enabled: false
225
+ apply_cp_to_vision_backbone: false
226
+ action_dim: 20
227
+ action_horizon: 16
228
+ n_action_steps: 8
229
+ n_obs_steps: 1
230
+ action_expert:
231
+ max_horizon: 32
232
+ action_dim: 20
233
+ hidden_size: 768
234
+ num_layers: 36
235
+ num_heads: 8
236
+ mlp_ratio: 4.0
237
+ timestep_embed_dim: 256
238
+ dropout: 0.0
239
+ attn_dropout: 0.0
240
+ context_layer_norm: true
241
+ action_expert_layer_mode: per_layer
242
+ flow_matching_num_steps: 10
243
+ flow_matching_cutoff: 0.999
244
+ flow_matching_beta_alpha: 1.0
245
+ flow_matching_beta_beta: 1.5
246
+ num_flow_timestamps: 8
247
+ same_noise_per_time: false
248
+ robot_preprocessor:
249
+ stats_by_repo:
250
+ synthmanip:
251
+ observation.state:
252
+ min:
253
+ - -4.904874324798584
254
+ - -4.564780235290527
255
+ - -3.5160739421844482
256
+ - -2.356419563293457
257
+ - -0.47234979271888733
258
+ - -2.0865397453308105
259
+ - -3.343071222305298
260
+ - -5.8824052810668945
261
+ - -1.7488995790481567
262
+ - -2.967109203338623
263
+ - -0.11299018561840057
264
+ - -2.3546268939971924
265
+ - -3.1416664123535156
266
+ - -2.0946199893951416
267
+ - -3.2890703678131104
268
+ - -6.282893657684326
269
+ - -1.7483078241348267
270
+ - -2.967064142227173
271
+ - -0.12049419432878494
272
+ - -1.778153419494629
273
+ - -1.7587945461273193
274
+ - -1.5871200561523438
275
+ max:
276
+ - 17.08185577392578
277
+ - 33.73189163208008
278
+ - 3.2411913871765137
279
+ - 2.356658697128296
280
+ - 3.1416971683502197
281
+ - 2.1008245944976807
282
+ - 0.07229717075824738
283
+ - 6.270575523376465
284
+ - 2.0102994441986084
285
+ - 2.9668161869049072
286
+ - 0.021467044949531555
287
+ - 2.3977394104003906
288
+ - 0.34489157795906067
289
+ - 2.0900635719299316
290
+ - 0.07242166996002197
291
+ - 6.27663516998291
292
+ - 2.0076160430908203
293
+ - 2.9636759757995605
294
+ - 0.04509617015719414
295
+ - 0.919683575630188
296
+ - 1.6717331409454346
297
+ - 1.1039749383926392
298
+ action:
299
+ q01:
300
+ - -0.04400388523936272
301
+ - -0.044572047889232635
302
+ - -0.05000000074505806
303
+ - -0.05000000074505806
304
+ - -0.037506889551877975
305
+ - -0.03562070056796074
306
+ - -0.05000000074505806
307
+ - -0.05000000074505806
308
+ - -0.04800133779644966
309
+ - -0.05000000074505806
310
+ - -100.0
311
+ - -0.05000000074505806
312
+ - -0.05000000074505806
313
+ - -0.04927435144782066
314
+ - -0.05000000074505806
315
+ - -0.05000000074505806
316
+ - -0.0456085205078125
317
+ - -0.05000000074505806
318
+ - -100.0
319
+ - -0.025820335373282433
320
+ q99:
321
+ - 0.04579437896609306
322
+ - 0.04565873369574547
323
+ - 0.05000000074505806
324
+ - 0.05000000074505806
325
+ - 0.05000000074505806
326
+ - 0.03847877308726311
327
+ - 0.05000000074505806
328
+ - 0.05000000074505806
329
+ - 0.05000000074505806
330
+ - 0.05000000074505806
331
+ - 100.0
332
+ - 0.05000000074505806
333
+ - 0.03608553484082222
334
+ - 0.04896605759859085
335
+ - 0.05000000074505806
336
+ - 0.05000000074505806
337
+ - 0.05000000074505806
338
+ - 0.05000000074505806
339
+ - 100.0
340
+ - 0.7379999756813049
341
+ default_repo_id: synthmanip
342
+ action_key: action
343
+ state_keys:
344
+ - observation.state
345
+ action_norm_mode: quantiles
346
+ state_norm_mode: min_max
347
+ robot_postprocessor:
348
+ stats_by_repo:
349
+ synthmanip:
350
+ observation.state:
351
+ min:
352
+ - -4.904874324798584
353
+ - -4.564780235290527
354
+ - -3.5160739421844482
355
+ - -2.356419563293457
356
+ - -0.47234979271888733
357
+ - -2.0865397453308105
358
+ - -3.343071222305298
359
+ - -5.8824052810668945
360
+ - -1.7488995790481567
361
+ - -2.967109203338623
362
+ - -0.11299018561840057
363
+ - -2.3546268939971924
364
+ - -3.1416664123535156
365
+ - -2.0946199893951416
366
+ - -3.2890703678131104
367
+ - -6.282893657684326
368
+ - -1.7483078241348267
369
+ - -2.967064142227173
370
+ - -0.12049419432878494
371
+ - -1.778153419494629
372
+ - -1.7587945461273193
373
+ - -1.5871200561523438
374
+ max:
375
+ - 17.08185577392578
376
+ - 33.73189163208008
377
+ - 3.2411913871765137
378
+ - 2.356658697128296
379
+ - 3.1416971683502197
380
+ - 2.1008245944976807
381
+ - 0.07229717075824738
382
+ - 6.270575523376465
383
+ - 2.0102994441986084
384
+ - 2.9668161869049072
385
+ - 0.021467044949531555
386
+ - 2.3977394104003906
387
+ - 0.34489157795906067
388
+ - 2.0900635719299316
389
+ - 0.07242166996002197
390
+ - 6.27663516998291
391
+ - 2.0076160430908203
392
+ - 2.9636759757995605
393
+ - 0.04509617015719414
394
+ - 0.919683575630188
395
+ - 1.6717331409454346
396
+ - 1.1039749383926392
397
+ action:
398
+ q01:
399
+ - -0.04400388523936272
400
+ - -0.044572047889232635
401
+ - -0.05000000074505806
402
+ - -0.05000000074505806
403
+ - -0.037506889551877975
404
+ - -0.03562070056796074
405
+ - -0.05000000074505806
406
+ - -0.05000000074505806
407
+ - -0.04800133779644966
408
+ - -0.05000000074505806
409
+ - -100.0
410
+ - -0.05000000074505806
411
+ - -0.05000000074505806
412
+ - -0.04927435144782066
413
+ - -0.05000000074505806
414
+ - -0.05000000074505806
415
+ - -0.0456085205078125
416
+ - -0.05000000074505806
417
+ - -100.0
418
+ - -0.025820335373282433
419
+ q99:
420
+ - 0.04579437896609306
421
+ - 0.04565873369574547
422
+ - 0.05000000074505806
423
+ - 0.05000000074505806
424
+ - 0.05000000074505806
425
+ - 0.03847877308726311
426
+ - 0.05000000074505806
427
+ - 0.05000000074505806
428
+ - 0.05000000074505806
429
+ - 0.05000000074505806
430
+ - 100.0
431
+ - 0.05000000074505806
432
+ - 0.03608553484082222
433
+ - 0.04896605759859085
434
+ - 0.05000000074505806
435
+ - 0.05000000074505806
436
+ - 0.05000000074505806
437
+ - 0.05000000074505806
438
+ - 100.0
439
+ - 0.7379999756813049
440
+ default_repo_id: synthmanip
441
+ action_key: action
442
+ state_keys:
443
+ - observation.state
444
+ action_norm_mode: quantiles
445
+ state_norm_mode: min_max
446
+ parallelism:
447
+ data_parallel_replicate_degree: 1
448
+ enable_compiled_autograd: false
449
+ data_parallel_shard_degree: -1
450
+ fsdp_reshard_after_forward: default
451
+ context_parallel_config:
452
+ degree: 1
453
+ attention_type: ulysses
454
+ load_balancer: ulysses
455
+ head_stride: 1
456
+ tensor_parallel_config:
457
+ degree: 1
458
+ enable_async: false
459
+ data_parallel_config:
460
+ name: fsdp
461
+ param_dtype: null
462
+ reduce_dtype: float32
463
+ num_replicas: null
464
+ shard_degree: null
465
+ wrapping_strategy: full
466
+ prefetch_factor: 0
467
+ context_parallel_rotate_method: allgather
468
+ seed: 6198
469
+ epoch: null
470
+ dry_run: false
471
+ ft_llm: true
472
+ ft_vit: false
473
+ ft_connector: false
474
+ ft_embedding: lm_head
475
+ optimizer:
476
+ name: adamw
477
+ learning_rate: 0.0001
478
+ weight_decay: 0.01
479
+ betas:
480
+ - 0.9
481
+ - 0.95
482
+ eps: 1.0e-05
483
+ connector_learning_rate: 5.0e-06
484
+ vit_learning_rate: 5.0e-06
485
+ llm_learning_rate: 1.0e-05
486
+ frame_selector_learning_rate: 0.0001
487
+ temporal_token_scorer_learning_rate: 0.0001
488
+ action_expert_learning_rate: 0.0001
489
+ connector_weight_decay: 0.0
490
+ vit_weight_decay: 0.0
491
+ llm_weight_decay: 0.0
492
+ frame_selector_weight_decay: 0.01
493
+ temporal_token_scorer_weight_decay: 0.01
494
+ action_expert_weight_decay: 0.0
495
+ connector_betas:
496
+ - 0.9
497
+ - 0.95
498
+ vit_betas:
499
+ - 0.9
500
+ - 0.95
501
+ llm_betas:
502
+ - 0.9
503
+ - 0.95
504
+ frame_selector_betas:
505
+ - 0.9
506
+ - 0.95
507
+ temporal_token_scorer_betas:
508
+ - 0.9
509
+ - 0.95
510
+ action_expert_betas:
511
+ - 0.9
512
+ - 0.95
513
+ connector_eps: 1.0e-06
514
+ vit_eps: 1.0e-06
515
+ llm_eps: 1.0e-06
516
+ frame_selector_eps: 1.0e-06
517
+ temporal_token_scorer_eps: 1.0e-06
518
+ action_expert_eps: 1.0e-06
519
+ metrics_log_interval: -1
520
+ scheduler:
521
+ name: multimodal
522
+ units: steps
523
+ t_warmup: 100
524
+ t_max: null
525
+ alpha_f: 0.1
526
+ connector_t_warmup: 200
527
+ vit_t_warmup: 200
528
+ llm_t_warmup: 2000
529
+ frame_selector_t_warmup: 200
530
+ temporal_token_scorer_t_warmup: 200
531
+ action_expert_t_warmup: 200
532
+ grad_clip_warmup_steps: null
533
+ grad_clip_warmup_factor: null
534
+ warmup_min_lr: 0.0
535
+ data:
536
+ dataset: null
537
+ mixture:
538
+ synthmanip/task_0: 1.0
539
+ synthmanip/task_1: 1.0
540
+ root_size_mixture: null
541
+ kwargs_mixture: null
542
+ split: train
543
+ seed: 50189
544
+ pad: to_max
545
+ sequence_length: 1024
546
+ max_text_seq_len: null
547
+ shuffle: true
548
+ start_index: 0
549
+ packing: null
550
+ enable_variable_sized_token_pooling: true
551
+ num_workers: 4
552
+ drop_last: true
553
+ pin_memory: true
554
+ prefetch_factor: 4
555
+ persistent_workers: false
556
+ timeout: 300
557
+ action_data: null
558
+ action_loader_rate: null
559
+ action_batch_interval: 1
560
+ restore_dataloader: true
561
+ fast_forward_batches: null
562
+ evaluators: []
563
+ eval_interval: 0
564
+ inf_evaluators: []
565
+ inf_eval_interval: 1000
566
+ eval_on_last_step: true
567
+ eval_on_load: false
568
+ eval_on: []
569
+ save_folder: /weka/oe-training-default/snehalj/synthmanip_checkpoints/molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
570
+ checkpointer_config:
571
+ save_thread_count: null
572
+ load_thread_count: null
573
+ pre_download: false
574
+ work_dir: null
575
+ throttle_uploads: false
576
+ canceled_check_interval: 50
577
+ save_interval: 4000
578
+ save_at: null
579
+ save_final_optim: false
580
+ save_num_checkpoints_to_keep: 3
581
+ checkpoint_retention_frequency: 10000
582
+ save_final_unsharded_checkpoint: false
583
+ save_interval_ephemeral: null
584
+ save_overwrite: true
585
+ load_path: null
586
+ reset_optimizer_state: false
587
+ reset_trainer_state: false
588
+ initial_model_checkpoint: /weka/oe-training-default/hqfang/molmo2_checkpoints/4b-cp/step2000-unsharded/
589
+ allow_resume: true
590
+ max_duration: 100000
591
+ global_train_batch_size: 1024
592
+ device_train_microbatch_size: 8
593
+ max_grad_norm: 1.0
594
+ multi_component_grad_norm: true
595
+ batch_divisor: global_batch
596
+ max_grad_norm_ratio: null
597
+ precision: amp_bf16
598
+ wandb:
599
+ project: whirl-molmoflow-rby1
600
+ entity: prior-ai2
601
+ group: null
602
+ name: molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
603
+ tags:
604
+ - watching
605
+ log_artifacts: false
606
+ rank_zero_only: true
607
+ log_interval: 20
608
+ allow_resume: true
609
+ finish_on_sigterm: true
610
+ beaker_log_interval: 50
611
+ speed_monitor:
612
+ window_size: 20
613
+ gpu_flops_available: null
614
+ console_log_interval: 20
615
+ enable_timing_logs: false
616
+ gen1_gc_interval: 1
617
+ compile:
618
+ mode: default
619
+ fullgraph: false
620
+ dynamic: false
621
+ backend: inductor
622
+ activation_checkpointing: true
623
+ fsdp:
624
+ fsdp2: true
625
+ precision: pure
626
+ use_orig_params: true
627
+ wrapping_strategy: null
628
+ sharding_strategy: FULL_SHARD
629
+ hybrid_sharding_num_model_replicas: null
630
+ softmax_auxiliary_loss: false
631
+ softmax_auxiliary_loss_scale: 0.0001
632
+ response_logits_only: true
633
+ saliency_score_loss_wt: null
634
+ frame_score_loss_wt: null
635
+ frame_score_loss_type: mse
636
+ frame_score_loss_target: 0.7
637
+ time_limit: null
638
+ extra_steps_after_cancel: 0
639
+ python_profiling: false
640
+ torch_profiling: false
641
+ stop_at: 100000
642
+ stop_after: null
643
+ fused_loss: false
644
+ compile_loss: true
645
+ runtime_data:
646
+ args: launch_scripts/train_synthmanip.py /weka/oe-training-default/hqfang/molmo2_checkpoints/4b-cp/step2000-unsharded/
647
+ --data_paths /weka/prior/datasets/robomolmo/feb12_franka_and_rby1/DoorOpeningDataGenConfig
648
+ /weka/prior/datasets/robomolmo/feb15_franka_and_rby1/DoorOpeningDataGenConfig
649
+ --no_val --dataset_sample_rates 1.0 1.0 --stats_path=/weka/prior/datasets/robomolmo/rby1_multitask_norm_stats.yaml
650
+ --action_preset RBY1_multitask --camera_preset RBY1_full_with_head_gopro --wandb.name=molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
651
+ --wandb.entity=prior-ai2 --wandb.project=whirl-molmoflow-rby1 --seq_len=1024 --max_duration=100000
652
+ --device_batch_size=8 --global_batch_size=1024 --log_interval=20 --model.mm_preprocessor.use_frame_special_tokens=True
653
+ --model.mm_preprocessor.max_subtitle_tokens=null --data.num_workers=4 --prefetch_factor=4
654
+ --save_interval=4000 --save_num_checkpoints_to_keep=3 --checkpoint_retention_frequency=10000
655
+ --save_folder=/weka/oe-training-default/snehalj/synthmanip_checkpoints/molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
656
+ --exp_name=molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
657
+ --data.packing=null --model.mm_preprocessor.image.max_images=5 --model.mm_preprocessor.image.crop_mode=resize
658
+ --model.mm_preprocessor.max_frames=1 --model.same_noise_per_time=False --model.num_flow_timestamps=8
659
+ --use_point_prompts --randomize_prompts --point_prompt_camera=head_camera --max_points_in_conditioning_frame=1
660
+ --conditioning_frame=random_first_10 --cameras_to_warp head_camera --img_aug --ft_llm=True
661
+ --scheduler.llm_t_warmup=2000 --optimizer.llm_learning_rate=1e-5
662
+ hostname: jupiter-cs-aus-121.reviz.ai2.in
663
+ date: 03/05/2026, 22:21
664
+ world_size: 128
665
+ resuming_from: null
666
+ beaker_experiment_id: 01KK018HKCWPW1677ZM8GQAYXG
667
+ beaker_experiment_url: https://beaker.org/ex/01KK018HKCWPW1677ZM8GQAYXG
668
+ wandb_id: kg1npwco
669
+ wandb_url: https://wandb.ai/prior-ai2/whirl-molmoflow-rby1/runs/kg1npwco
670
+ distributed_eval_enabled: false
671
+ distributed_eval_benchmark_path: /weka/oe/rohunt/robo-bench/FrankaPickandPlaceDroidBench_5ep_json_benchmark
672
+ distributed_eval_config_cls: launch_scripts.synthvla.configure_mujoco_thor:FrankaState8ClampConfig
673
+ distributed_eval_task_horizon: 300
674
+ distributed_eval_num_worker_jobs: 1
675
+ distributed_eval_wandb_project: mjthor-online-eval
676
+ distributed_eval_workspace: ai2/robo-molmo
677
+ distributed_eval_clusters:
678
+ - ai2/saturn
679
+ - ai2/neptune
680
+ - ai2/rhea
681
+ - ai2/ceres
682
+ distributed_eval_priority: high
683
+ distributed_eval_preemptible: true
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1ddcaff58a9a53c66d891f15030aa4296fc355531818dc312d8db096bdd34b
3
+ size 19992232602