JHeisler committed on
Commit
f2eaf0a
·
verified ·
1 Parent(s): e2cf1e0

Add Hydra source training config

Browse files
Files changed (1) hide show
  1. training_config_source.yaml +98 -0
training_config_source.yaml ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+ # REFERENCE COPY — canonical is in this workstream's local lerobot clone at:
3
+ # ./lerobot/lerobot/configs/policy/act_diffusion_aloha_solo_real.yaml
4
+
5
+ # Single-arm (LEFT) ALOHA — Hybrid ACT+Diffusion policy.
6
+ # ACT encoder (ResNet18 + transformer) → DDPM diffusion U-Net → action chunks.
7
+ # 2 cameras: cam_left_wrist + cam_high. state_dim=action_dim=9.
8
+ # DOE winner: batch=24, lr=3e-5 (2026-04-20) — NOTE(review): config below uses batch_size=28, lr=3.5e-5; confirm the deviation from the DOE winner is intentional.
9
+
10
+ seed: 1000
11
+ dataset_repo_id: JHeisler/aloha_solo_left_4_6_26
12
+
13
+ override_dataset_stats:
14
+ observation.images.cam_left_wrist:
15
+ mean: [[[0.485]], [[0.456]], [[0.406]]]
16
+ std: [[[0.229]], [[0.224]], [[0.225]]]
17
+ observation.images.cam_high:
18
+ mean: [[[0.485]], [[0.456]], [[0.406]]]
19
+ std: [[[0.229]], [[0.224]], [[0.225]]]
20
+
21
+ use_amp: true
22
+ use_torch_compile: true
23
+
24
+ training:
25
+ offline_steps: 40000
26
+ online_steps: 0
27
+ eval_freq: -1
28
+ save_freq: 10000
29
+ log_freq: 100
30
+ save_checkpoint: true
31
+
32
+ batch_size: 28
33
+ lr: 3.5e-5
34
+ lr_backbone: 3.5e-5
35
+ lr_warmup_steps: 500
36
+ drop_n_last_frames: 2
37
+ weight_decay: 1e-4
38
+ grad_clip_norm: 10
39
+ online_steps_between_rollouts: 1
40
+
41
+ delta_timestamps:
42
+ action: "[i / ${fps} for i in range(${policy.chunk_size})]"
43
+
44
+ eval:
45
+ n_episodes: 50
46
+ batch_size: 50
47
+
48
+ policy:
49
+ name: hybrid_act_diffusion
50
+
51
+ n_obs_steps: 1
52
+ chunk_size: 100
53
+ n_action_steps: 100
54
+
55
+ input_shapes:
56
+ observation.images.cam_left_wrist: [3, 480, 640]
57
+ observation.images.cam_high: [3, 480, 640]
58
+ observation.state: ["${env.state_dim}"]
59
+ output_shapes:
60
+ action: ["${env.action_dim}"]
61
+
62
+ input_normalization_modes:
63
+ observation.images.cam_left_wrist: mean_std
64
+ observation.images.cam_high: mean_std
65
+ observation.state: mean_std
66
+ output_normalization_modes:
67
+ action: mean_std
68
+
69
+ # ACT visual encoder
70
+ vision_backbone: resnet18
71
+ pretrained_backbone_weights: ResNet18_Weights.IMAGENET1K_V1
72
+ replace_final_stride_with_dilation: false
73
+ pre_norm: false
74
+ dim_model: 512
75
+ n_heads: 8
76
+ dim_feedforward: 3200
77
+ feedforward_activation: relu
78
+ n_encoder_layers: 4
79
+ dropout: 0.1
80
+
81
+ # Diffusion U-Net
82
+ down_dims: [256, 512]
83
+ kernel_size: 5
84
+ n_groups: 8
85
+ diffusion_step_embed_dim: 128
86
+ use_film_scale_modulation: true
87
+
88
+ # Noise scheduler
89
+ noise_scheduler_type: DDPM
90
+ num_train_timesteps: 100
91
+ beta_schedule: squaredcos_cap_v2
92
+ beta_start: 0.0001
93
+ beta_end: 0.02
94
+ prediction_type: epsilon
95
+ clip_sample: true
96
+ clip_sample_range: 1.0
97
+ num_inference_steps: 10
98
+ do_mask_loss_for_padding: true