lsnu commited on
Commit
ddc5223
·
verified ·
1 Parent(s): d5d49c1

Add files using upload-large-folder tool

Browse files
Files changed (39) hide show
  1. artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt +3 -0
  2. artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt +3 -0
  3. artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt +3 -0
  4. artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt +3 -0
  5. artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt +3 -0
  6. artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt +3 -0
  7. artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json +15 -0
  8. artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.md +13 -0
  9. artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json +15 -0
  10. artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json +15 -0
  11. artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.md +13 -0
  12. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt +3 -0
  13. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/config_resolved.yaml +123 -0
  14. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json +7 -0
  15. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json +432 -0
  16. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt +3 -0
  17. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/config_resolved.yaml +123 -0
  18. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt +3 -0
  19. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt +3 -0
  20. artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.json +41 -0
  21. artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.md +37 -0
  22. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json +15 -0
  23. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.md +13 -0
  24. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.json +15 -0
  25. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.md +13 -0
  26. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.json +15 -0
  27. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.md +13 -0
  28. artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.json +15 -0
  29. artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.md +13 -0
  30. artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.json +15 -0
  31. artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.md +13 -0
  32. artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json +28 -0
  33. artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.md +25 -0
  34. artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.json +23 -0
  35. artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.md +11 -0
  36. artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json +31 -0
  37. artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.md +11 -0
  38. artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json +23 -0
  39. artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.md +11 -0
artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec73ee20b3f68ad49efbeb571f779a3554a657d75270fd2398f8ddd78c53524e
3
+ size 13366472
artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208036101fb18e4c55e015d89599343370f883f0da87e1f63880ea389a4005da
3
+ size 822400
artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86bedaa237f1d35268462d39491f4d223cf3b0b0e9c1d2b9c0d6151a6124854b
3
+ size 13294184
artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10909f7dd6c30f1c74f7d740b26b3202ad484f1219aa1dee8d23310bfb265b9
3
+ size 4408000
artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e93133295b1914e188baabd36e48adb5264c70fd5c27be202f339807608bd62
3
+ size 303736
artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc98e332deb8194e0539aec72d1dead90b778b50f8021e890bb392b26bb6fa1
3
+ size 4581536
artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.16666666666666666,
5
+ "bag_proxy": 0.5,
6
+ "cloth_proxy": 0.5
7
+ },
8
+ "mean_success": 0.38888888888888884,
9
+ "visibility_integral": 42.193298303418686,
10
+ "corridor_availability": 0.9207814501391517,
11
+ "reocclusion_rate": 0.016840277777777777,
12
+ "persistence_horizon_mae": 0.0,
13
+ "disturbance_cost": 0.5719093395810988
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.389
6
+ - visibility_integral: 42.193
7
+ - corridor_availability: 0.921
8
+ - reocclusion_rate: 0.017
9
+ - persistence_horizon_mae: 0.000
10
+ - disturbance_cost: 0.572
11
+ - foliage_proxy_success: 0.167
12
+ - bag_proxy_success: 0.500
13
+ - cloth_proxy_success: 0.500
artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 32.94181125528283,
10
+ "corridor_availability": 0.8710797395971086,
11
+ "reocclusion_rate": 0.003125,
12
+ "persistence_horizon_mae": 1.1577362408331497,
13
+ "disturbance_cost": 0.42711537962572443
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 32.84789120488696,
10
+ "corridor_availability": 0.8711970953477753,
11
+ "reocclusion_rate": 0.003125,
12
+ "persistence_horizon_mae": 1.1544888946683267,
13
+ "disturbance_cost": 0.4288607043110662
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 32.848
7
+ - corridor_availability: 0.871
8
+ - reocclusion_rate: 0.003
9
+ - persistence_horizon_mae: 1.154
10
+ - disturbance_cost: 0.429
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34a4ab95f18c0b0b1f6b3dee49341ac2cf46b05e8f3cfe8a9c4cefeb948bb495
3
+ size 14262770
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/config_resolved.yaml ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_actionhist
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 96
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
14
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
15
+ rebuild_dataset: true
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ history_steps: 6
19
+ planner_candidates: 8
20
+ seed: 13
21
+ optim:
22
+ epochs: 10
23
+ batch_size: 16
24
+ num_workers: 0
25
+ lr: 0.001
26
+ weight_decay: 0.0001
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: trainable
37
+ policy:
38
+ backbone:
39
+ model_name: openai/clip-vit-base-patch32
40
+ hidden_dim: 128
41
+ max_text_tokens: 32
42
+ freeze_backbone: true
43
+ gradient_checkpointing: false
44
+ use_dummy_backbone: true
45
+ fusion:
46
+ hidden_dim: 128
47
+ num_cameras: 3
48
+ num_layers: 2
49
+ num_heads: 4
50
+ ff_dim: 256
51
+ dropout: 0.1
52
+ proprio_dim: 32
53
+ proprio_tokens: 1
54
+ memory:
55
+ hidden_dim: 128
56
+ action_dim: 14
57
+ history_steps: 6
58
+ num_layers: 2
59
+ dropout: 0.1
60
+ memory_bank_size: 4
61
+ num_heads: 4
62
+ max_history_steps: 8
63
+ decoder:
64
+ hidden_dim: 128
65
+ num_heads: 4
66
+ num_layers: 2
67
+ ff_dim: 256
68
+ dropout: 0.1
69
+ chunk_size: 8
70
+ action_dim: 14
71
+ arm_action_dim: 7
72
+ num_candidates: 8
73
+ num_phases: 5
74
+ num_arm_roles: 4
75
+ reveal_head:
76
+ hidden_dim: 128
77
+ num_support_modes: 3
78
+ num_approach_templates: 32
79
+ rollout_horizon: 5
80
+ belief_map_size: 32
81
+ field_size: 16
82
+ num_heads: 4
83
+ predict_belief_map: true
84
+ num_phases: 5
85
+ num_arm_roles: 4
86
+ num_interaction_tokens: 8
87
+ world_model:
88
+ hidden_dim: 128
89
+ action_dim: 14
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 5
93
+ field_size: 16
94
+ num_heads: 4
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_interaction_tokens: 8
98
+ planner:
99
+ hidden_dim: 128
100
+ num_candidates: 8
101
+ action_dim: 14
102
+ num_support_modes: 3
103
+ utility_margin: 0.1
104
+ num_heads: 4
105
+ num_layers: 2
106
+ num_phases: 5
107
+ num_arm_roles: 4
108
+ loss_weights:
109
+ action: 1.0
110
+ phase: 0.15
111
+ arm_role: 0.2
112
+ support_mode: 0.15
113
+ corridor: 0.2
114
+ persistence: 0.1
115
+ disturbance: 0.1
116
+ world_model: 0.25
117
+ belief: 0.05
118
+ planner_success: 0.2
119
+ planner_risk: 0.1
120
+ planner_ranking: 0.1
121
+ proposal_reconstruction: 0.2
122
+ proposal_success: 0.1
123
+ proposal_ranking: 0.1
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.1984732824427481,
3
+ "planner_regret": 0.07150506228208542,
4
+ "risk_calibration_mse": 0.009851997718214989,
5
+ "role_collapse_rate": 0.0,
6
+ "num_samples": 131
7
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.10911335345978539,
6
+ "arm_role": 0.16803828751047453,
7
+ "belief": 0.19615339611967406,
8
+ "corridor": 0.3072426902751128,
9
+ "disturbance": 0.022138183771555003,
10
+ "persistence": 4.085065553585689,
11
+ "phase": 0.6928596496582031,
12
+ "planner_ranking": 0.13037589440743128,
13
+ "planner_risk": 0.02288868693479647,
14
+ "planner_success": 0.6770538414518038,
15
+ "proposal_ranking": 0.11255756175766389,
16
+ "proposal_reconstruction": 0.18574035447090864,
17
+ "proposal_success": 0.6725843846797943,
18
+ "reocclusion": 0.5957262838880221,
19
+ "support_mode": 0.5480141110407809,
20
+ "total": 1.8947660674651463,
21
+ "uncertainty": 0.21132264643286666,
22
+ "world_model": 3.2140407264232635
23
+ },
24
+ "val": {
25
+ "action": 0.03260375331673357,
26
+ "arm_role": 0.00019734147483379475,
27
+ "belief": 0.12217340038882361,
28
+ "corridor": 0.2606741752889421,
29
+ "disturbance": 0.0038526968952889242,
30
+ "persistence": 4.259798420800103,
31
+ "phase": 0.4765485127766927,
32
+ "planner_ranking": 0.09453996519247691,
33
+ "planner_risk": 0.014876836703883277,
34
+ "planner_success": 0.7184285786416795,
35
+ "proposal_ranking": 0.09570289817121294,
36
+ "proposal_reconstruction": 0.08512727750672235,
37
+ "proposal_success": 0.6778848502371047,
38
+ "reocclusion": 0.30221917231877643,
39
+ "support_mode": 0.02004129211935732,
40
+ "total": 1.6661332448323567,
41
+ "uncertainty": 0.08814782814847098,
42
+ "world_model": 3.2738216982947455
43
+ }
44
+ },
45
+ {
46
+ "epoch": 1,
47
+ "train": {
48
+ "action": 0.027130307474484045,
49
+ "arm_role": 8.922153043992391e-05,
50
+ "belief": 0.11714147465924422,
51
+ "corridor": 0.24812329125901064,
52
+ "disturbance": 0.0032305579976916,
53
+ "persistence": 4.067985733350118,
54
+ "phase": 0.4347850941121578,
55
+ "planner_ranking": 0.10891644563525915,
56
+ "planner_risk": 0.014627174474298954,
57
+ "planner_success": 0.5990801006555557,
58
+ "proposal_ranking": 0.09713093843311071,
59
+ "proposal_reconstruction": 0.080568744490544,
60
+ "proposal_success": 0.6738408307234446,
61
+ "reocclusion": 0.30479869805276394,
62
+ "support_mode": 0.00354524122303701,
63
+ "total": 1.5207980622847874,
64
+ "uncertainty": 0.043511384554828204,
65
+ "world_model": 2.9335382282733917
66
+ },
67
+ "val": {
68
+ "action": 0.020881797497471172,
69
+ "arm_role": 4.6685139548369785e-05,
70
+ "belief": 0.11459854990243912,
71
+ "corridor": 0.25242144697242314,
72
+ "disturbance": 0.0016878885928437943,
73
+ "persistence": 3.9848385651906333,
74
+ "phase": 0.5357780622111427,
75
+ "planner_ranking": 0.12248502175013225,
76
+ "planner_risk": 0.014007404032680724,
77
+ "planner_success": 0.5618854893578423,
78
+ "proposal_ranking": 0.08806857880618837,
79
+ "proposal_reconstruction": 0.07101386454370287,
80
+ "proposal_success": 0.6761446727646722,
81
+ "reocclusion": 0.2807581913140085,
82
+ "support_mode": 0.00033569296485640935,
83
+ "total": 1.5498341586854723,
84
+ "uncertainty": 0.03854438621136877,
85
+ "world_model": 3.084032244152493
86
+ }
87
+ },
88
+ {
89
+ "epoch": 2,
90
+ "train": {
91
+ "action": 0.021659845563893516,
92
+ "arm_role": 4.9689525894791586e-05,
93
+ "belief": 0.11830708974351485,
94
+ "corridor": 0.2507151396324237,
95
+ "disturbance": 0.004413674871708888,
96
+ "persistence": 2.643252914150556,
97
+ "phase": 0.43942634264628094,
98
+ "planner_ranking": 0.10935842959831159,
99
+ "planner_risk": 0.014072448286848763,
100
+ "planner_success": 0.5583219341933727,
101
+ "proposal_ranking": 0.09152727667242289,
102
+ "proposal_reconstruction": 0.0724022975191474,
103
+ "proposal_success": 0.6700933004419009,
104
+ "reocclusion": 0.31216587871313095,
105
+ "support_mode": 0.000645008350450856,
106
+ "total": 1.2870350579420726,
107
+ "uncertainty": 0.028561103138296556,
108
+ "world_model": 2.6294024040301642
109
+ },
110
+ "val": {
111
+ "action": 0.02809068684776624,
112
+ "arm_role": 5.452130871061753e-05,
113
+ "belief": 0.10888734211524327,
114
+ "corridor": 0.23266181018617418,
115
+ "disturbance": 0.0027104780440115267,
116
+ "persistence": 2.1311826043658786,
117
+ "phase": 0.48636723723676467,
118
+ "planner_ranking": 0.08748033228847715,
119
+ "planner_risk": 0.013984487702449163,
120
+ "planner_success": 0.5531185136901008,
121
+ "proposal_ranking": 0.08530248867140876,
122
+ "proposal_reconstruction": 0.07563025090429518,
123
+ "proposal_success": 0.6710293292999268,
124
+ "reocclusion": 0.31506533589628005,
125
+ "support_mode": 0.0013758842574639453,
126
+ "total": 1.145080222023858,
127
+ "uncertainty": 0.013753527258005407,
128
+ "world_model": 2.241931358973185
129
+ }
130
+ },
131
+ {
132
+ "epoch": 3,
133
+ "train": {
134
+ "action": 0.02241998988514145,
135
+ "arm_role": 7.662544400470021e-05,
136
+ "belief": 0.1198031771928072,
137
+ "corridor": 0.23061849611500898,
138
+ "disturbance": 0.008813309230996916,
139
+ "persistence": 1.791534848511219,
140
+ "phase": 0.4151729643344879,
141
+ "planner_ranking": 0.10807953278223674,
142
+ "planner_risk": 0.013897615058037141,
143
+ "planner_success": 0.5403684402505556,
144
+ "proposal_ranking": 0.09165094265093406,
145
+ "proposal_reconstruction": 0.07184119118998449,
146
+ "proposal_success": 0.6651807849605879,
147
+ "reocclusion": 0.27597781488051015,
148
+ "support_mode": 0.0006089698921035355,
149
+ "total": 1.035346123079459,
150
+ "uncertainty": 0.02133664352974544,
151
+ "world_model": 2.009356458981832
152
+ },
153
+ "val": {
154
+ "action": 0.024748968995279737,
155
+ "arm_role": 0.00046029978289475874,
156
+ "belief": 0.1361072982350985,
157
+ "corridor": 0.2555284963713752,
158
+ "disturbance": 0.01113364020259016,
159
+ "persistence": 1.8078472415606182,
160
+ "phase": 0.49851488404803806,
161
+ "planner_ranking": 0.09051681806643803,
162
+ "planner_risk": 0.012982964619166322,
163
+ "planner_success": 0.5361581941445669,
164
+ "proposal_ranking": 0.08841735952430302,
165
+ "proposal_reconstruction": 0.07644655058781306,
166
+ "proposal_success": 0.6653637025091383,
167
+ "reocclusion": 0.3103545730312665,
168
+ "support_mode": 0.0022364634981689355,
169
+ "total": 1.2436118059688144,
170
+ "uncertainty": 0.044811665597889155,
171
+ "world_model": 2.755778524610731
172
+ }
173
+ },
174
+ {
175
+ "epoch": 4,
176
+ "train": {
177
+ "action": 0.030993331456556916,
178
+ "arm_role": 8.697735347595881e-05,
179
+ "belief": 0.11502061566958825,
180
+ "corridor": 0.2168644548704227,
181
+ "disturbance": 0.005981470370898023,
182
+ "persistence": 1.325400508319338,
183
+ "phase": 0.4057002601524194,
184
+ "planner_ranking": 0.10196248659243186,
185
+ "planner_risk": 0.013080424706762036,
186
+ "planner_success": 0.5429841242730618,
187
+ "proposal_ranking": 0.09320679493248463,
188
+ "proposal_reconstruction": 0.08633392583578825,
189
+ "proposal_success": 0.6598697329560915,
190
+ "reocclusion": 0.23775681791206202,
191
+ "support_mode": 0.0005070095260938009,
192
+ "total": 0.9538104037443796,
193
+ "uncertainty": 0.01720601328027745,
194
+ "world_model": 1.8480148315429688
195
+ },
196
+ "val": {
197
+ "action": 0.02140410254812903,
198
+ "arm_role": 5.1328555046994654e-05,
199
+ "belief": 0.10880060328377618,
200
+ "corridor": 0.21354658570554522,
201
+ "disturbance": 0.0037805813287074366,
202
+ "persistence": 1.2975979381137424,
203
+ "phase": 0.47454749047756195,
204
+ "planner_ranking": 0.08853000981940164,
205
+ "planner_risk": 0.011068900529709127,
206
+ "planner_success": 0.5381979445616404,
207
+ "proposal_ranking": 0.09127837005588743,
208
+ "proposal_reconstruction": 0.07022183057334688,
209
+ "proposal_success": 0.6659399072329203,
210
+ "reocclusion": 0.19952762044138378,
211
+ "support_mode": 0.00027834434553773864,
212
+ "total": 0.952674925327301,
213
+ "uncertainty": 0.01160443677670426,
214
+ "world_model": 1.8811089727613661
215
+ }
216
+ },
217
+ {
218
+ "epoch": 5,
219
+ "train": {
220
+ "action": 0.02381352987140417,
221
+ "arm_role": 6.333578160896043e-05,
222
+ "belief": 0.1099607174595197,
223
+ "corridor": 0.21310261078178883,
224
+ "disturbance": 0.002459915580402594,
225
+ "persistence": 1.6992873040338357,
226
+ "phase": 0.4683965767423312,
227
+ "planner_ranking": 0.10114171418050925,
228
+ "planner_risk": 0.011117635760456324,
229
+ "planner_success": 0.5304318008323511,
230
+ "proposal_ranking": 0.09141425788402557,
231
+ "proposal_reconstruction": 0.07442743517458439,
232
+ "proposal_success": 0.6563858091831207,
233
+ "reocclusion": 0.331877409790953,
234
+ "support_mode": 0.3825794731577237,
235
+ "total": 0.9921379586060842,
236
+ "uncertainty": 0.010878979364254823,
237
+ "world_model": 1.6345916986465454
238
+ },
239
+ "val": {
240
+ "action": 0.02524013713830047,
241
+ "arm_role": 9.413575834413577e-05,
242
+ "belief": 0.12235430793629752,
243
+ "corridor": 0.249200153681967,
244
+ "disturbance": 0.004427346711357434,
245
+ "persistence": 3.935940318637424,
246
+ "phase": 0.6591929793357849,
247
+ "planner_ranking": 0.09305098156134288,
248
+ "planner_risk": 0.010506668748954931,
249
+ "planner_success": 0.5443058278825548,
250
+ "proposal_ranking": 0.08850305030743282,
251
+ "proposal_reconstruction": 0.07236838009622362,
252
+ "proposal_success": 0.6759181155098809,
253
+ "reocclusion": 0.7287290294965109,
254
+ "support_mode": 1.1460433138741388,
255
+ "total": 1.4354194932513766,
256
+ "uncertainty": 0.024242303644617397,
257
+ "world_model": 1.8577234480116103
258
+ }
259
+ },
260
+ {
261
+ "epoch": 6,
262
+ "train": {
263
+ "action": 0.02330849994905293,
264
+ "arm_role": 0.00015805537501970926,
265
+ "belief": 0.1124286272873481,
266
+ "corridor": 0.23743322926263014,
267
+ "disturbance": 0.0031262978930802396,
268
+ "persistence": 3.8017045756181083,
269
+ "phase": 0.6851730346679688,
270
+ "planner_ranking": 0.09221760835498571,
271
+ "planner_risk": 0.01065190702987214,
272
+ "planner_success": 0.52338948721687,
273
+ "proposal_ranking": 0.09460213221609592,
274
+ "proposal_reconstruction": 0.0690260889629523,
275
+ "proposal_success": 0.6765442565083504,
276
+ "reocclusion": 0.6601327558358511,
277
+ "support_mode": 0.7531089782714844,
278
+ "total": 1.3313229431708653,
279
+ "uncertainty": 0.016478817832345765,
280
+ "world_model": 1.75758895277977
281
+ },
282
+ "val": {
283
+ "action": 0.023909604797760647,
284
+ "arm_role": 0.000182229990362733,
285
+ "belief": 0.10623268286387126,
286
+ "corridor": 0.2390136702193154,
287
+ "disturbance": 0.0034688233410836095,
288
+ "persistence": 3.879435486263699,
289
+ "phase": 0.6545324060651991,
290
+ "planner_ranking": 0.09108450180954403,
291
+ "planner_risk": 0.00950968601844377,
292
+ "planner_success": 0.5444046192699008,
293
+ "proposal_ranking": 0.09340034839179781,
294
+ "proposal_reconstruction": 0.06825375143024656,
295
+ "proposal_success": 0.6806784537103441,
296
+ "reocclusion": 0.6431198517481486,
297
+ "support_mode": 0.6673565440707736,
298
+ "total": 1.3279486762152777,
299
+ "uncertainty": 0.008409220777038071,
300
+ "world_model": 1.7654762268066406
301
+ }
302
+ },
303
+ {
304
+ "epoch": 7,
305
+ "train": {
306
+ "action": 0.0222570748689274,
307
+ "arm_role": 0.00010212738478306467,
308
+ "belief": 0.11000798580547173,
309
+ "corridor": 0.23306679725646973,
310
+ "disturbance": 0.0033048741412737095,
311
+ "persistence": 3.4312864542007446,
312
+ "phase": 0.6377454921603203,
313
+ "planner_ranking": 0.09635432902723551,
314
+ "planner_risk": 0.011387234243253866,
315
+ "planner_success": 0.5442424913247427,
316
+ "proposal_ranking": 0.08986945698658626,
317
+ "proposal_reconstruction": 0.06708647248645623,
318
+ "proposal_success": 0.673826314508915,
319
+ "reocclusion": 0.5967350701491038,
320
+ "support_mode": 0.6117285092671713,
321
+ "total": 1.236641672750314,
322
+ "uncertainty": 0.01299802268234392,
323
+ "world_model": 1.639583835999171
324
+ },
325
+ "val": {
326
+ "action": 0.022002756595611572,
327
+ "arm_role": 2.9528787207608628e-05,
328
+ "belief": 0.12314293947484758,
329
+ "corridor": 0.24459290835592482,
330
+ "disturbance": 0.004033175555782186,
331
+ "persistence": 2.5022888514730663,
332
+ "phase": 0.5651864541901482,
333
+ "planner_ranking": 0.08950987623797522,
334
+ "planner_risk": 0.009576339947266711,
335
+ "planner_success": 0.5564733445644379,
336
+ "proposal_ranking": 0.08564452992545234,
337
+ "proposal_reconstruction": 0.0668929773900244,
338
+ "proposal_success": 0.6727176573541429,
339
+ "reocclusion": 0.44570984774165684,
340
+ "support_mode": 0.46422717306349015,
341
+ "total": 1.1864347391658359,
342
+ "uncertainty": 0.029489829101496272,
343
+ "world_model": 1.938715272479587
344
+ }
345
+ },
346
+ {
347
+ "epoch": 8,
348
+ "train": {
349
+ "action": 0.019501756915512185,
350
+ "arm_role": 4.518414956085811e-05,
351
+ "belief": 0.11856034491211176,
352
+ "corridor": 0.22101152688264847,
353
+ "disturbance": 0.005134509168177222,
354
+ "persistence": 2.0068868373831115,
355
+ "phase": 0.48705990488330525,
356
+ "planner_ranking": 0.09284681857873996,
357
+ "planner_risk": 0.01043116363386313,
358
+ "planner_success": 0.5206327053407828,
359
+ "proposal_ranking": 0.08962376570949952,
360
+ "proposal_reconstruction": 0.06520200536275904,
361
+ "proposal_success": 0.6658588672677676,
362
+ "reocclusion": 0.3816298767924309,
363
+ "support_mode": 0.28521304662960273,
364
+ "total": 0.9868118191758791,
365
+ "uncertainty": 0.015137382831502086,
366
+ "world_model": 1.557202582557996
367
+ },
368
+ "val": {
369
+ "action": 0.01789842639118433,
370
+ "arm_role": 4.689901551399897e-05,
371
+ "belief": 0.11476549340618981,
372
+ "corridor": 0.20894784231980643,
373
+ "disturbance": 0.006007373902118868,
374
+ "persistence": 1.3287181854248047,
375
+ "phase": 0.4533460769388411,
376
+ "planner_ranking": 0.09486487342251672,
377
+ "planner_risk": 0.009565845442314943,
378
+ "planner_success": 0.5391222304768033,
379
+ "proposal_ranking": 0.09564895927906036,
380
+ "proposal_reconstruction": 0.06366962691148122,
381
+ "proposal_success": 0.6531099279721578,
382
+ "reocclusion": 0.20913477883570725,
383
+ "support_mode": 0.020122037941796914,
384
+ "total": 0.8832472761472067,
385
+ "uncertainty": 0.004256232098365824,
386
+ "world_model": 1.6128649711608887
387
+ }
388
+ },
389
+ {
390
+ "epoch": 9,
391
+ "train": {
392
+ "action": 0.016334226393761735,
393
+ "arm_role": 6.246064898126254e-05,
394
+ "belief": 0.10857280095418294,
395
+ "corridor": 0.19824294683833918,
396
+ "disturbance": 0.003975103861497094,
397
+ "persistence": 1.1797814977665741,
398
+ "phase": 0.40182892481486004,
399
+ "planner_ranking": 0.09200050433476765,
400
+ "planner_risk": 0.009777653632530322,
401
+ "planner_success": 0.4811691368619601,
402
+ "proposal_ranking": 0.09789401820550363,
403
+ "proposal_reconstruction": 0.06257841860254605,
404
+ "proposal_success": 0.6470200767119726,
405
+ "reocclusion": 0.24410315960024795,
406
+ "support_mode": 0.004229606854399511,
407
+ "total": 0.8042063365379969,
408
+ "uncertainty": 0.004445769324471864,
409
+ "world_model": 1.4606107970078785
410
+ },
411
+ "val": {
412
+ "action": 0.016275615017447207,
413
+ "arm_role": 6.341466885512798e-05,
414
+ "belief": 0.10352056639062034,
415
+ "corridor": 0.19860318468676674,
416
+ "disturbance": 0.0032389340146134296,
417
+ "persistence": 1.292702876859241,
418
+ "phase": 0.44613948629962075,
419
+ "planner_ranking": 0.09405012097623613,
420
+ "planner_risk": 0.00965356407687068,
421
+ "planner_success": 0.5550677445199754,
422
+ "proposal_ranking": 0.09946481055683559,
423
+ "proposal_reconstruction": 0.062025451825724706,
424
+ "proposal_success": 0.637408435344696,
425
+ "reocclusion": 0.1942617915984657,
426
+ "support_mode": 0.0015144004525306325,
427
+ "total": 0.8655187752511766,
428
+ "uncertainty": 0.002816161386565202,
429
+ "world_model": 1.5848071972529094
430
+ }
431
+ }
432
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54df000293b3795beb34e6696ed2e18664ea87df6aaf81cf39ec9b23c589580a
3
+ size 3810162
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/config_resolved.yaml ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_actionhist_smoke
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 64
11
+ train_episodes_per_proxy: 6
12
+ val_episodes_per_proxy: 2
13
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt
14
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt
15
+ rebuild_dataset: true
16
+ chunk_horizon: 4
17
+ rollout_horizon: 3
18
+ history_steps: 6
19
+ planner_candidates: 4
20
+ seed: 13
21
+ optim:
22
+ epochs: 4
23
+ batch_size: 8
24
+ num_workers: 0
25
+ lr: 0.001
26
+ weight_decay: 0.0001
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: trainable
37
+ policy:
38
+ backbone:
39
+ model_name: openai/clip-vit-base-patch32
40
+ hidden_dim: 64
41
+ max_text_tokens: 32
42
+ freeze_backbone: true
43
+ gradient_checkpointing: false
44
+ use_dummy_backbone: true
45
+ fusion:
46
+ hidden_dim: 64
47
+ num_cameras: 3
48
+ num_layers: 2
49
+ num_heads: 4
50
+ ff_dim: 128
51
+ dropout: 0.1
52
+ proprio_dim: 32
53
+ proprio_tokens: 1
54
+ memory:
55
+ hidden_dim: 64
56
+ action_dim: 14
57
+ history_steps: 6
58
+ num_layers: 2
59
+ dropout: 0.1
60
+ memory_bank_size: 4
61
+ num_heads: 4
62
+ max_history_steps: 8
63
+ decoder:
64
+ hidden_dim: 64
65
+ num_heads: 4
66
+ num_layers: 2
67
+ ff_dim: 128
68
+ dropout: 0.1
69
+ chunk_size: 4
70
+ action_dim: 14
71
+ arm_action_dim: 7
72
+ num_candidates: 4
73
+ num_phases: 5
74
+ num_arm_roles: 4
75
+ reveal_head:
76
+ hidden_dim: 64
77
+ num_support_modes: 3
78
+ num_approach_templates: 32
79
+ rollout_horizon: 3
80
+ belief_map_size: 32
81
+ field_size: 16
82
+ num_heads: 4
83
+ predict_belief_map: true
84
+ num_phases: 5
85
+ num_arm_roles: 4
86
+ num_interaction_tokens: 8
87
+ world_model:
88
+ hidden_dim: 64
89
+ action_dim: 14
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 3
93
+ field_size: 16
94
+ num_heads: 4
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_interaction_tokens: 8
98
+ planner:
99
+ hidden_dim: 64
100
+ num_candidates: 4
101
+ action_dim: 14
102
+ num_support_modes: 3
103
+ utility_margin: 0.1
104
+ num_heads: 4
105
+ num_layers: 2
106
+ num_phases: 5
107
+ num_arm_roles: 4
108
+ loss_weights:
109
+ action: 1.0
110
+ phase: 0.15
111
+ arm_role: 0.2
112
+ support_mode: 0.15
113
+ corridor: 0.2
114
+ persistence: 0.1
115
+ disturbance: 0.1
116
+ world_model: 0.25
117
+ belief: 0.05
118
+ planner_success: 0.2
119
+ planner_risk: 0.1
120
+ planner_ranking: 0.1
121
+ proposal_reconstruction: 0.2
122
+ proposal_success: 0.1
123
+ proposal_ranking: 0.1
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e4adeddd399c964ef4536e44b963d3e061fe4734a9e0ae84d6961ffc81b1331
3
+ size 942404672
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be03ccc29ab53e41e5b6065bd5cc4ae35edd53da600a4ae7e2e4beb5693c76ab
3
+ size 14260274
artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.2916666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.7083333333333334
7
+ },
8
+ "mean_success": 0.5138888888888888,
9
+ "visibility_integral": 24.377886186043423,
10
+ "corridor_availability": 0.7185707421352466,
11
+ "reocclusion_rate": 0.035547913343965974,
12
+ "persistence_horizon_mae": 1.5261633908191676,
13
+ "disturbance_cost": 0.33828365347451633
14
+ },
15
+ "backbone": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.2916666666666667,
18
+ "bag_proxy": 0.3333333333333333,
19
+ "cloth_proxy": 0.625
20
+ },
21
+ "mean_success": 0.4166666666666667,
22
+ "visibility_integral": 16.212429179085625,
23
+ "corridor_availability": 0.5099402347372638,
24
+ "reocclusion_rate": 0.035937500000000004,
25
+ "persistence_horizon_mae": 0.0,
26
+ "disturbance_cost": 0.14064574577949113
27
+ },
28
+ "reveal": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.4166666666666667,
31
+ "bag_proxy": 0.5833333333333334,
32
+ "cloth_proxy": 0.6666666666666666
33
+ },
34
+ "mean_success": 0.5555555555555555,
35
+ "visibility_integral": 32.11319461464882,
36
+ "corridor_availability": 0.8064262109498183,
37
+ "reocclusion_rate": 0.05781655348648813,
38
+ "persistence_horizon_mae": 1.9629322701129155,
39
+ "disturbance_cost": 0.22681122702649897
40
+ }
41
+ }
artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.514
6
+ - visibility_integral: 24.378
7
+ - corridor_availability: 0.719
8
+ - reocclusion_rate: 0.036
9
+ - persistence_horizon_mae: 1.526
10
+ - disturbance_cost: 0.338
11
+ - foliage_proxy_success: 0.292
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.708
14
+
15
+ ## backbone
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
17
+ - mean_success: 0.417
18
+ - visibility_integral: 16.212
19
+ - corridor_availability: 0.510
20
+ - reocclusion_rate: 0.036
21
+ - persistence_horizon_mae: 0.000
22
+ - disturbance_cost: 0.141
23
+ - foliage_proxy_success: 0.292
24
+ - bag_proxy_success: 0.333
25
+ - cloth_proxy_success: 0.625
26
+
27
+ ## reveal
28
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
29
+ - mean_success: 0.556
30
+ - visibility_integral: 32.113
31
+ - corridor_availability: 0.806
32
+ - reocclusion_rate: 0.058
33
+ - persistence_horizon_mae: 1.963
34
+ - disturbance_cost: 0.227
35
+ - foliage_proxy_success: 0.417
36
+ - bag_proxy_success: 0.583
37
+ - cloth_proxy_success: 0.667
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 32.84789120488696,
10
+ "corridor_availability": 0.8711970953477753,
11
+ "reocclusion_rate": 0.003125,
12
+ "persistence_horizon_mae": 1.1544888946683267,
13
+ "disturbance_cost": 0.4288607043110662
14
+ }
15
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 32.848
7
+ - corridor_availability: 0.871
8
+ - reocclusion_rate: 0.003
9
+ - persistence_horizon_mae: 1.154
10
+ - disturbance_cost: 0.429
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.0,
6
+ "cloth_proxy": 0.0
7
+ },
8
+ "mean_success": 0.0,
9
+ "visibility_integral": 63.63303746117486,
10
+ "corridor_availability": 0.9842249751091003,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.931804162517397,
13
+ "disturbance_cost": 0.8506438152657615
14
+ }
15
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt
5
+ - mean_success: 0.000
6
+ - visibility_integral: 63.633
7
+ - corridor_availability: 0.984
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.932
10
+ - disturbance_cost: 0.851
11
+ - foliage_proxy_success: 0.000
12
+ - bag_proxy_success: 0.000
13
+ - cloth_proxy_success: 0.000
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.5,
5
+ "bag_proxy": 0.75,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.625,
9
+ "visibility_integral": 26.89287617057562,
10
+ "corridor_availability": 0.8617658143242201,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.787702427190893,
13
+ "disturbance_cost": 0.3262111305569609
14
+ }
15
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt
5
+ - mean_success: 0.625
6
+ - visibility_integral: 26.893
7
+ - corridor_availability: 0.862
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.788
10
+ - disturbance_cost: 0.326
11
+ - foliage_proxy_success: 0.500
12
+ - bag_proxy_success: 0.750
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.2916666666666667,
5
+ "bag_proxy": 0.041666666666666664,
6
+ "cloth_proxy": 0.16666666666666666
7
+ },
8
+ "mean_success": 0.16666666666666666,
9
+ "visibility_integral": 17.456528491444057,
10
+ "corridor_availability": 0.48336762624482316,
11
+ "reocclusion_rate": 0.11013933982683982,
12
+ "persistence_horizon_mae": 1.8837784738524963,
13
+ "disturbance_cost": 0.2859070710207258
14
+ }
15
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/checkpoint_best.pt
5
+ - mean_success: 0.167
6
+ - visibility_integral: 17.457
7
+ - corridor_availability: 0.483
8
+ - reocclusion_rate: 0.110
9
+ - persistence_horizon_mae: 1.884
10
+ - disturbance_cost: 0.286
11
+ - foliage_proxy_success: 0.292
12
+ - bag_proxy_success: 0.042
13
+ - cloth_proxy_success: 0.167
artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.375,
5
+ "bag_proxy": 0.375,
6
+ "cloth_proxy": 0.25
7
+ },
8
+ "mean_success": 0.3333333333333333,
9
+ "visibility_integral": 43.64047184586525,
10
+ "corridor_availability": 0.9287551492452621,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.8099890020956475,
13
+ "disturbance_cost": 0.6381748262792826
14
+ }
15
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt
5
+ - mean_success: 0.333
6
+ - visibility_integral: 43.640
7
+ - corridor_availability: 0.929
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.810
10
+ - disturbance_cost: 0.638
11
+ - foliage_proxy_success: 0.375
12
+ - bag_proxy_success: 0.375
13
+ - cloth_proxy_success: 0.250
artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backbone": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.6666666666666666
7
+ },
8
+ "mean_success": 0.5555555555555555,
9
+ "visibility_integral": 29.27436817354626,
10
+ "corridor_availability": 0.7935162136952082,
11
+ "reocclusion_rate": 0.07854136604136604,
12
+ "persistence_horizon_mae": 0.0,
13
+ "disturbance_cost": 0.4006388829503622
14
+ },
15
+ "reveal": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.4166666666666667,
18
+ "bag_proxy": 0.5833333333333334,
19
+ "cloth_proxy": 0.625
20
+ },
21
+ "mean_success": 0.5416666666666666,
22
+ "visibility_integral": 30.107333534293705,
23
+ "corridor_availability": 0.8134206715557311,
24
+ "reocclusion_rate": 0.05241552429052429,
25
+ "persistence_horizon_mae": 2.0996421982129196,
26
+ "disturbance_cost": 0.42389288420478505
27
+ }
28
+ }
artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## backbone
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
5
+ - mean_success: 0.556
6
+ - visibility_integral: 29.274
7
+ - corridor_availability: 0.794
8
+ - reocclusion_rate: 0.079
9
+ - persistence_horizon_mae: 0.000
10
+ - disturbance_cost: 0.401
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.667
14
+
15
+ ## reveal
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
17
+ - mean_success: 0.542
18
+ - visibility_integral: 30.107
19
+ - corridor_availability: 0.813
20
+ - reocclusion_rate: 0.052
21
+ - persistence_horizon_mae: 2.100
22
+ - disturbance_cost: 0.424
23
+ - foliage_proxy_success: 0.417
24
+ - bag_proxy_success: 0.583
25
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt",
3
+ "plan_requested": true,
4
+ "plan_applied": true,
5
+ "planner_mode": "proxy_pretrained",
6
+ "support_mode_conditioning": true,
7
+ "episodes_per_task": 1,
8
+ "episode_length": 5,
9
+ "resolution": 128,
10
+ "cameras": [
11
+ "front",
12
+ "wrist_left",
13
+ "wrist_right"
14
+ ],
15
+ "tasks": {
16
+ "open_drawer": {
17
+ "error": "A path could not be found. Most likely due to the target being inaccessible or a collison was detected.",
18
+ "mean_success": 0.0,
19
+ "mean_return": 0.0
20
+ }
21
+ },
22
+ "mean_success": 0.0
23
+ }
artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RLBench Rollout Eval
2
+
3
+ - Checkpoint: `/workspace/VLAarchtests/artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt`
4
+ - Plan requested: `True`
5
+ - Plan applied: `True`
6
+ - Support-mode conditioning: `True`
7
+ - Mean success: `0.000`
8
+
9
+ ## Per-task
10
+
11
+ - `open_drawer`: error=A path could not be found. Most likely due to the target being inaccessible or a collison was detected.
artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt",
3
+ "plan_requested": true,
4
+ "plan_applied": true,
5
+ "planner_mode": "proxy_pretrained",
6
+ "support_mode_conditioning": true,
7
+ "episodes_per_task": 1,
8
+ "episode_length": 5,
9
+ "resolution": 128,
10
+ "cameras": [
11
+ "front",
12
+ "wrist_left",
13
+ "wrist_right"
14
+ ],
15
+ "tasks": {
16
+ "open_drawer": {
17
+ "task_class": "OpenDrawer",
18
+ "successes": [
19
+ 0.0
20
+ ],
21
+ "returns": [
22
+ 0.0
23
+ ],
24
+ "path_recoveries": 0,
25
+ "noop_fallbacks": 0,
26
+ "mean_success": 0.0,
27
+ "mean_return": 0.0
28
+ }
29
+ },
30
+ "mean_success": 0.0
31
+ }
artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RLBench Rollout Eval
2
+
3
+ - Checkpoint: `/workspace/VLAarchtests/artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt`
4
+ - Plan requested: `True`
5
+ - Plan applied: `True`
6
+ - Support-mode conditioning: `True`
7
+ - Mean success: `0.000`
8
+
9
+ ## Per-task
10
+
11
+ - `open_drawer`: mean_success=0.000, returns=[0.0]
artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt",
3
+ "plan_requested": true,
4
+ "plan_applied": true,
5
+ "planner_mode": "proxy_pretrained",
6
+ "support_mode_conditioning": true,
7
+ "episodes_per_task": 1,
8
+ "episode_length": 5,
9
+ "resolution": 128,
10
+ "cameras": [
11
+ "front",
12
+ "wrist_left",
13
+ "wrist_right"
14
+ ],
15
+ "tasks": {
16
+ "open_drawer": {
17
+ "error": "A path could not be found because the target is outside of workspace.",
18
+ "mean_success": 0.0,
19
+ "mean_return": 0.0
20
+ }
21
+ },
22
+ "mean_success": 0.0
23
+ }
artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RLBench Rollout Eval
2
+
3
+ - Checkpoint: `/workspace/VLAarchtests/artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt`
4
+ - Plan requested: `True`
5
+ - Plan applied: `True`
6
+ - Support-mode conditioning: `True`
7
+ - Mean success: `0.000`
8
+
9
+ ## Per-task
10
+
11
+ - `open_drawer`: error=A path could not be found because the target is outside of workspace.