Muqeeth commited on
Commit
013c159
·
verified ·
1 Parent(s): 4747cb9

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. .hydra/config.yaml +211 -0
  2. seed_1000/iteration_001/agent:Alice_rewards.csv +64 -0
  3. seed_1000/iteration_001/agent:Bob_rewards.csv +64 -0
  4. seed_1000/iteration_001/agent_trainer_log/basic_training_metrics_2025-08-20___08-14-30.json +44 -0
  5. seed_1000/iteration_001/mgid:10345015_rollout_tree.json +0 -0
  6. seed_1000/iteration_001/mgid:10754920_rollout_tree.json +0 -0
  7. seed_1000/iteration_001/mgid:11645879_rollout_tree.json +0 -0
  8. seed_1000/iteration_001/mgid:11816286_rollout_tree.json +0 -0
  9. seed_1000/iteration_001/mgid:12802451_rollout_tree.json +0 -0
  10. seed_1000/iteration_001/mgid:13550175_rollout_tree.json +0 -0
  11. seed_1000/iteration_001/mgid:13655342_rollout_tree.json +0 -0
  12. seed_1000/iteration_001/mgid:14228271_rollout_tree.json +0 -0
  13. seed_1000/iteration_001/mgid:15733057_rollout_tree.json +0 -0
  14. seed_1000/iteration_001/mgid:15865660_rollout_tree.json +0 -0
  15. seed_1000/iteration_001/mgid:16317016_rollout_tree.json +0 -0
  16. seed_1000/iteration_001/mgid:16358604_rollout_tree.json +0 -0
  17. seed_1000/iteration_001/mgid:16527427_rollout_tree.json +0 -0
  18. seed_1000/iteration_001/mgid:16830985_rollout_tree.json +0 -0
  19. seed_1000/iteration_001/mgid:17030622_rollout_tree.json +0 -0
  20. seed_1000/iteration_001/mgid:17339176_rollout_tree.json +0 -0
  21. seed_1000/iteration_001/mgid:18768830_rollout_tree.json +0 -0
  22. seed_1000/iteration_001/mgid:18955940_rollout_tree.json +0 -0
  23. seed_1000/iteration_001/mgid:21242036_rollout_tree.json +0 -0
  24. seed_1000/iteration_001/mgid:21773434_rollout_tree.json +0 -0
  25. seed_1000/iteration_001/mgid:22674133_rollout_tree.json +0 -0
  26. seed_1000/iteration_001/mgid:22908920_rollout_tree.json +0 -0
  27. seed_1000/iteration_001/mgid:23104248_rollout_tree.json +0 -0
  28. seed_1000/iteration_001/mgid:24698011_rollout_tree.json +0 -0
  29. seed_1000/iteration_001/mgid:26297032_rollout_tree.json +0 -0
  30. seed_1000/iteration_001/mgid:27954762_rollout_tree.json +0 -0
  31. seed_1000/iteration_001/mgid:28124274_rollout_tree.json +0 -0
  32. seed_1000/iteration_001/mgid:28146091_rollout_tree.json +0 -0
  33. seed_1000/iteration_001/mgid:28169614_rollout_tree.json +0 -0
  34. seed_1000/iteration_001/mgid:29594838_rollout_tree.json +0 -0
  35. seed_1000/iteration_001/mgid:31696950_rollout_tree.json +0 -0
  36. seed_1000/iteration_001/mgid:33566731_rollout_tree.json +0 -0
  37. seed_1000/iteration_001/mgid:33816209_rollout_tree.json +0 -0
  38. seed_1000/iteration_001/mgid:33855620_rollout_tree.json +0 -0
  39. seed_1000/iteration_001/mgid:40496757_rollout_tree.json +0 -0
  40. seed_1000/iteration_001/mgid:57359612_rollout_tree.json +0 -0
  41. seed_1000/iteration_001/mgid:61016791_rollout_tree.json +0 -0
  42. seed_1000/iteration_001/mgid:80373395_rollout_tree.json +0 -0
  43. seed_1000/iteration_001/mgid:87817039_rollout_tree.json +0 -0
  44. seed_1000/iteration_001/mgid:92400515_rollout_tree.json +0 -0
  45. seed_1000/iteration_002/mgid:11888093_rollout_tree.json +0 -0
  46. seed_1000/iteration_002/mgid:12569295_rollout_tree.json +0 -0
  47. seed_1000/iteration_002/mgid:21829048_rollout_tree.json +0 -0
  48. seed_1000/iteration_002/mgid:22669760_rollout_tree.json +0 -0
  49. seed_1000/iteration_002/mgid:29038958_rollout_tree.json +0 -0
  50. seed_1000/iteration_002/mgid:29723555_rollout_tree.json +0 -0
.hydra/config.yaml ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ name: ipd_prev_ad_align_qwen2.5_7b
3
+ method: generate_and_train
4
+ description: no description
5
+ nb_epochs: 1000
6
+ nb_matches_per_iteration: 64
7
+ reinit_matches_each_it: true
8
+ checkpoint_every_n_iterations: 50
9
+ start_epoch: 0
10
+ resume_experiment: true
11
+ base_seed: 1000
12
+ common_agent_kwargs:
13
+ max_errors: 0
14
+ allow_reasoning: false
15
+ max_reasoning_chars: 0
16
+ cooperate_string: <A>
17
+ defect_string: <B>
18
+ strategy_prompt: ''
19
+ intro_prompt: "Welcome to an iterated game. You are playing against another agent.\n\
20
+ \nGame Rules:\n1. In each round, both agents simultaneously choose to either play\
21
+ \ A or B.\n2. The payoffs for each round are as follows:\n - If both agents\
22
+ \ play A, both receive 3 points.\n - If both agents play B, both receive 1 points.\n\
23
+ \ - If one agent plays A and the other plays B, the player who plays A receives\
24
+ \ 0 points and the player who plays B receives 5 points.\n3. The goal is to maximize\
25
+ \ your total points over all rounds.\n4. You will see the previous play of the\
26
+ \ other player after the completion of a round.\n\nIn order to play A, you must\
27
+ \ write \"<A>\" and nothing else.\nIn order to play B, you must write \"<B>\"\
28
+ \ and nothing else.\n\nDuring the entire game, you must only write \"<A>\" or\
29
+ \ \"<B>\". If you write anything else, your response will be considered as an\
30
+ \ error.\n\nThe game starts now:\n"
31
+ goal_prompt: 'Your goal is to maximize your utility.
32
+
33
+ '
34
+ agent_0_id: Alice
35
+ agent_1_id: Bob
36
+ agent_ids:
37
+ - Alice
38
+ - Bob
39
+ markov_games:
40
+ runner_method_name: AlternativeActionsRunner
41
+ runner_kwargs:
42
+ nb_alternative_actions: 1
43
+ max_depth: 10
44
+ simulation_class_name: IPD
45
+ simulation_init_args:
46
+ rounds_per_game: 10
47
+ cooperate_actions:
48
+ - <A>
49
+ defect_actions:
50
+ - <B>
51
+ agent_ids: ${agent_ids}
52
+ reward: 3
53
+ punishment: 1
54
+ temptation: 5
55
+ sucker: 0
56
+ agents:
57
+ 0:
58
+ agent_id: ${agent_0_id}
59
+ agent_class_name: IPDAgent
60
+ policy_id: base_llm/agent_adapter
61
+ init_kwargs:
62
+ max_errors: 0
63
+ allow_reasoning: false
64
+ max_reasoning_chars: 0
65
+ cooperate_string: <A>
66
+ defect_string: <B>
67
+ strategy_prompt: ''
68
+ intro_prompt: "Welcome to an iterated game. You are playing against another\
69
+ \ agent.\n\nGame Rules:\n1. In each round, both agents simultaneously choose\
70
+ \ to either play A or B.\n2. The payoffs for each round are as follows:\n\
71
+ \ - If both agents play A, both receive 3 points.\n - If both agents\
72
+ \ play B, both receive 1 points.\n - If one agent plays A and the other\
73
+ \ plays B, the player who plays A receives 0 points and the player who plays\
74
+ \ B receives 5 points.\n3. The goal is to maximize your total points over\
75
+ \ all rounds.\n4. You will see the previous play of the other player after\
76
+ \ the completion of a round.\n\nIn order to play A, you must write \"<A>\"\
77
+ \ and nothing else.\nIn order to play B, you must write \"<B>\" and nothing\
78
+ \ else.\n\nDuring the entire game, you must only write \"<A>\" or \"<B>\"\
79
+ . If you write anything else, your response will be considered as an error.\n\
80
+ \nThe game starts now:\n"
81
+ goal_prompt: 'Your goal is to maximize your utility.
82
+
83
+ '
84
+ 1:
85
+ agent_id: ${agent_1_id}
86
+ agent_class_name: IPDAgent
87
+ policy_id: base_llm/agent_adapter
88
+ init_kwargs:
89
+ max_errors: 0
90
+ allow_reasoning: false
91
+ max_reasoning_chars: 0
92
+ cooperate_string: <A>
93
+ defect_string: <B>
94
+ strategy_prompt: ''
95
+ intro_prompt: "Welcome to an iterated game. You are playing against another\
96
+ \ agent.\n\nGame Rules:\n1. In each round, both agents simultaneously choose\
97
+ \ to either play A or B.\n2. The payoffs for each round are as follows:\n\
98
+ \ - If both agents play A, both receive 3 points.\n - If both agents\
99
+ \ play B, both receive 1 points.\n - If one agent plays A and the other\
100
+ \ plays B, the player who plays A receives 0 points and the player who plays\
101
+ \ B receives 5 points.\n3. The goal is to maximize your total points over\
102
+ \ all rounds.\n4. You will see the previous play of the other player after\
103
+ \ the completion of a round.\n\nIn order to play A, you must write \"<A>\"\
104
+ \ and nothing else.\nIn order to play B, you must write \"<B>\" and nothing\
105
+ \ else.\n\nDuring the entire game, you must only write \"<A>\" or \"<B>\"\
106
+ . If you write anything else, your response will be considered as an error.\n\
107
+ \nThe game starts now:\n"
108
+ goal_prompt: 'Your goal is to maximize your utility.
109
+
110
+ '
111
+ log_func: log_ipd_match
112
+ run_batched_matches_args:
113
+ nb_parallel_matches: -1
114
+ temperature: 1.0
115
+ models:
116
+ base_llm:
117
+ class: LeanLocalLLM
118
+ init_args:
119
+ llm_id: base_llm
120
+ model_name: Qwen/Qwen2.5-7B-Instruct
121
+ inference_backend: vllm
122
+ hf_kwargs:
123
+ device_map: auto
124
+ torch_dtype: bfloat16
125
+ max_memory:
126
+ 0: 15GiB
127
+ attn_implementation: flash_attention_2
128
+ inference_backend_init_kwargs:
129
+ enable_prefix_caching: true
130
+ max_model_len: 10000.0
131
+ gpu_memory_utilization: 0.5
132
+ dtype: bfloat16
133
+ trust_remote_code: true
134
+ max_lora_rank: 32
135
+ inference_backend_sampling_params:
136
+ temperature: 1.0
137
+ top_p: 1.0
138
+ max_tokens: 400
139
+ top_k: -1
140
+ adapter_configs:
141
+ agent_adapter:
142
+ task_type: CAUSAL_LM
143
+ r: 32
144
+ lora_alpha: 64
145
+ lora_dropout: 0.0
146
+ target_modules: all-linear
147
+ critic_adapter:
148
+ task_type: CAUSAL_LM
149
+ r: 32
150
+ lora_alpha: 64
151
+ lora_dropout: 0.0
152
+ target_modules: all-linear
153
+ critics:
154
+ agent_critic:
155
+ module_pointer:
156
+ - base_llm
157
+ - critic_adapter
158
+ optimizers:
159
+ agent_optimizer:
160
+ module_pointer:
161
+ - base_llm
162
+ - agent_adapter
163
+ optimizer_class_name: torch.optim.Adam
164
+ init_args:
165
+ lr: 1.0e-06
166
+ weight_decay: 0.0
167
+ critic_optimizer:
168
+ module_pointer: agent_critic
169
+ optimizer_class_name: torch.optim.Adam
170
+ init_args:
171
+ lr: 1.0e-06
172
+ weight_decay: 0.0
173
+ trainers:
174
+ agent_trainer:
175
+ class: TrainerAdAlign
176
+ module_pointers:
177
+ policy:
178
+ - base_llm
179
+ - agent_adapter
180
+ policy_optimizer: agent_optimizer
181
+ critic: agent_critic
182
+ critic_optimizer: critic_optimizer
183
+ kwargs:
184
+ entropy_coeff: 0.0
185
+ kl_coeff: 0.0
186
+ gradient_clipping: 1.0
187
+ restrict_tokens: null
188
+ mini_batch_size: 4
189
+ use_gradient_checkpointing: true
190
+ temperature: ${temperature}
191
+ device: cuda:0
192
+ use_gae: false
193
+ skip_discounted_state_visitation: true
194
+ gae_lambda_for_credits: 0.92
195
+ gae_lambda_for_targets: 0.92
196
+ discount_factor: 0.9
197
+ use_rloo: true
198
+ enable_tokenwise_logging: false
199
+ pg_loss_normalization: batch
200
+ reward_normalizing_constant: 5.0
201
+ ad_align_force_coop_first_step: false
202
+ ad_align_clipping: null
203
+ ad_align_gamma: 0.9
204
+ ad_align_exclude_k_equals_t: false
205
+ ad_align_use_sign: false
206
+ ad_align_beta: 0.1
207
+ use_old_ad_align: true
208
+ use_time_regularization: false
209
+ rloo_branch: false
210
+ train_on_which_data:
211
+ agent_trainer: ${agent_ids}
seed_1000/iteration_001/agent:Alice_rewards.csv ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1,5,1,5,1,5,1,5,1,5
2
+ 1,3,1,3,1,3,1,3,1,3
3
+ 1,3,1,3,1,3,1,3,1,3
4
+ 1,3,1,3,1,3,1,3,1,3
5
+ 1,3,1,3,1,3,1,3,1,3
6
+ 1,3,1,3,1,3,1,3,1,3
7
+ 1,3,1,3,1,3,1,3,1,3
8
+ 1,3,1,3,1,3,1,3,1,3
9
+ 1,3,1,3,1,3,1,3,1,3
10
+ 1,3,1,3,1,3,1,3,1,3
11
+ 1,3,1,3,1,3,1,3,1,3
12
+ 1,3,1,3,1,3,1,3,1,3
13
+ 1,3,1,3,1,3,1,3,1,3
14
+ 1,3,1,3,1,3,1,3,1,3
15
+ 1,1,1,1,1,1,1,1,1,1
16
+ 1,3,1,3,1,3,1,3,1,3
17
+ 1,5,1,5,1,5,1,5,1,5
18
+ 1,5,0,1,5,0,1,5,0,1
19
+ 1,0,1,3,1,5,0,1,5,0
20
+ 1,3,1,3,1,3,1,3,1,3
21
+ 1,3,1,3,1,3,1,3,1,3
22
+ 1,3,1,3,1,3,1,3,1,3
23
+ 1,3,1,3,1,3,1,3,1,3
24
+ 1,3,1,3,1,3,1,3,1,3
25
+ 1,3,1,3,1,3,1,3,1,3
26
+ 1,5,5,1,0,0,0,0,0,0
27
+ 1,3,1,3,1,3,1,3,1,3
28
+ 1,3,1,3,1,3,1,3,1,3
29
+ 1,5,1,5,1,5,1,5,1,5
30
+ 1,5,0,1,5,0,1,5,0,1
31
+ 1,3,1,3,1,3,1,3,1,3
32
+ 1,3,1,3,1,3,1,3,1,3
33
+ 1,0,5,1,0,5,1,0,5,1
34
+ 1,5,1,5,1,5,1,5,1,5
35
+ 1,3,1,3,1,3,1,3,1,3
36
+ 1,3,1,3,1,3,1,3,1,3
37
+ 1,3,1,3,1,3,1,3,1,3
38
+ 1,0,3,5,1,0,3,5,1,0
39
+ 1,0,1,0,1,0,1,0,1,0
40
+ 1,3,1,3,1,3,1,3,1,3
41
+ 1,3,1,3,1,3,1,3,1,3
42
+ 1,3,1,3,1,3,1,3,1,3
43
+ 1,3,1,3,1,3,1,3,1,3
44
+ 1,5,0,1,5,0,1,5,0,1
45
+ 1,3,1,3,1,3,1,3,1,3
46
+ 1,3,1,3,1,3,1,3,1,3
47
+ 1,3,1,3,1,3,1,3,1,3
48
+ 1,3,1,3,1,3,1,3,1,3
49
+ 1,3,1,3,1,3,1,3,1,3
50
+ 1,0,5,1,0,5,1,0,5,1
51
+ 1,0,5,1,0,5,1,0,5,1
52
+ 1,3,1,3,1,3,1,3,1,3
53
+ 1,5,1,3,0,1,3,0,1,3
54
+ 1,5,1,5,1,5,1,5,1,5
55
+ 1,5,1,1,1,1,1,1,1,1
56
+ 1,3,1,3,1,3,1,3,1,3
57
+ 1,0,1,0,1,0,1,0,1,0
58
+ 1,3,1,5,0,1,5,0,1,5
59
+ 1,3,1,3,1,3,1,3,1,3
60
+ 1,3,1,3,1,3,1,3,1,3
61
+ 1,3,1,3,1,3,1,3,1,3
62
+ 1,5,1,1,1,1,1,1,1,1
63
+ 1,1,1,1,1,1,1,1,1,1
64
+ 1,3,1,3,1,3,1,3,1,3
seed_1000/iteration_001/agent:Bob_rewards.csv ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1,0,1,0,1,0,1,0,1,0
2
+ 1,3,1,3,1,3,1,3,1,3
3
+ 1,3,1,3,1,3,1,3,1,3
4
+ 1,3,1,3,1,3,1,3,1,3
5
+ 1,3,1,3,1,3,1,3,1,3
6
+ 1,3,1,3,1,3,1,3,1,3
7
+ 1,3,1,3,1,3,1,3,1,3
8
+ 1,3,1,3,1,3,1,3,1,3
9
+ 1,3,1,3,1,3,1,3,1,3
10
+ 1,3,1,3,1,3,1,3,1,3
11
+ 1,3,1,3,1,3,1,3,1,3
12
+ 1,3,1,3,1,3,1,3,1,3
13
+ 1,3,1,3,1,3,1,3,1,3
14
+ 1,3,1,3,1,3,1,3,1,3
15
+ 1,1,1,1,1,1,1,1,1,1
16
+ 1,3,1,3,1,3,1,3,1,3
17
+ 1,0,1,0,1,0,1,0,1,0
18
+ 1,0,5,1,0,5,1,0,5,1
19
+ 1,5,1,3,1,0,5,1,0,5
20
+ 1,3,1,3,1,3,1,3,1,3
21
+ 1,3,1,3,1,3,1,3,1,3
22
+ 1,3,1,3,1,3,1,3,1,3
23
+ 1,3,1,3,1,3,1,3,1,3
24
+ 1,3,1,3,1,3,1,3,1,3
25
+ 1,3,1,3,1,3,1,3,1,3
26
+ 1,0,0,1,5,5,5,5,5,5
27
+ 1,3,1,3,1,3,1,3,1,3
28
+ 1,3,1,3,1,3,1,3,1,3
29
+ 1,0,1,0,1,0,1,0,1,0
30
+ 1,0,5,1,0,5,1,0,5,1
31
+ 1,3,1,3,1,3,1,3,1,3
32
+ 1,3,1,3,1,3,1,3,1,3
33
+ 1,5,0,1,5,0,1,5,0,1
34
+ 1,0,1,0,1,0,1,0,1,0
35
+ 1,3,1,3,1,3,1,3,1,3
36
+ 1,3,1,3,1,3,1,3,1,3
37
+ 1,3,1,3,1,3,1,3,1,3
38
+ 1,5,3,0,1,5,3,0,1,5
39
+ 1,5,1,5,1,5,1,5,1,5
40
+ 1,3,1,3,1,3,1,3,1,3
41
+ 1,3,1,3,1,3,1,3,1,3
42
+ 1,3,1,3,1,3,1,3,1,3
43
+ 1,3,1,3,1,3,1,3,1,3
44
+ 1,0,5,1,0,5,1,0,5,1
45
+ 1,3,1,3,1,3,1,3,1,3
46
+ 1,3,1,3,1,3,1,3,1,3
47
+ 1,3,1,3,1,3,1,3,1,3
48
+ 1,3,1,3,1,3,1,3,1,3
49
+ 1,3,1,3,1,3,1,3,1,3
50
+ 1,5,0,1,5,0,1,5,0,1
51
+ 1,5,0,1,5,0,1,5,0,1
52
+ 1,3,1,3,1,3,1,3,1,3
53
+ 1,0,1,3,5,1,3,5,1,3
54
+ 1,0,1,0,1,0,1,0,1,0
55
+ 1,0,1,1,1,1,1,1,1,1
56
+ 1,3,1,3,1,3,1,3,1,3
57
+ 1,5,1,5,1,5,1,5,1,5
58
+ 1,3,1,0,5,1,0,5,1,0
59
+ 1,3,1,3,1,3,1,3,1,3
60
+ 1,3,1,3,1,3,1,3,1,3
61
+ 1,3,1,3,1,3,1,3,1,3
62
+ 1,0,1,1,1,1,1,1,1,1
63
+ 1,1,1,1,1,1,1,1,1,1
64
+ 1,3,1,3,1,3,1,3,1,3
seed_1000/iteration_001/agent_trainer_log/basic_training_metrics_2025-08-20___08-14-30.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nb_rollouts": [
3
+ 128
4
+ ],
5
+ "loss_mb_total": {
6
+ "value_mb_total": [
7
+ 0.39206385612487793,
8
+ -0.016617843881249428,
9
+ -0.1133829802274704,
10
+ 0.0943688377737999,
11
+ -3.867032289505005,
12
+ -0.8449669480323792,
13
+ -0.37973618507385254,
14
+ -2.167485237121582,
15
+ 0.25572967529296875,
16
+ -0.37973612546920776,
17
+ 0.14474299550056458,
18
+ -0.37973615527153015,
19
+ -0.06369704753160477,
20
+ -3.867032289505005,
21
+ -0.5333160161972046,
22
+ -0.5891353487968445,
23
+ 1.1343384981155396,
24
+ -0.12959033250808716,
25
+ 0.42394113540649414,
26
+ 0.663422167301178,
27
+ -0.15981315076351166,
28
+ -2.8817927837371826,
29
+ -2.994982957839966,
30
+ 1.0230556726455688,
31
+ 0.8202357888221741,
32
+ -2.994982957839966,
33
+ 0.027223097160458565,
34
+ -2.994982957839966,
35
+ 0.4894254505634308,
36
+ -0.15981315076351166,
37
+ 0.7859982252120972,
38
+ -3.225872278213501
39
+ ]
40
+ },
41
+ "gradient_norm": [
42
+ 27.744529724121094
43
+ ]
44
+ }
seed_1000/iteration_001/mgid:10345015_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:10754920_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:11645879_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:11816286_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:12802451_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:13550175_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:13655342_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:14228271_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:15733057_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:15865660_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:16317016_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:16358604_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:16527427_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:16830985_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:17030622_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:17339176_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:18768830_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:18955940_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:21242036_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:21773434_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:22674133_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:22908920_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:23104248_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:24698011_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:26297032_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:27954762_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:28124274_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:28146091_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:28169614_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:29594838_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:31696950_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:33566731_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:33816209_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:33855620_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:40496757_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:57359612_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:61016791_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:80373395_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:87817039_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_001/mgid:92400515_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_002/mgid:11888093_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_002/mgid:12569295_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_002/mgid:21829048_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_002/mgid:22669760_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_002/mgid:29038958_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff
 
seed_1000/iteration_002/mgid:29723555_rollout_tree.json ADDED
The diff for this file is too large to render. See raw diff