atutej committed on
Commit
207243e
·
verified ·
1 Parent(s): ab98ec5

Add parsed training metrics and plots

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ training_logs/20260428_203615_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
training_logs/20260428_203615_metrics_a2_rl_stack_pytest_v2_387401.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_RuntimeError,batch_errors/total_RuntimeError
2
+ 0.0,0,64,512,0,0.0,0,0.0,4741.9902,4480.4132,4822.9386,14849,1,2194.273,0.2363,0.0057,0.1467,0.0,0.1855,0.0,0.0,1.0,0.0,0.0289,0.4375,0.2363,12.8265,73.2355,355.5015,58.6,857.9688,502.4673,10.796,0.1113,2.7561,62.0087,643.9144,706.3755,43.6553,2779.7671,53.5533,644.2551,2017.0819,0,1,128.0,1024.0,1022.0,2.0,0.0,0.015625,2.0,,,
3
+ 0.0,0,64,512,1,1.0,1,1.0,5207.4805,4587.3176,5459.6346,15353,1,2543.871,0.2891,0.0032,0.2156,-0.0,0.1818,-0.0003,0.0,1.0,0.0,0.039,0.5,0.2891,13.6703,73.4764,355.181,58.6,857.9688,502.7878,0.0108,0.0549,3.5254,71.9423,683.3507,755.6622,31.4849,1392.6153,52.8988,683.6647,580.5066,0,2,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,
4
+ 0.0,0,64,512,2,2.0,2,1.0,5185.8047,4404.2423,5662.6069,18658,1,2655.8627,0.3789,0.0056,0.1026,-0.0,0.1694,-0.0,0.0,1.0,0.0,0.0307,0.4688,0.3789,16.1691,73.7953,352.0549,59.0,857.9688,505.9138,0.0087,0.0759,3.5505,84.1237,702.8472,787.3638,32.8388,1345.9698,52.8027,703.1638,502.2264,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,,,
5
+ 0.0,0,64,512,3,3.0,3,1.0,6173.4297,5171.7151,6744.9601,23599,1,2913.9495,0.3633,-0.0048,0.1501,0.0,0.1671,0.0,0.0,1.0,0.0,0.0269,0.5781,0.3633,19.7277,74.2299,348.8676,59.3,857.9688,509.1012,0.0089,0.0556,4.1262,92.6868,765.1808,858.2347,33.0996,2000.5081,53.4025,765.4919,1084.7149,0,4,64.0,512.0,512.0,0.0,0.0,,,,,
6
+ 0.0,0,64,512,4,4.0,4,1.0,6649.6289,5533.7975,7170.7765,21118,1405,3018.764,0.3184,0.002,0.0977,-0.0,0.1669,-0.0,0.0,1.0,0.0,0.0231,0.4062,0.3184,20.1003,74.4495,353.0518,58.9,857.9688,504.9169,0.0081,0.052,3.8742,89.0204,756.9617,846.374,33.0429,1451.9746,52.8114,757.3012,548.8843,0,5,64.0,512.0,512.0,0.0,0.0,,,36.8234,,
7
+ 0.0,0,64,512,5,5.0,5,1.0,6952.5547,6301.3432,7273.414,20095,1889,2871.2745,0.3301,0.0065,0.1201,-0.0,0.1707,-0.0,0.0,1.0,0.0,0.0208,0.4688,0.3301,20.2417,74.4911,352.5635,58.9,857.9688,505.4053,0.008,0.0486,3.8545,91.1339,754.2028,845.7448,31.6035,1824.3834,56.2481,754.562,918.5016,0,6,64.0,512.0,512.0,0.0,0.0,,,,,
8
+ 0.0,0,64,512,6,6.0,6,1.0,7821.9141,6336.3427,8397.6233,28454,2297,3823.9451,0.2793,-0.0032,0.1026,-0.0,0.1755,-0.0,0.0,1.0,0.0,0.0234,0.4062,0.2793,24.1808,74.7747,405.2173,52.8,857.9688,452.7515,0.011,0.0608,4.7397,122.4595,855.6703,978.5429,33.4132,4135.5872,14.0068,856.0222,3138.2634,0,7,64.0,512.0,512.0,0.0,0.0,,,,,
9
+ 0.0,0,64,512,0,0.0,0,0.0,5879.5469,5305.2657,6269.3049,14895,1513,2089.5078,0.4043,0.0018,0.096,-0.0,0.1721,-0.0,0.0,1.0,0.0,0.0209,0.5156,0.4043,24.3558,75.0631,401.2662,53.2,857.9688,456.7026,0.01,0.0396,3.3153,69.9986,642.9688,713.3496,32.0209,3348.3309,52.4998,643.311,2579.128,1,8,64.0,512.0,512.0,0.0,0.0,,,,,
10
+ 0.0,0,64,512,1,1.0,1,1.0,6734.0,6444.1044,6893.8818,18643,2175,2384.6491,0.3555,0.0017,0.1221,-0.0,0.1782,-0.0,0.0,1.0,0.0,0.0215,0.4375,0.3555,24.6022,75.2357,411.7228,52.0,857.9688,446.246,0.009,0.0439,3.6223,78.3919,702.1173,780.9468,31.0012,1826.3469,52.3499,702.5107,989.3979,1,9,64.0,512.0,509.0,2.0,0.0,0.046875,3.0,,,
11
+ 0.0,0,64,512,2,2.0,2,1.0,6511.9746,5525.2532,7336.0108,18605,1,2862.176,0.4551,0.0042,0.1035,-0.0001,0.1794,-0.002,0.0,1.0,0.0,0.0187,0.5469,0.4551,24.6203,75.1738,409.3499,52.3,857.9688,448.6188,0.0111,0.044,3.5639,90.4193,734.5286,825.4112,31.3292,1651.042,52.4615,734.9477,769.5638,1,10,64.0,512.0,512.0,0.0,0.0,,,36.5961,,
12
+ 0.0,0,64,512,3,3.0,3,1.0,7150.2969,5831.9757,8037.7941,22262,1340,2965.0407,0.4023,0.0008,0.1053,-0.0,0.1771,-0.0,0.0,1.0,0.0,0.0194,0.5156,0.4023,24.8219,75.2715,408.2212,52.4,857.9688,449.7476,0.0086,0.0515,4.1267,95.6788,762.1538,858.3374,32.4094,2182.5423,51.6537,762.6066,1268.3906,1,11,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,
13
+ 0.0,0,64,512,4,4.0,4,1.0,7161.0645,6208.9091,7530.0569,19269,1,2859.9143,0.2793,-0.0043,0.1026,0.0,0.1809,0.0,0.0,1.0,0.0,0.0175,0.3906,0.2793,24.8525,75.2184,415.8303,51.5,857.9688,442.1384,0.0083,0.0535,3.7269,95.4867,753.3097,849.3001,32.3294,1734.4666,56.6358,753.7596,824.7671,1,12,64.0,512.0,509.0,2.0,0.0,0.03125,2.0,,0.015625,1.0
14
+ 0.0,0,64,512,5,5.0,5,1.0,7395.9434,6484.1768,7894.5227,19744,1,2920.1255,0.3535,0.0007,0.1193,0.0,0.1803,0.0,0.0,1.0,0.0,0.0186,0.4844,0.3535,24.9543,75.2349,412.6592,51.9,857.9688,445.3096,0.0093,0.05,3.7965,93.9943,736.3408,830.7211,32.1481,1894.2929,59.5623,736.6765,1000.1749,1,13,64.0,512.0,512.0,0.0,0.0,,,,,
15
+ 0.0,0,64,512,6,6.0,6,1.0,8946.791,7245.6212,9537.7237,29403,2237,4155.2128,0.2578,0.002,0.073,0.0,0.1889,0.0,0.0,1.0,0.0,0.0133,0.3438,0.2578,26.1139,75.4758,416.4844,51.5,857.9688,441.4843,0.0095,0.0622,4.9616,132.9495,904.4659,1037.814,32.4082,7012.0493,13.8671,904.8018,5955.3685,1,14,,,,,,,,,,
training_logs/20260428_203615_metrics_report.md ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SkyRL Training Metrics Analysis
2
+
3
+ Generated from 1 log file
4
+
5
+ ## Overview
6
+
7
+ | Log File | Total Steps | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
8
+ |----------|-------------|---------------|---------------------|-------------------|----------------|
9
+ | a2_rl_stack_pytest_v2_387401 | 14 | 14 | 0.3359 | 0.4551 | 34579.9 |
10
+
11
+ ## Async Metrics
12
+
13
+ | | Mean | Std | Min | Max | Count |
14
+ |:------------------------------|-----------:|---------:|------:|------:|--------:|
15
+ | async/discard_rate | 0 | 0 | 0 | 0 | 14 |
16
+ | async/discarded_count | 0 | 0 | 0 | 0 | 14 |
17
+ | async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
18
+ | async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
19
+ | async/staleness_max | 3 | 2.0755 | 0 | 6 | 14 |
20
+ | async/staleness_mean | 3 | 2.0755 | 0 | 6 | 14 |
21
+ | async/staleness_min | 3 | 2.0755 | 0 | 6 | 14 |
22
+ | async/staleness_ratio | 0.857143 | 0.363137 | 0 | 1 | 14 |
23
+
24
+ ## Generate Metrics
25
+
26
+ | | Mean | Std | Min | Max | Count |
27
+ |:-------------------------------------|----------:|---------:|---------:|---------:|--------:|
28
+ | generate/avg_num_tokens | 6608.03 | 1126.45 | 4741.99 | 8946.79 | 14 |
29
+ | generate/avg_tokens_non_zero_rewards | 5704.32 | 854.75 | 4404.24 | 7245.62 | 14 |
30
+ | generate/avg_tokens_zero_rewards | 7073.66 | 1246.64 | 4822.94 | 9537.72 | 14 |
31
+ | generate/max_num_tokens | 20353.4 | 4453.55 | 14849 | 29403 | 14 |
32
+ | generate/min_num_tokens | 918.786 | 993.044 | 1 | 2297 | 14 |
33
+ | generate/std_num_tokens | 2875.61 | 558.202 | 2089.51 | 4155.21 | 14 |
34
+
35
+ ## Loss Metrics
36
+
37
+ | | Mean | Std | Min | Max | Count |
38
+ |:----------------------------|-----------:|-----------:|--------:|-------:|--------:|
39
+ | loss/avg_final_rewards | 0.335943 | 0.0629694 | 0.2363 | 0.4551 | 14 |
40
+ | loss/avg_raw_advantages | 0.00156429 | 0.00358149 | -0.0048 | 0.0065 | 14 |
41
+ | loss/avg_raw_advantages_abs | 0.118371 | 0.0344238 | 0.073 | 0.2156 | 14 |
42
+
43
+ ## Policy Metrics
44
+
45
+ | | Mean | Std | Min | Max | Count |
46
+ |:---------------------------|-------------:|------------:|--------:|-------:|--------:|
47
+ | policy/final_loss | -7.14286e-06 | 2.67261e-05 | -0.0001 | 0 | 14 |
48
+ | policy/policy_entropy | 0.1767 | 0.00675209 | 0.1669 | 0.1889 | 14 |
49
+ | policy/policy_loss | -0.000164286 | 0.000534368 | -0.002 | 0 | 14 |
50
+ | policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
51
+ | policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
52
+ | policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
53
+ | policy/raw_grad_norm | 0.02305 | 0.00651681 | 0.0133 | 0.039 | 14 |
54
+
55
+ ## Reward Metrics
56
+
57
+ | | Mean | Std | Min | Max | Count |
58
+ |:----------------------|---------:|----------:|-------:|-------:|--------:|
59
+ | reward/avg_pass_at_8 | 0.464286 | 0.0649824 | 0.3438 | 0.5781 | 14 |
60
+ | reward/avg_raw_reward | 0.335943 | 0.0629694 | 0.2363 | 0.4551 | 14 |
61
+
62
+ ## System Metrics
63
+
64
+ | | Mean | Std | Min | Max | Count |
65
+ |:------------------------|---------:|-------------:|---------:|---------:|--------:|
66
+ | system/process_rss_gb | 21.517 | 4.48783 | 12.8265 | 26.1139 | 14 |
67
+ | system/process_vms_gb | 74.6518 | 0.727882 | 73.2355 | 75.4758 | 14 |
68
+ | system/ram_available_gb | 385.569 | 29.6712 | 348.868 | 416.484 | 14 |
69
+ | system/ram_percent | 55.0643 | 3.46424 | 51.5 | 59.3 | 14 |
70
+ | system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
71
+ | system/ram_used_gb | 472.399 | 29.6712 | 441.484 | 509.101 | 14 |
72
+
73
+ ## Timing Metrics
74
+
75
+ | | Mean | Std | Min | Max | Count |
76
+ |:--------------------------------------|-------------:|-------------:|----------:|----------:|--------:|
77
+ | timing/cleanup_old_checkpoints | 0.779807 | 2.88286 | 0.008 | 10.796 | 14 |
78
+ | timing/compute_advantages_and_returns | 0.0574143 | 0.0179349 | 0.0396 | 0.1113 | 14 |
79
+ | timing/convert_to_training_input | 3.82427 | 0.55475 | 2.7561 | 4.9616 | 14 |
80
+ | timing/fwd_logprobs_values_reward | 90.7353 | 18.9282 | 62.0087 | 132.95 | 14 |
81
+ | timing/policy_train | 742.715 | 71.7004 | 642.969 | 904.466 | 14 |
82
+ | timing/run_training | 833.87 | 90.3807 | 706.375 | 1037.81 | 14 |
83
+ | timing/save_checkpoints | 33.056 | 3.13253 | 31.0012 | 43.6553 | 14 |
84
+ | timing/step | 2469.99 | 1529.45 | 1345.97 | 7012.05 | 14 |
85
+ | timing/sync_weights | 48.1967 | 14.6734 | 13.8671 | 59.5623 | 14 |
86
+ | timing/train_critic_and_policy | 743.077 | 71.7036 | 643.311 | 904.802 | 14 |
87
+ | timing/wait_for_generation_buffer | 1584.07 | 1485.24 | 502.226 | 5955.37 | 14 |
88
+ | timing/save_hf_model | 36.7097 | 0.160725 | 36.5961 | 36.8234 | 2 |
89
+
90
+ ## Trainer Metrics
91
+
92
+ | | Mean | Std | Min | Max | Count |
93
+ |:--------------------|-------:|---------:|------:|------:|--------:|
94
+ | trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
95
+ | trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
96
+
97
+ ## Batch Errors Metrics
98
+
99
+ | | Mean | Std | Min | Max | Count |
100
+ |:----------------------------------------|------------:|------------:|-----------:|------------:|--------:|
101
+ | batch_errors/total_batches | 68.9231 | 17.7504 | 64 | 128 | 13 |
102
+ | batch_errors/total_instances | 551.385 | 142.003 | 512 | 1024 | 13 |
103
+ | batch_errors/total_successful | 550.462 | 141.684 | 509 | 1022 | 13 |
104
+ | batch_errors/total_failed | 0.692308 | 0.85485 | 0 | 2 | 13 |
105
+ | batch_errors/total_masked | 0 | 0 | 0 | 0 | 13 |
106
+ | batch_errors/avg_VerifierTimeoutError | 0.0260417 | 0.0127578 | 0.015625 | 0.046875 | 6 |
107
+ | batch_errors/total_VerifierTimeoutError | 1.83333 | 0.752773 | 1 | 3 | 6 |
108
+ | batch_errors/avg_RuntimeError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
109
+ | batch_errors/total_RuntimeError | 1 | nan | 1 | 1 | 1 |
110
+
111
+ ## Training Progression by Log
112
+
113
+ ### a2_rl_stack_pytest_v2_387401
114
+
115
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
116
+ |------|--------|--------|-----|------|---------------|-------------|
117
+ | 1 | 0.2363 | 0.4375 | 0.000000 | 0.0000 | 2779.8 | 2017.1 |
118
+ | 2 | 0.2891 | 0.5000 | 0.000000 | -0.0000 | 1392.6 | 580.5 |
119
+ | 3 | 0.3789 | 0.4688 | 0.000000 | -0.0000 | 1346.0 | 502.2 |
120
+ | 4 | 0.3633 | 0.5781 | 0.000000 | 0.0000 | 2000.5 | 1084.7 |
121
+ | 5 | 0.3184 | 0.4062 | 0.000000 | -0.0000 | 1452.0 | 548.9 |
122
+ | 6 | 0.3301 | 0.4688 | 0.000000 | -0.0000 | 1824.4 | 918.5 |
123
+ | 7 | 0.2793 | 0.4062 | 0.000000 | -0.0000 | 4135.6 | 3138.3 |
124
+ | 8 | 0.4043 | 0.5156 | 0.000000 | -0.0000 | 3348.3 | 2579.1 |
125
+ | 9 | 0.3555 | 0.4375 | 0.000000 | -0.0000 | 1826.3 | 989.4 |
126
+ | 10 | 0.4551 | 0.5469 | 0.000000 | -0.0001 | 1651.0 | 769.6 |
127
+ | 11 | 0.4023 | 0.5156 | 0.000000 | -0.0000 | 2182.5 | 1268.4 |
128
+ | 12 | 0.2793 | 0.3906 | 0.000000 | 0.0000 | 1734.5 | 824.8 |
129
+ | 13 | 0.3535 | 0.4844 | 0.000000 | 0.0000 | 1894.3 | 1000.2 |
130
+ | 14 | 0.2578 | 0.3438 | 0.000000 | 0.0000 | 7012.0 | 5955.4 |
131
+
132
+ ## Timing Analysis
133
+
134
+ ### Average Time Breakdown (% of step time; components are nested, so rows overlap and do not sum to 100%)
135
+
136
+ | Component | Avg % of Step Time |
137
+ |-----------|-------------------|
138
+ | wait_for_generation_buffer | 56.5% |
139
+ | run_training | 40.7% |
140
+ | train_critic_and_policy | 36.4% |
141
+ | policy_train | 36.4% |
142
+ | fwd_logprobs_values_reward | 4.4% |
143
+ | sync_weights | 2.6% |
144
+ | save_hf_model | 2.4% |
145
+ | save_checkpoints | 1.6% |
146
+ | convert_to_training_input | 0.2% |
147
+ | cleanup_old_checkpoints | 0.0% |
148
+ | compute_advantages_and_returns | 0.0% |
149
+
150
+ ## vLLM Inference Engine Analysis
151
+
152
+ Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
153
+
154
+ > **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
155
+ > so we typically capture stats from one engine per timestamp. The stats shown are
156
+ > **per-engine** values. Multiply by num_inference_engines for cluster-wide estimates.
157
+
158
+ ### Summary by Log (Per-Engine Stats)
159
+
160
+ | Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
161
+ |-----|-------------------|-------------------|--------------------------|----------------|------------------|
162
+ | a2_rl_stack_pytest_v2_387401 | 3.5 | 0.0 | 69.6 tok/s | 5.9% | 85.6% |
163
+
164
+ ### Utilization Analysis (Per-Engine)
165
+
166
+ Key indicators of inference engine utilization:
167
+
168
+ - **Running requests/engine**: Concurrent requests being processed by each engine
169
+ - **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
170
+ - **Generation throughput**: Decode tokens/sec per engine
171
+ - 8B model on H100 can do **1000+ tok/s** when saturated
172
+ - If seeing <300 tok/s with 0 waiting, engine is **starved for requests**
173
+
174
+ #### a2_rl_stack_pytest_v2_387401
175
+
176
+ - **Running requests/engine**: avg=3.5, max=14
177
+ - **Waiting requests**: avg=0.0, max=0
178
+ - **Generation throughput/engine**: avg=69.6 tok/s, max=319.7 tok/s
179
+ - **KV cache usage**: avg=5.9%
180
+ - **Prefix cache hit rate**: avg=85.6%
181
+ - ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 3.5 running)
182
+ - Bottleneck is likely upstream (environment execution, not inference)
183
+
184
+ ## Trial-Level Analysis (from result.json)
185
+
186
+ Total trials parsed: 7168
187
+
188
+ ### Turn Count Statistics
189
+
190
+ | Metric | Value |
191
+ |--------|-------|
192
+ | Mean | 3.3 |
193
+ | Median | 3.0 |
194
+ | Std | 1.6 |
195
+ | Min | 2 |
196
+ | Max | 19 |
197
+ | Count | 7168 |
198
+
199
+ ### Exception Distribution
200
+
201
+ | Exception Type | Count | % |
202
+ |---------------|-------|---|
203
+ | No exception | 7143 | 99.7% |
204
+ | AgentTimeoutError | 12 | 0.2% |
205
+ | VerifierTimeoutError | 11 | 0.2% |
206
+ | RuntimeError | 1 | 0.0% |
207
+ | ContextLengthExceededError | 1 | 0.0% |
208
+
209
+ ### Turn Count by Exception Type
210
+
211
+ | Exception Type | Mean Turns | Median Turns | Count |
212
+ |---------------|-----------|-------------|-------|
213
+ | ContextLengthExceededError | 15.0 | 15.0 | 1 |
214
+ | AgentTimeoutError | 9.3 | 9.5 | 12 |
215
+ | VerifierTimeoutError | 4.4 | 4.0 | 11 |
216
+ | RuntimeError | 4.0 | 4.0 | 1 |
217
+ | No exception | 3.3 | 3.0 | 7143 |
218
+
219
+ ### Turn Count by Outcome
220
+
221
+ | Outcome | Mean Turns | Median Turns | Count |
222
+ |---------|-----------|-------------|-------|
223
+ | Success | 2.9 | 3.0 | 2408 |
224
+ | Failure | 3.5 | 3.0 | 4748 |
225
+
226
+ ### Reward Summary
227
+
228
+ - Mean reward: 0.3365
229
+ - Success rate: 33.7%
230
+ - Trials with reward data: 7156
231
+
training_logs/20260428_203615_metrics_table.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_RuntimeError,batch_errors/total_RuntimeError,global_step
2
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,0,0.0,0,0.0,4741.9902,4480.4132,4822.9386,14849,1,2194.273,0.2363,0.0057,0.1467,0.0,0.1855,0.0,0.0,1.0,0.0,0.0289,0.4375,0.2363,12.8265,73.2355,355.5015,58.6,857.9688,502.4673,10.796,0.1113,2.7561,62.0087,643.9144,706.3755,43.6553,2779.7671,53.5533,644.2551,2017.0819,0,1,128.0,1024.0,1022.0,2.0,0.0,0.015625,2.0,,,,1
3
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,1,1.0,1,1.0,5207.4805,4587.3176,5459.6346,15353,1,2543.871,0.2891,0.0032,0.2156,-0.0,0.1818,-0.0003,0.0,1.0,0.0,0.039,0.5,0.2891,13.6703,73.4764,355.181,58.6,857.9688,502.7878,0.0108,0.0549,3.5254,71.9423,683.3507,755.6622,31.4849,1392.6153,52.8988,683.6647,580.5066,0,2,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,,2
4
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,2,2.0,2,1.0,5185.8047,4404.2423,5662.6069,18658,1,2655.8627,0.3789,0.0056,0.1026,-0.0,0.1694,-0.0,0.0,1.0,0.0,0.0307,0.4688,0.3789,16.1691,73.7953,352.0549,59.0,857.9688,505.9138,0.0087,0.0759,3.5505,84.1237,702.8472,787.3638,32.8388,1345.9698,52.8027,703.1638,502.2264,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,,,,3
5
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,3,3.0,3,1.0,6173.4297,5171.7151,6744.9601,23599,1,2913.9495,0.3633,-0.0048,0.1501,0.0,0.1671,0.0,0.0,1.0,0.0,0.0269,0.5781,0.3633,19.7277,74.2299,348.8676,59.3,857.9688,509.1012,0.0089,0.0556,4.1262,92.6868,765.1808,858.2347,33.0996,2000.5081,53.4025,765.4919,1084.7149,0,4,64.0,512.0,512.0,0.0,0.0,,,,,,4
6
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,4,4.0,4,1.0,6649.6289,5533.7975,7170.7765,21118,1405,3018.764,0.3184,0.002,0.0977,-0.0,0.1669,-0.0,0.0,1.0,0.0,0.0231,0.4062,0.3184,20.1003,74.4495,353.0518,58.9,857.9688,504.9169,0.0081,0.052,3.8742,89.0204,756.9617,846.374,33.0429,1451.9746,52.8114,757.3012,548.8843,0,5,64.0,512.0,512.0,0.0,0.0,,,36.8234,,,5
7
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,5,5.0,5,1.0,6952.5547,6301.3432,7273.414,20095,1889,2871.2745,0.3301,0.0065,0.1201,-0.0,0.1707,-0.0,0.0,1.0,0.0,0.0208,0.4688,0.3301,20.2417,74.4911,352.5635,58.9,857.9688,505.4053,0.008,0.0486,3.8545,91.1339,754.2028,845.7448,31.6035,1824.3834,56.2481,754.562,918.5016,0,6,64.0,512.0,512.0,0.0,0.0,,,,,,6
8
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,6,6.0,6,1.0,7821.9141,6336.3427,8397.6233,28454,2297,3823.9451,0.2793,-0.0032,0.1026,-0.0,0.1755,-0.0,0.0,1.0,0.0,0.0234,0.4062,0.2793,24.1808,74.7747,405.2173,52.8,857.9688,452.7515,0.011,0.0608,4.7397,122.4595,855.6703,978.5429,33.4132,4135.5872,14.0068,856.0222,3138.2634,0,7,64.0,512.0,512.0,0.0,0.0,,,,,,7
9
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,0,0.0,0,0.0,5879.5469,5305.2657,6269.3049,14895,1513,2089.5078,0.4043,0.0018,0.096,-0.0,0.1721,-0.0,0.0,1.0,0.0,0.0209,0.5156,0.4043,24.3558,75.0631,401.2662,53.2,857.9688,456.7026,0.01,0.0396,3.3153,69.9986,642.9688,713.3496,32.0209,3348.3309,52.4998,643.311,2579.128,1,8,64.0,512.0,512.0,0.0,0.0,,,,,,8
10
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,1,1.0,1,1.0,6734.0,6444.1044,6893.8818,18643,2175,2384.6491,0.3555,0.0017,0.1221,-0.0,0.1782,-0.0,0.0,1.0,0.0,0.0215,0.4375,0.3555,24.6022,75.2357,411.7228,52.0,857.9688,446.246,0.009,0.0439,3.6223,78.3919,702.1173,780.9468,31.0012,1826.3469,52.3499,702.5107,989.3979,1,9,64.0,512.0,509.0,2.0,0.0,0.046875,3.0,,,,9
11
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,2,2.0,2,1.0,6511.9746,5525.2532,7336.0108,18605,1,2862.176,0.4551,0.0042,0.1035,-0.0001,0.1794,-0.002,0.0,1.0,0.0,0.0187,0.5469,0.4551,24.6203,75.1738,409.3499,52.3,857.9688,448.6188,0.0111,0.044,3.5639,90.4193,734.5286,825.4112,31.3292,1651.042,52.4615,734.9477,769.5638,1,10,64.0,512.0,512.0,0.0,0.0,,,36.5961,,,10
12
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,3,3.0,3,1.0,7150.2969,5831.9757,8037.7941,22262,1340,2965.0407,0.4023,0.0008,0.1053,-0.0,0.1771,-0.0,0.0,1.0,0.0,0.0194,0.5156,0.4023,24.8219,75.2715,408.2212,52.4,857.9688,449.7476,0.0086,0.0515,4.1267,95.6788,762.1538,858.3374,32.4094,2182.5423,51.6537,762.6066,1268.3906,1,11,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,,11
13
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,4,4.0,4,1.0,7161.0645,6208.9091,7530.0569,19269,1,2859.9143,0.2793,-0.0043,0.1026,0.0,0.1809,0.0,0.0,1.0,0.0,0.0175,0.3906,0.2793,24.8525,75.2184,415.8303,51.5,857.9688,442.1384,0.0083,0.0535,3.7269,95.4867,753.3097,849.3001,32.3294,1734.4666,56.6358,753.7596,824.7671,1,12,64.0,512.0,509.0,2.0,0.0,0.03125,2.0,,0.015625,1.0,12
14
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,5,5.0,5,1.0,7395.9434,6484.1768,7894.5227,19744,1,2920.1255,0.3535,0.0007,0.1193,0.0,0.1803,0.0,0.0,1.0,0.0,0.0186,0.4844,0.3535,24.9543,75.2349,412.6592,51.9,857.9688,445.3096,0.0093,0.05,3.7965,93.9943,736.3408,830.7211,32.1481,1894.2929,59.5623,736.6765,1000.1749,1,13,64.0,512.0,512.0,0.0,0.0,,,,,,13
15
+ a2_rl_stack_pytest_v2_387401,0.0,0,64,512,6,6.0,6,1.0,8946.791,7245.6212,9537.7237,29403,2237,4155.2128,0.2578,0.002,0.073,0.0,0.1889,0.0,0.0,1.0,0.0,0.0133,0.3438,0.2578,26.1139,75.4758,416.4844,51.5,857.9688,441.4843,0.0095,0.0622,4.9616,132.9495,904.4659,1037.814,32.4082,7012.0493,13.8671,904.8018,5955.3685,1,14,,,,,,,,,,,14
training_logs/20260428_203615_reward_vs_steps.png ADDED

Git LFS Details

  • SHA256: b76373f5ca18051354f9b4d1e962ecf1aa8d1aa5b0b6e5bf39148f0cc4f30874
  • Pointer size: 131 Bytes
  • Size of remote file: 145 kB
training_logs/20260428_203615_trial_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_203615_turn_count_distribution.png ADDED
training_logs/20260428_203615_vllm_metrics_a2_rl_stack_pytest_v2_387401.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_203615_vllm_metrics_table.csv ADDED
The diff for this file is too large to render. See raw diff