atutej committed on
Commit
0a97bd5
·
verified ·
1 Parent(s): e803168

Add parsed training metrics and plots

Browse files
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ training_logs/20260428_204143_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
38
+ training_logs/20260428_204143_turn_count_distribution.png filter=lfs diff=lfs merge=lfs -text
training_logs/20260428_204143_metrics_a2_rl_stack_selfdoc_v2_387394.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaValidationError,batch_errors/total_DaytonaValidationError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError
2
+ 0.0,0,64,512,0,0.0,0,0.0,7275.4629,6357.3333,7351.1649,21632,1649,3419.6317,0.0762,-0.0004,0.0106,-0.0,0.2188,-0.0,0.0,1.0,0.0,0.0103,0.0938,0.0762,17.0837,73.4155,363.0786,57.7,857.9687,494.8901,10.7412,0.2127,3.9195,104.1328,802.9923,907.6513,43.3412,4905.1695,53.8667,803.3054,3939.7318,0,1,128,1024,1024,0,0,,,,,,,
3
+ 0.0,0,64,512,1,1.0,1,1.0,7957.168,4530.8788,8193.2171,27882,1407,3880.4011,0.0645,0.0001,0.0054,0.0,0.2389,0.0,0.0,1.0,0.0,0.0107,0.0781,0.0645,21.7723,73.6329,360.9631,57.9,857.9687,497.0056,0.0153,0.0599,5.1948,103.7265,845.794,949.9143,43.9428,2330.4149,56.3595,846.1277,1318.9172,0,2,64,512,512,0,0,,,,,,,
4
+ 0.0,0,64,512,2,2.0,2,1.0,9141.6328,3483.3529,9335.9576,31173,1378,4855.3059,0.0332,-0.0006,0.0052,0.0,0.2475,0.0,0.0,1.0,0.0,0.002,0.0469,0.0332,23.295,73.7708,359.9666,58.0,857.9687,498.0021,0.0409,0.0704,5.5762,136.0789,929.3459,1065.8459,36.5989,2676.449,54.5569,929.6963,1550.4301,0,3,64,512,511,1,0,0.015625,1.0,,,,,
5
+ 0.0,0,64,512,3,3.0,3,1.0,8364.1445,4752.7097,8596.8981,23247,1,4259.9952,0.0605,0.0007,0.0022,-0.0,0.2539,-0.0,0.0,1.0,0.0,0.0037,0.0625,0.0605,23.2993,73.6543,356.3336,58.5,857.9687,501.6351,0.0181,0.117,4.3804,128.8873,865.2484,994.5704,33.9489,2515.8877,60.3128,865.5658,1456.5852,0,4,64,512,512,0,0,,,,,,,
6
+ 0.0,0,64,512,4,4.0,4,1.0,8814.9668,3657.25,9068.625,26822,1549,4838.5142,0.0469,0.0,0.0,0.0,0.2494,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,23.7327,73.9993,393.292,54.2,857.9687,464.6767,0.0106,0.1039,4.8987,138.3911,914.3701,1053.2169,47.7862,2550.4358,53.8457,914.7216,1438.4277,0,5,64,512,512,0,0,,,35.6986,,,,
7
+ 0.0,0,64,512,5,5.0,5,1.0,9775.6992,0.0,9775.6992,29538,1766,4947.5396,0.0,0.0,0.0,0.0,0.2604,0.0,0.0,1.0,0.0,0.0,0.0,0.0,23.9927,74.1619,387.3311,54.9,857.9687,470.6376,0.0092,0.07,5.5063,151.9273,971.4513,1123.7893,46.0193,2638.3422,54.8806,971.7915,1454.126,0,6,64,512,512,0,0,,,,,,,
8
+ 0.0,0,64,512,6,6.0,6,1.0,12587.3262,8771.6667,12655.5984,31728,1795,7162.703,0.0176,0.0002,0.0017,0.0,0.2611,0.0,0.0,1.0,0.0,0.0025,0.0312,0.0176,25.2853,74.2525,394.0407,54.1,857.9687,463.928,0.0091,0.1162,5.7226,225.2383,1227.1218,1452.8258,30.4456,6368.1796,12.6889,1227.4709,4896.8985,0,7,64,512,512,0,0,,,,,,,
9
+ 0.0,0,64,512,0,0.0,0,0.0,8240.916,5900.9189,8423.1895,28834,1883,4195.3896,0.0723,-0.0008,0.0172,-0.0,0.2536,-0.0,0.0,1.0,0.0,0.0101,0.0938,0.0723,25.3742,74.3453,391.6837,54.3,857.9687,466.285,0.008,0.0717,5.0261,118.7069,857.695,976.8176,30.3715,5271.8876,56.4666,858.0387,4233.5266,1,8,64,512,512,0,0,,,,,,,
10
+ 0.0,0,64,512,1,1.0,1,1.0,9040.8418,3583.0968,9392.5884,26852,1490,4625.472,0.0605,-0.0,0.0026,-0.0,0.2651,-0.0,0.0,1.0,0.0,0.0031,0.0625,0.0605,25.4032,74.2527,390.5529,54.5,857.9687,467.4158,0.0143,0.0648,5.5196,117.0133,875.2519,992.6904,30.9643,3170.1598,55.3772,875.612,2116.5253,1,9,64,512,512,0,0,,,,,,,
11
+ 0.0,0,64,512,2,2.0,2,1.0,10337.2324,4228.0,10547.0444,30710,2240,5203.9956,0.0332,0.0002,0.0055,-0.0,0.2663,-0.0,0.0,1.0,0.0,0.0015,0.0469,0.0332,25.5461,74.2853,382.0897,55.5,857.9687,475.879,0.0093,0.0713,5.7217,144.2946,997.5803,1142.3512,51.3102,2492.1314,54.8136,997.985,1289.203,1,10,64,512,512,0,0,,,33.3725,,,,
12
+ 0.0,0,64,512,3,3.0,3,1.0,9823.8594,5472.55,10192.6144,31032,1842,4939.9625,0.0781,0.0,0.0,0.0,0.2664,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,26.0123,74.701,377.9515,55.9,857.9687,480.0172,0.013,0.0713,5.5893,124.439,955.5093,1080.4547,51.6951,2944.7977,57.2244,955.9438,1801.4832,1,11,64,512,511,1,1,,,,0.015625,1.0,,
13
+ 0.0,0,64,512,4,4.0,4,1.0,9383.625,4145.0833,9641.2582,25296,1,5089.0981,0.0469,0.0,0.0,0.0,0.2653,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.8842,74.5652,400.3334,53.3,857.9687,457.6353,0.0091,0.0605,4.7815,144.4199,946.7012,1091.543,51.978,2861.4366,53.809,947.0622,1711.2585,1,12,55,440,439,1,1,,,,,,0.01818181818181818,1.0
training_logs/20260428_204143_metrics_a2_rl_stack_selfdoc_v2_387395.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked
2
+ 0.0,0,64,512,5,2.1875,0,0.4375,10429.4316,0.0,10429.4316,30063,1664,5216.7129,0.0,0.0,0.0,0.0,0.2684,0.0,0.0,1.0,0.0,0.0,0.0,0.0,23.0165,73.8899,371.7734,56.7,857.9687,486.1953,11.322,0.0818,5.7184,157.0005,1013.8685,1171.3008,45.1988,5267.0618,53.0551,1014.2173,4036.9872,1,13,64,512,512,0,0
3
+ 0.0,0,64,512,1,1.0,1,1.0,11246.5156,6659.4545,11347.2295,30998,907,6076.7695,0.0215,0.0004,0.0034,0.0,0.2719,0.0,0.0,1.0,0.0,0.0047,0.0312,0.0215,24.594,74.0568,357.858,58.3,857.9687,500.1107,0.0102,0.0736,5.798,187.2758,1083.085,1270.7739,33.2045,2948.7362,59.3865,1083.4237,1612.7324,1,14,17,136,136,0,0
training_logs/20260428_204143_metrics_report.md ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SkyRL Training Metrics Analysis
2
+
3
+ Generated from 2 log files
4
+
5
+ ## Overview
6
+
7
+ | Log File | Total Steps | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
8
+ |----------|-------------|---------------|---------------------|-------------------|----------------|
9
+ | a2_rl_stack_selfdoc_v2_387394 | 12 | 12 | 0.0492 | 0.0781 | 40725.3 |
10
+ | a2_rl_stack_selfdoc_v2_387395 | 14 | 2 | 0.0107 | 0.0215 | 8215.8 |
11
+
12
+ ## Async Metrics
13
+
14
+ | | Mean | Std | Min | Max | Count |
15
+ |:------------------------------|-----------:|---------:|------:|------:|--------:|
16
+ | async/discard_rate | 0 | 0 | 0 | 0 | 14 |
17
+ | async/discarded_count | 0 | 0 | 0 | 0 | 14 |
18
+ | async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
19
+ | async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
20
+ | async/staleness_max | 2.64286 | 1.94569 | 0 | 6 | 14 |
21
+ | async/staleness_mean | 2.44196 | 1.82505 | 0 | 6 | 14 |
22
+ | async/staleness_min | 2.28571 | 1.93862 | 0 | 6 | 14 |
23
+ | async/staleness_ratio | 0.816964 | 0.376969 | 0 | 1 | 14 |
24
+
25
+ ## Generate Metrics
26
+
27
+ | | Mean | Std | Min | Max | Count |
28
+ |:-------------------------------------|---------:|---------:|---------:|---------:|--------:|
29
+ | generate/avg_num_tokens | 9458.49 | 1395.87 | 7275.46 | 12587.3 | 14 |
30
+ | generate/avg_tokens_non_zero_rewards | 4395.88 | 2361 | 0 | 8771.67 | 14 |
31
+ | generate/avg_tokens_zero_rewards | 9639.32 | 1363.22 | 7351.16 | 12655.6 | 14 |
32
+ | generate/max_num_tokens | 28271.9 | 3149.44 | 21632 | 31728 | 14 |
33
+ | generate/min_num_tokens | 1398 | 664.724 | 1 | 2240 | 14 |
34
+ | generate/std_num_tokens | 4907.96 | 917.956 | 3419.63 | 7162.7 | 14 |
35
+
36
+ ## Loss Metrics
37
+
38
+ | | Mean | Std | Min | Max | Count |
39
+ |:----------------------------|-------------:|------------:|--------:|-------:|--------:|
40
+ | loss/avg_final_rewards | 0.0436714 | 0.026731 | 0 | 0.0781 | 14 |
41
+ | loss/avg_raw_advantages | -1.42857e-05 | 0.000382013 | -0.0008 | 0.0007 | 14 |
42
+ | loss/avg_raw_advantages_abs | 0.00384286 | 0.0049143 | 0 | 0.0172 | 14 |
43
+
44
+ ## Policy Metrics
45
+
46
+ | | Mean | Std | Min | Max | Count |
47
+ |:---------------------------|-----------:|-----------:|--------:|-------:|--------:|
48
+ | policy/final_loss | 0 | 0 | -0 | 0 | 14 |
49
+ | policy/policy_entropy | 0.256214 | 0.0142426 | 0.2188 | 0.2719 | 14 |
50
+ | policy/policy_loss | 0 | 0 | -0 | 0 | 14 |
51
+ | policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
52
+ | policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
53
+ | policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
54
+ | policy/raw_grad_norm | 0.00347143 | 0.00403607 | 0 | 0.0107 | 14 |
55
+
56
+ ## Reward Metrics
57
+
58
+ | | Mean | Std | Min | Max | Count |
59
+ |:----------------------|----------:|---------:|------:|-------:|--------:|
60
+ | reward/avg_pass_at_8 | 0.0513429 | 0.029676 | 0 | 0.0938 | 14 |
61
+ | reward/avg_raw_reward | 0.0436714 | 0.026731 | 0 | 0.0781 | 14 |
62
+
63
+ ## System Metrics
64
+
65
+ | | Mean | Std | Min | Max | Count |
66
+ |:------------------------|---------:|-------------:|---------:|---------:|--------:|
67
+ | system/process_rss_gb | 23.878 | 2.32823 | 17.0837 | 26.0123 | 14 |
68
+ | system/process_vms_gb | 74.0702 | 0.367949 | 73.4155 | 74.701 | 14 |
69
+ | system/ram_available_gb | 377.661 | 15.6622 | 356.334 | 400.333 | 14 |
70
+ | system/ram_percent | 55.9857 | 1.82456 | 53.3 | 58.5 | 14 |
71
+ | system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
72
+ | system/ram_used_gb | 480.308 | 15.6622 | 457.635 | 501.635 | 14 |
73
+
74
+ ## Timing Metrics
75
+
76
+ | | Mean | Std | Min | Max | Count |
77
+ |:--------------------------------------|-------------:|-------------:|----------:|----------:|--------:|
78
+ | timing/cleanup_old_checkpoints | 1.58788 | 4.00255 | 0.008 | 11.322 | 14 |
79
+ | timing/compute_advantages_and_returns | 0.0889357 | 0.0403961 | 0.0599 | 0.2127 | 14 |
80
+ | timing/convert_to_training_input | 5.23951 | 0.571532 | 3.9195 | 5.798 | 14 |
81
+ | timing/fwd_logprobs_values_reward | 141.538 | 32.7042 | 103.727 | 225.238 | 14 |
82
+ | timing/policy_train | 949.001 | 109.823 | 802.992 | 1227.12 | 14 |
83
+ | timing/run_training | 1090.98 | 141.718 | 907.651 | 1452.83 | 14 |
84
+ | timing/save_checkpoints | 41.2004 | 8.31398 | 30.3715 | 51.978 | 14 |
85
+ | timing/step | 3495.79 | 1337.8 | 2330.41 | 6368.18 | 14 |
86
+ | timing/sync_weights | 52.6174 | 11.6862 | 12.6889 | 60.3128 | 14 |
87
+ | timing/train_critic_and_policy | 949.354 | 109.831 | 803.305 | 1227.47 | 14 |
88
+ | timing/wait_for_generation_buffer | 2346.92 | 1300.11 | 1289.2 | 4896.9 | 14 |
89
+ | timing/save_hf_model | 34.5356 | 1.6448 | 33.3725 | 35.6986 | 2 |
90
+
91
+ ## Trainer Metrics
92
+
93
+ | | Mean | Std | Min | Max | Count |
94
+ |:--------------------|-------:|---------:|------:|------:|--------:|
95
+ | trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
96
+ | trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
97
+
98
+ ## Batch Errors Metrics
99
+
100
+ | | Mean | Std | Min | Max | Count |
101
+ |:------------------------------------------------|------------:|-----------:|------------:|-------------:|--------:|
102
+ | batch_errors/total_batches | 64.5714 | 22.1558 | 17 | 128 | 14 |
103
+ | batch_errors/total_instances | 516.571 | 177.246 | 136 | 1024 | 14 |
104
+ | batch_errors/total_successful | 516.357 | 177.284 | 136 | 1024 | 14 |
105
+ | batch_errors/total_failed | 0.214286 | 0.425815 | 0 | 1 | 14 |
106
+ | batch_errors/total_masked | 0.142857 | 0.363137 | 0 | 1 | 14 |
107
+ | batch_errors/avg_DaytonaValidationError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
108
+ | batch_errors/total_DaytonaValidationError | 1 | nan | 1 | 1 | 1 |
109
+ | batch_errors/avg_DaytonaError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
110
+ | batch_errors/total_DaytonaError | 1 | nan | 1 | 1 | 1 |
111
+ | batch_errors/avg_AgentEnvironmentTimeoutError | 0.0181818 | nan | 0.0181818 | 0.0181818 | 1 |
112
+ | batch_errors/total_AgentEnvironmentTimeoutError | 1 | nan | 1 | 1 | 1 |
113
+
114
+ ## Training Progression by Log
115
+
116
+ ### a2_rl_stack_selfdoc_v2_387394
117
+
118
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
119
+ |------|--------|--------|-----|------|---------------|-------------|
120
+ | 1 | 0.0762 | 0.0938 | 0.000000 | -0.0000 | 4905.2 | 3939.7 |
121
+ | 2 | 0.0645 | 0.0781 | 0.000000 | 0.0000 | 2330.4 | 1318.9 |
122
+ | 3 | 0.0332 | 0.0469 | 0.000000 | 0.0000 | 2676.4 | 1550.4 |
123
+ | 4 | 0.0605 | 0.0625 | 0.000000 | -0.0000 | 2515.9 | 1456.6 |
124
+ | 5 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 2550.4 | 1438.4 |
125
+ | 6 | 0.0000 | 0.0000 | 0.000000 | 0.0000 | 2638.3 | 1454.1 |
126
+ | 7 | 0.0176 | 0.0312 | 0.000000 | 0.0000 | 6368.2 | 4896.9 |
127
+ | 8 | 0.0723 | 0.0938 | 0.000000 | -0.0000 | 5271.9 | 4233.5 |
128
+ | 9 | 0.0605 | 0.0625 | 0.000000 | -0.0000 | 3170.2 | 2116.5 |
129
+ | 10 | 0.0332 | 0.0469 | 0.000000 | -0.0000 | 2492.1 | 1289.2 |
130
+ | 11 | 0.0781 | 0.0781 | 0.000000 | 0.0000 | 2944.8 | 1801.5 |
131
+ | 12 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 2861.4 | 1711.3 |
132
+
133
+ ### a2_rl_stack_selfdoc_v2_387395
134
+
135
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
136
+ |------|--------|--------|-----|------|---------------|-------------|
137
+ | 13 | 0.0000 | 0.0000 | 0.000000 | 0.0000 | 5267.1 | 4037.0 |
138
+ | 14 | 0.0215 | 0.0312 | 0.000000 | 0.0000 | 2948.7 | 1612.7 |
139
+
140
+ ## Timing Analysis
141
+
142
+ ### Average Time Breakdown (% of step time; components are nested, so rows do not sum to 100%)
143
+
144
+ | Component | Avg % of Step Time |
145
+ |-----------|-------------------|
146
+ | wait_for_generation_buffer | 63.7% |
147
+ | run_training | 34.4% |
148
+ | train_critic_and_policy | 29.9% |
149
+ | policy_train | 29.9% |
150
+ | fwd_logprobs_values_reward | 4.4% |
151
+ | sync_weights | 1.7% |
152
+ | save_hf_model | 1.4% |
153
+ | save_checkpoints | 1.3% |
154
+ | convert_to_training_input | 0.2% |
155
+ | cleanup_old_checkpoints | 0.0% |
156
+ | compute_advantages_and_returns | 0.0% |
157
+
158
+ ## Cross-Log Comparison
159
+
160
+ | Log | Avg Reward | Pass@8 | Step Time (s) | Gen Wait Time (s) | Avg Tokens | Staleness |
161
+ |-----|------|------|------|------|------|------|
162
+ | a2_rl_stack_selfdoc_v2_387394 | 0.0492 | 0.0573 | 3393.7743 | 2267.2594 | 9228.5729 | 2.5833 |
163
+ | a2_rl_stack_selfdoc_v2_387395 | 0.0107 | 0.0156 | 4107.8990 | 2824.8598 | 10837.9736 | 1.5938 |
164
+
165
+ ## vLLM Inference Engine Analysis
166
+
167
+ Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
168
+
169
+ > **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
170
+ > so we typically capture stats from one engine per timestamp. The stats shown are
171
+ > **per-engine** values. Multiply by `num_inference_engines` for cluster-wide estimates.
172
+
173
+ ### Summary by Log (Per-Engine Stats)
174
+
175
+ | Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
176
+ |-----|-------------------|-------------------|--------------------------|----------------|------------------|
177
+ | a2_rl_stack_selfdoc_v2_387394 | 4.2 | 0.0 | 81.5 tok/s | 9.1% | 88.9% |
178
+ | a2_rl_stack_selfdoc_v2_387395 | 3.9 | 0.0 | 79.9 tok/s | 10.3% | 90.8% |
179
+
180
+ ### Utilization Analysis (Per-Engine)
181
+
182
+ Key indicators of inference engine utilization:
183
+
184
+ - **Running requests/engine**: Concurrent requests being processed by each engine
185
+ - **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
186
+ - **Generation throughput**: Decode tokens/sec per engine
187
+   - An 8B model on an H100 can do **1000+ tok/s** when saturated
188
+   - If throughput is below 300 tok/s with 0 waiting requests, the engine is **starved for requests**
189
+
190
+ #### a2_rl_stack_selfdoc_v2_387394
191
+
192
+ - **Running requests/engine**: avg=4.2, max=16
193
+ - **Waiting requests**: avg=0.0, max=0
194
+ - **Generation throughput/engine**: avg=81.5 tok/s, max=297.7 tok/s
195
+ - **KV cache usage**: avg=9.1%
196
+ - **Prefix cache hit rate**: avg=88.9%
197
+ - ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 4.2 running)
198
+ - Bottleneck is likely upstream (environment execution, not inference)
199
+
200
+ #### a2_rl_stack_selfdoc_v2_387395
201
+
202
+ - **Running requests/engine**: avg=3.9, max=13
203
+ - **Waiting requests**: avg=0.0, max=0
204
+ - **Generation throughput/engine**: avg=79.9 tok/s, max=283.3 tok/s
205
+ - **KV cache usage**: avg=10.3%
206
+ - **Prefix cache hit rate**: avg=90.8%
207
+ - ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 3.9 running)
208
+ - Bottleneck is likely upstream (environment execution, not inference)
209
+
210
+ ## Trial-Level Analysis (from result.json)
211
+
212
+ Total trials parsed: 7891
213
+
214
+ ### Turn Count Statistics
215
+
216
+ | Metric | Value |
217
+ |--------|-------|
218
+ | Mean | 6.2 |
219
+ | Median | 5.0 |
220
+ | Std | 3.9 |
221
+ | Min | 1 |
222
+ | Max | 36 |
223
+ | Count | 7891 |
224
+
225
+ ### Exception Distribution
226
+
227
+ | Exception Type | Count | % |
228
+ |---------------|-------|---|
229
+ | No exception | 7768 | 98.4% |
230
+ | AgentTimeoutError | 81 | 1.0% |
231
+ | ContextLengthExceededError | 28 | 0.4% |
232
+ | InternalServerError | 7 | 0.1% |
233
+ | CancelledError | 3 | 0.0% |
234
+ | RuntimeError | 1 | 0.0% |
235
+ | DaytonaValidationError | 1 | 0.0% |
236
+ | AgentEnvironmentTimeoutError | 1 | 0.0% |
237
+ | DaytonaError | 1 | 0.0% |
238
+
239
+ ### Turn Count by Exception Type
240
+
241
+ | Exception Type | Mean Turns | Median Turns | Count |
242
+ |---------------|-----------|-------------|-------|
243
+ | ContextLengthExceededError | 19.8 | 19.5 | 28 |
244
+ | DaytonaError | 14.0 | 14.0 | 1 |
245
+ | AgentTimeoutError | 13.3 | 14.0 | 81 |
246
+ | DaytonaValidationError | 10.0 | 10.0 | 1 |
247
+ | InternalServerError | 7.4 | 7.0 | 7 |
248
+ | CancelledError | 7.0 | 1.0 | 3 |
249
+ | No exception | 6.0 | 5.0 | 7768 |
250
+ | RuntimeError | 6.0 | 6.0 | 1 |
251
+ | AgentEnvironmentTimeoutError | 3.0 | 3.0 | 1 |
252
+
253
+ ### Turn Count by Outcome
254
+
255
+ | Outcome | Mean Turns | Median Turns | Count |
256
+ |---------|-----------|-------------|-------|
257
+ | Success | 2.8 | 2.0 | 313 |
258
+ | Failure | 6.3 | 6.0 | 7564 |
259
+
260
+ ### Reward Summary
261
+
262
+ - Mean reward: 0.0397
263
+ - Success rate: 4.0%
264
+ - Trials with reward data: 7877
265
+
training_logs/20260428_204143_metrics_table.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaValidationError,batch_errors/total_DaytonaValidationError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,global_step
2
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,0,0.0,0,0.0,7275.4629,6357.3333,7351.1649,21632,1649,3419.6317,0.0762,-0.0004,0.0106,-0.0,0.2188,-0.0,0.0,1.0,0.0,0.0103,0.0938,0.0762,17.0837,73.4155,363.0786,57.7,857.9687,494.8901,10.7412,0.2127,3.9195,104.1328,802.9923,907.6513,43.3412,4905.1695,53.8667,803.3054,3939.7318,0,1,128,1024,1024,0,0,,,,,,,,1
3
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,1,1.0,1,1.0,7957.168,4530.8788,8193.2171,27882,1407,3880.4011,0.0645,0.0001,0.0054,0.0,0.2389,0.0,0.0,1.0,0.0,0.0107,0.0781,0.0645,21.7723,73.6329,360.9631,57.9,857.9687,497.0056,0.0153,0.0599,5.1948,103.7265,845.794,949.9143,43.9428,2330.4149,56.3595,846.1277,1318.9172,0,2,64,512,512,0,0,,,,,,,,2
4
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,2,2.0,2,1.0,9141.6328,3483.3529,9335.9576,31173,1378,4855.3059,0.0332,-0.0006,0.0052,0.0,0.2475,0.0,0.0,1.0,0.0,0.002,0.0469,0.0332,23.295,73.7708,359.9666,58.0,857.9687,498.0021,0.0409,0.0704,5.5762,136.0789,929.3459,1065.8459,36.5989,2676.449,54.5569,929.6963,1550.4301,0,3,64,512,511,1,0,0.015625,1.0,,,,,,3
5
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,3,3.0,3,1.0,8364.1445,4752.7097,8596.8981,23247,1,4259.9952,0.0605,0.0007,0.0022,-0.0,0.2539,-0.0,0.0,1.0,0.0,0.0037,0.0625,0.0605,23.2993,73.6543,356.3336,58.5,857.9687,501.6351,0.0181,0.117,4.3804,128.8873,865.2484,994.5704,33.9489,2515.8877,60.3128,865.5658,1456.5852,0,4,64,512,512,0,0,,,,,,,,4
6
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,4,4.0,4,1.0,8814.9668,3657.25,9068.625,26822,1549,4838.5142,0.0469,0.0,0.0,0.0,0.2494,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,23.7327,73.9993,393.292,54.2,857.9687,464.6767,0.0106,0.1039,4.8987,138.3911,914.3701,1053.2169,47.7862,2550.4358,53.8457,914.7216,1438.4277,0,5,64,512,512,0,0,,,35.6986,,,,,5
7
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,5,5.0,5,1.0,9775.6992,0.0,9775.6992,29538,1766,4947.5396,0.0,0.0,0.0,0.0,0.2604,0.0,0.0,1.0,0.0,0.0,0.0,0.0,23.9927,74.1619,387.3311,54.9,857.9687,470.6376,0.0092,0.07,5.5063,151.9273,971.4513,1123.7893,46.0193,2638.3422,54.8806,971.7915,1454.126,0,6,64,512,512,0,0,,,,,,,,6
8
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,6,6.0,6,1.0,12587.3262,8771.6667,12655.5984,31728,1795,7162.703,0.0176,0.0002,0.0017,0.0,0.2611,0.0,0.0,1.0,0.0,0.0025,0.0312,0.0176,25.2853,74.2525,394.0407,54.1,857.9687,463.928,0.0091,0.1162,5.7226,225.2383,1227.1218,1452.8258,30.4456,6368.1796,12.6889,1227.4709,4896.8985,0,7,64,512,512,0,0,,,,,,,,7
9
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,0,0.0,0,0.0,8240.916,5900.9189,8423.1895,28834,1883,4195.3896,0.0723,-0.0008,0.0172,-0.0,0.2536,-0.0,0.0,1.0,0.0,0.0101,0.0938,0.0723,25.3742,74.3453,391.6837,54.3,857.9687,466.285,0.008,0.0717,5.0261,118.7069,857.695,976.8176,30.3715,5271.8876,56.4666,858.0387,4233.5266,1,8,64,512,512,0,0,,,,,,,,8
10
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,1,1.0,1,1.0,9040.8418,3583.0968,9392.5884,26852,1490,4625.472,0.0605,-0.0,0.0026,-0.0,0.2651,-0.0,0.0,1.0,0.0,0.0031,0.0625,0.0605,25.4032,74.2527,390.5529,54.5,857.9687,467.4158,0.0143,0.0648,5.5196,117.0133,875.2519,992.6904,30.9643,3170.1598,55.3772,875.612,2116.5253,1,9,64,512,512,0,0,,,,,,,,9
11
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,2,2.0,2,1.0,10337.2324,4228.0,10547.0444,30710,2240,5203.9956,0.0332,0.0002,0.0055,-0.0,0.2663,-0.0,0.0,1.0,0.0,0.0015,0.0469,0.0332,25.5461,74.2853,382.0897,55.5,857.9687,475.879,0.0093,0.0713,5.7217,144.2946,997.5803,1142.3512,51.3102,2492.1314,54.8136,997.985,1289.203,1,10,64,512,512,0,0,,,33.3725,,,,,10
12
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,3,3.0,3,1.0,9823.8594,5472.55,10192.6144,31032,1842,4939.9625,0.0781,0.0,0.0,0.0,0.2664,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,26.0123,74.701,377.9515,55.9,857.9687,480.0172,0.013,0.0713,5.5893,124.439,955.5093,1080.4547,51.6951,2944.7977,57.2244,955.9438,1801.4832,1,11,64,512,511,1,1,,,,0.015625,1.0,,,11
13
+ a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,4,4.0,4,1.0,9383.625,4145.0833,9641.2582,25296,1,5089.0981,0.0469,0.0,0.0,0.0,0.2653,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.8842,74.5652,400.3334,53.3,857.9687,457.6353,0.0091,0.0605,4.7815,144.4199,946.7012,1091.543,51.978,2861.4366,53.809,947.0622,1711.2585,1,12,55,440,439,1,1,,,,,,0.01818181818181818,1.0,12
14
+ a2_rl_stack_selfdoc_v2_387395,0.0,0,64,512,5,2.1875,0,0.4375,10429.4316,0.0,10429.4316,30063,1664,5216.7129,0.0,0.0,0.0,0.0,0.2684,0.0,0.0,1.0,0.0,0.0,0.0,0.0,23.0165,73.8899,371.7734,56.7,857.9687,486.1953,11.322,0.0818,5.7184,157.0005,1013.8685,1171.3008,45.1988,5267.0618,53.0551,1014.2173,4036.9872,1,13,64,512,512,0,0,,,,,,,,13
15
+ a2_rl_stack_selfdoc_v2_387395,0.0,0,64,512,1,1.0,1,1.0,11246.5156,6659.4545,11347.2295,30998,907,6076.7695,0.0215,0.0004,0.0034,0.0,0.2719,0.0,0.0,1.0,0.0,0.0047,0.0312,0.0215,24.594,74.0568,357.858,58.3,857.9687,500.1107,0.0102,0.0736,5.798,187.2758,1083.085,1270.7739,33.2045,2948.7362,59.3865,1083.4237,1612.7324,1,14,17,136,136,0,0,,,,,,,,14
training_logs/20260428_204143_reward_vs_steps.png ADDED

Git LFS Details

  • SHA256: 441690153c58c231c0de8de8262b2dd34fdb6c488a75dc2405b4a7153fd762a4
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
training_logs/20260428_204143_trial_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_204143_turn_count_distribution.png ADDED

Git LFS Details

  • SHA256: f8caca01e5cf0babffb4b4acb54b05bd7b3341b84b2e760d019e516b21587cea
  • Pointer size: 131 Bytes
  • Size of remote file: 108 kB
training_logs/20260428_204143_vllm_metrics_a2_rl_stack_selfdoc_v2_387394.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_204143_vllm_metrics_a2_rl_stack_selfdoc_v2_387395.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_204143_vllm_metrics_table.csv ADDED
The diff for this file is too large to render. See raw diff