Add parsed training metrics and plots
Browse files- .gitattributes +2 -0
- training_logs/20260428_204143_metrics_a2_rl_stack_selfdoc_v2_387394.csv +13 -0
- training_logs/20260428_204143_metrics_a2_rl_stack_selfdoc_v2_387395.csv +3 -0
- training_logs/20260428_204143_metrics_report.md +265 -0
- training_logs/20260428_204143_metrics_table.csv +15 -0
- training_logs/20260428_204143_reward_vs_steps.png +3 -0
- training_logs/20260428_204143_trial_results.csv +0 -0
- training_logs/20260428_204143_turn_count_distribution.png +3 -0
- training_logs/20260428_204143_vllm_metrics_a2_rl_stack_selfdoc_v2_387394.csv +0 -0
- training_logs/20260428_204143_vllm_metrics_a2_rl_stack_selfdoc_v2_387395.csv +0 -0
- training_logs/20260428_204143_vllm_metrics_table.csv +0 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
training_logs/20260428_204143_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
training_logs/20260428_204143_turn_count_distribution.png filter=lfs diff=lfs merge=lfs -text
|
training_logs/20260428_204143_metrics_a2_rl_stack_selfdoc_v2_387394.csv
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaValidationError,batch_errors/total_DaytonaValidationError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError
|
| 2 |
+
0.0,0,64,512,0,0.0,0,0.0,7275.4629,6357.3333,7351.1649,21632,1649,3419.6317,0.0762,-0.0004,0.0106,-0.0,0.2188,-0.0,0.0,1.0,0.0,0.0103,0.0938,0.0762,17.0837,73.4155,363.0786,57.7,857.9687,494.8901,10.7412,0.2127,3.9195,104.1328,802.9923,907.6513,43.3412,4905.1695,53.8667,803.3054,3939.7318,0,1,128,1024,1024,0,0,,,,,,,
|
| 3 |
+
0.0,0,64,512,1,1.0,1,1.0,7957.168,4530.8788,8193.2171,27882,1407,3880.4011,0.0645,0.0001,0.0054,0.0,0.2389,0.0,0.0,1.0,0.0,0.0107,0.0781,0.0645,21.7723,73.6329,360.9631,57.9,857.9687,497.0056,0.0153,0.0599,5.1948,103.7265,845.794,949.9143,43.9428,2330.4149,56.3595,846.1277,1318.9172,0,2,64,512,512,0,0,,,,,,,
|
| 4 |
+
0.0,0,64,512,2,2.0,2,1.0,9141.6328,3483.3529,9335.9576,31173,1378,4855.3059,0.0332,-0.0006,0.0052,0.0,0.2475,0.0,0.0,1.0,0.0,0.002,0.0469,0.0332,23.295,73.7708,359.9666,58.0,857.9687,498.0021,0.0409,0.0704,5.5762,136.0789,929.3459,1065.8459,36.5989,2676.449,54.5569,929.6963,1550.4301,0,3,64,512,511,1,0,0.015625,1.0,,,,,
|
| 5 |
+
0.0,0,64,512,3,3.0,3,1.0,8364.1445,4752.7097,8596.8981,23247,1,4259.9952,0.0605,0.0007,0.0022,-0.0,0.2539,-0.0,0.0,1.0,0.0,0.0037,0.0625,0.0605,23.2993,73.6543,356.3336,58.5,857.9687,501.6351,0.0181,0.117,4.3804,128.8873,865.2484,994.5704,33.9489,2515.8877,60.3128,865.5658,1456.5852,0,4,64,512,512,0,0,,,,,,,
|
| 6 |
+
0.0,0,64,512,4,4.0,4,1.0,8814.9668,3657.25,9068.625,26822,1549,4838.5142,0.0469,0.0,0.0,0.0,0.2494,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,23.7327,73.9993,393.292,54.2,857.9687,464.6767,0.0106,0.1039,4.8987,138.3911,914.3701,1053.2169,47.7862,2550.4358,53.8457,914.7216,1438.4277,0,5,64,512,512,0,0,,,35.6986,,,,
|
| 7 |
+
0.0,0,64,512,5,5.0,5,1.0,9775.6992,0.0,9775.6992,29538,1766,4947.5396,0.0,0.0,0.0,0.0,0.2604,0.0,0.0,1.0,0.0,0.0,0.0,0.0,23.9927,74.1619,387.3311,54.9,857.9687,470.6376,0.0092,0.07,5.5063,151.9273,971.4513,1123.7893,46.0193,2638.3422,54.8806,971.7915,1454.126,0,6,64,512,512,0,0,,,,,,,
|
| 8 |
+
0.0,0,64,512,6,6.0,6,1.0,12587.3262,8771.6667,12655.5984,31728,1795,7162.703,0.0176,0.0002,0.0017,0.0,0.2611,0.0,0.0,1.0,0.0,0.0025,0.0312,0.0176,25.2853,74.2525,394.0407,54.1,857.9687,463.928,0.0091,0.1162,5.7226,225.2383,1227.1218,1452.8258,30.4456,6368.1796,12.6889,1227.4709,4896.8985,0,7,64,512,512,0,0,,,,,,,
|
| 9 |
+
0.0,0,64,512,0,0.0,0,0.0,8240.916,5900.9189,8423.1895,28834,1883,4195.3896,0.0723,-0.0008,0.0172,-0.0,0.2536,-0.0,0.0,1.0,0.0,0.0101,0.0938,0.0723,25.3742,74.3453,391.6837,54.3,857.9687,466.285,0.008,0.0717,5.0261,118.7069,857.695,976.8176,30.3715,5271.8876,56.4666,858.0387,4233.5266,1,8,64,512,512,0,0,,,,,,,
|
| 10 |
+
0.0,0,64,512,1,1.0,1,1.0,9040.8418,3583.0968,9392.5884,26852,1490,4625.472,0.0605,-0.0,0.0026,-0.0,0.2651,-0.0,0.0,1.0,0.0,0.0031,0.0625,0.0605,25.4032,74.2527,390.5529,54.5,857.9687,467.4158,0.0143,0.0648,5.5196,117.0133,875.2519,992.6904,30.9643,3170.1598,55.3772,875.612,2116.5253,1,9,64,512,512,0,0,,,,,,,
|
| 11 |
+
0.0,0,64,512,2,2.0,2,1.0,10337.2324,4228.0,10547.0444,30710,2240,5203.9956,0.0332,0.0002,0.0055,-0.0,0.2663,-0.0,0.0,1.0,0.0,0.0015,0.0469,0.0332,25.5461,74.2853,382.0897,55.5,857.9687,475.879,0.0093,0.0713,5.7217,144.2946,997.5803,1142.3512,51.3102,2492.1314,54.8136,997.985,1289.203,1,10,64,512,512,0,0,,,33.3725,,,,
|
| 12 |
+
0.0,0,64,512,3,3.0,3,1.0,9823.8594,5472.55,10192.6144,31032,1842,4939.9625,0.0781,0.0,0.0,0.0,0.2664,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,26.0123,74.701,377.9515,55.9,857.9687,480.0172,0.013,0.0713,5.5893,124.439,955.5093,1080.4547,51.6951,2944.7977,57.2244,955.9438,1801.4832,1,11,64,512,511,1,1,,,,0.015625,1.0,,
|
| 13 |
+
0.0,0,64,512,4,4.0,4,1.0,9383.625,4145.0833,9641.2582,25296,1,5089.0981,0.0469,0.0,0.0,0.0,0.2653,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.8842,74.5652,400.3334,53.3,857.9687,457.6353,0.0091,0.0605,4.7815,144.4199,946.7012,1091.543,51.978,2861.4366,53.809,947.0622,1711.2585,1,12,55,440,439,1,1,,,,,,0.01818181818181818,1.0
|
training_logs/20260428_204143_metrics_a2_rl_stack_selfdoc_v2_387395.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked
|
| 2 |
+
0.0,0,64,512,5,2.1875,0,0.4375,10429.4316,0.0,10429.4316,30063,1664,5216.7129,0.0,0.0,0.0,0.0,0.2684,0.0,0.0,1.0,0.0,0.0,0.0,0.0,23.0165,73.8899,371.7734,56.7,857.9687,486.1953,11.322,0.0818,5.7184,157.0005,1013.8685,1171.3008,45.1988,5267.0618,53.0551,1014.2173,4036.9872,1,13,64,512,512,0,0
|
| 3 |
+
0.0,0,64,512,1,1.0,1,1.0,11246.5156,6659.4545,11347.2295,30998,907,6076.7695,0.0215,0.0004,0.0034,0.0,0.2719,0.0,0.0,1.0,0.0,0.0047,0.0312,0.0215,24.594,74.0568,357.858,58.3,857.9687,500.1107,0.0102,0.0736,5.798,187.2758,1083.085,1270.7739,33.2045,2948.7362,59.3865,1083.4237,1612.7324,1,14,17,136,136,0,0
|
training_logs/20260428_204143_metrics_report.md
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SkyRL Training Metrics Analysis
|
| 2 |
+
|
| 3 |
+
Generated from 2 log files
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
| Log File | Last Global Step | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
|
| 8 |
+
|----------|-------------|---------------|---------------------|-------------------|----------------|
|
| 9 |
+
| a2_rl_stack_selfdoc_v2_387394 | 12 | 12 | 0.0492 | 0.0781 | 40725.3 |
|
| 10 |
+
| a2_rl_stack_selfdoc_v2_387395 | 14 | 2 | 0.0107 | 0.0215 | 8215.8 |
|
| 11 |
+
|
| 12 |
+
## Async Metrics
|
| 13 |
+
|
| 14 |
+
| | Mean | Std | Min | Max | Count |
|
| 15 |
+
|:------------------------------|-----------:|---------:|------:|------:|--------:|
|
| 16 |
+
| async/discard_rate | 0 | 0 | 0 | 0 | 14 |
|
| 17 |
+
| async/discarded_count | 0 | 0 | 0 | 0 | 14 |
|
| 18 |
+
| async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
|
| 19 |
+
| async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
|
| 20 |
+
| async/staleness_max | 2.64286 | 1.94569 | 0 | 6 | 14 |
|
| 21 |
+
| async/staleness_mean | 2.44196 | 1.82505 | 0 | 6 | 14 |
|
| 22 |
+
| async/staleness_min | 2.28571 | 1.93862 | 0 | 6 | 14 |
|
| 23 |
+
| async/staleness_ratio | 0.816964 | 0.376969 | 0 | 1 | 14 |
|
| 24 |
+
|
| 25 |
+
## Generate Metrics
|
| 26 |
+
|
| 27 |
+
| | Mean | Std | Min | Max | Count |
|
| 28 |
+
|:-------------------------------------|---------:|---------:|---------:|---------:|--------:|
|
| 29 |
+
| generate/avg_num_tokens | 9458.49 | 1395.87 | 7275.46 | 12587.3 | 14 |
|
| 30 |
+
| generate/avg_tokens_non_zero_rewards | 4395.88 | 2361 | 0 | 8771.67 | 14 |
|
| 31 |
+
| generate/avg_tokens_zero_rewards | 9639.32 | 1363.22 | 7351.16 | 12655.6 | 14 |
|
| 32 |
+
| generate/max_num_tokens | 28271.9 | 3149.44 | 21632 | 31728 | 14 |
|
| 33 |
+
| generate/min_num_tokens | 1398 | 664.724 | 1 | 2240 | 14 |
|
| 34 |
+
| generate/std_num_tokens | 4907.96 | 917.956 | 3419.63 | 7162.7 | 14 |
|
| 35 |
+
|
| 36 |
+
## Loss Metrics
|
| 37 |
+
|
| 38 |
+
| | Mean | Std | Min | Max | Count |
|
| 39 |
+
|:----------------------------|-------------:|------------:|--------:|-------:|--------:|
|
| 40 |
+
| loss/avg_final_rewards | 0.0436714 | 0.026731 | 0 | 0.0781 | 14 |
|
| 41 |
+
| loss/avg_raw_advantages | -1.42857e-05 | 0.000382013 | -0.0008 | 0.0007 | 14 |
|
| 42 |
+
| loss/avg_raw_advantages_abs | 0.00384286 | 0.0049143 | 0 | 0.0172 | 14 |
|
| 43 |
+
|
| 44 |
+
## Policy Metrics
|
| 45 |
+
|
| 46 |
+
| | Mean | Std | Min | Max | Count |
|
| 47 |
+
|:---------------------------|-----------:|-----------:|--------:|-------:|--------:|
|
| 48 |
+
| policy/final_loss | 0 | 0 | -0 | 0 | 14 |
|
| 49 |
+
| policy/policy_entropy | 0.256214 | 0.0142426 | 0.2188 | 0.2719 | 14 |
|
| 50 |
+
| policy/policy_loss | 0 | 0 | -0 | 0 | 14 |
|
| 51 |
+
| policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
|
| 52 |
+
| policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
|
| 53 |
+
| policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
|
| 54 |
+
| policy/raw_grad_norm | 0.00347143 | 0.00403607 | 0 | 0.0107 | 14 |
|
| 55 |
+
|
| 56 |
+
## Reward Metrics
|
| 57 |
+
|
| 58 |
+
| | Mean | Std | Min | Max | Count |
|
| 59 |
+
|:----------------------|----------:|---------:|------:|-------:|--------:|
|
| 60 |
+
| reward/avg_pass_at_8 | 0.0513429 | 0.029676 | 0 | 0.0938 | 14 |
|
| 61 |
+
| reward/avg_raw_reward | 0.0436714 | 0.026731 | 0 | 0.0781 | 14 |
|
| 62 |
+
|
| 63 |
+
## System Metrics
|
| 64 |
+
|
| 65 |
+
| | Mean | Std | Min | Max | Count |
|
| 66 |
+
|:------------------------|---------:|-------------:|---------:|---------:|--------:|
|
| 67 |
+
| system/process_rss_gb | 23.878 | 2.32823 | 17.0837 | 26.0123 | 14 |
|
| 68 |
+
| system/process_vms_gb | 74.0702 | 0.367949 | 73.4155 | 74.701 | 14 |
|
| 69 |
+
| system/ram_available_gb | 377.661 | 15.6622 | 356.334 | 400.333 | 14 |
|
| 70 |
+
| system/ram_percent | 55.9857 | 1.82456 | 53.3 | 58.5 | 14 |
|
| 71 |
+
| system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
|
| 72 |
+
| system/ram_used_gb | 480.308 | 15.6622 | 457.635 | 501.635 | 14 |
|
| 73 |
+
|
| 74 |
+
## Timing Metrics
|
| 75 |
+
|
| 76 |
+
| | Mean | Std | Min | Max | Count |
|
| 77 |
+
|:--------------------------------------|-------------:|-------------:|----------:|----------:|--------:|
|
| 78 |
+
| timing/cleanup_old_checkpoints | 1.58788 | 4.00255 | 0.008 | 11.322 | 14 |
|
| 79 |
+
| timing/compute_advantages_and_returns | 0.0889357 | 0.0403961 | 0.0599 | 0.2127 | 14 |
|
| 80 |
+
| timing/convert_to_training_input | 5.23951 | 0.571532 | 3.9195 | 5.798 | 14 |
|
| 81 |
+
| timing/fwd_logprobs_values_reward | 141.538 | 32.7042 | 103.727 | 225.238 | 14 |
|
| 82 |
+
| timing/policy_train | 949.001 | 109.823 | 802.992 | 1227.12 | 14 |
|
| 83 |
+
| timing/run_training | 1090.98 | 141.718 | 907.651 | 1452.83 | 14 |
|
| 84 |
+
| timing/save_checkpoints | 41.2004 | 8.31398 | 30.3715 | 51.978 | 14 |
|
| 85 |
+
| timing/step | 3495.79 | 1337.8 | 2330.41 | 6368.18 | 14 |
|
| 86 |
+
| timing/sync_weights | 52.6174 | 11.6862 | 12.6889 | 60.3128 | 14 |
|
| 87 |
+
| timing/train_critic_and_policy | 949.354 | 109.831 | 803.305 | 1227.47 | 14 |
|
| 88 |
+
| timing/wait_for_generation_buffer | 2346.92 | 1300.11 | 1289.2 | 4896.9 | 14 |
|
| 89 |
+
| timing/save_hf_model | 34.5356 | 1.6448 | 33.3725 | 35.6986 | 2 |
|
| 90 |
+
|
| 91 |
+
## Trainer Metrics
|
| 92 |
+
|
| 93 |
+
| | Mean | Std | Min | Max | Count |
|
| 94 |
+
|:--------------------|-------:|---------:|------:|------:|--------:|
|
| 95 |
+
| trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
|
| 96 |
+
| trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
|
| 97 |
+
|
| 98 |
+
## Batch Errors Metrics
|
| 99 |
+
|
| 100 |
+
| | Mean | Std | Min | Max | Count |
|
| 101 |
+
|:------------------------------------------------|------------:|-----------:|------------:|-------------:|--------:|
|
| 102 |
+
| batch_errors/total_batches | 64.5714 | 22.1558 | 17 | 128 | 14 |
|
| 103 |
+
| batch_errors/total_instances | 516.571 | 177.246 | 136 | 1024 | 14 |
|
| 104 |
+
| batch_errors/total_successful | 516.357 | 177.284 | 136 | 1024 | 14 |
|
| 105 |
+
| batch_errors/total_failed | 0.214286 | 0.425815 | 0 | 1 | 14 |
|
| 106 |
+
| batch_errors/total_masked | 0.142857 | 0.363137 | 0 | 1 | 14 |
|
| 107 |
+
| batch_errors/avg_DaytonaValidationError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
|
| 108 |
+
| batch_errors/total_DaytonaValidationError | 1 | nan | 1 | 1 | 1 |
|
| 109 |
+
| batch_errors/avg_DaytonaError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
|
| 110 |
+
| batch_errors/total_DaytonaError | 1 | nan | 1 | 1 | 1 |
|
| 111 |
+
| batch_errors/avg_AgentEnvironmentTimeoutError | 0.0181818 | nan | 0.0181818 | 0.0181818 | 1 |
|
| 112 |
+
| batch_errors/total_AgentEnvironmentTimeoutError | 1 | nan | 1 | 1 | 1 |
|
| 113 |
+
|
| 114 |
+
## Training Progression by Log
|
| 115 |
+
|
| 116 |
+
### a2_rl_stack_selfdoc_v2_387394
|
| 117 |
+
|
| 118 |
+
| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
|
| 119 |
+
|------|--------|--------|-----|------|---------------|-------------|
|
| 120 |
+
| 1 | 0.0762 | 0.0938 | 0.000000 | -0.0000 | 4905.2 | 3939.7 |
|
| 121 |
+
| 2 | 0.0645 | 0.0781 | 0.000000 | 0.0000 | 2330.4 | 1318.9 |
|
| 122 |
+
| 3 | 0.0332 | 0.0469 | 0.000000 | 0.0000 | 2676.4 | 1550.4 |
|
| 123 |
+
| 4 | 0.0605 | 0.0625 | 0.000000 | -0.0000 | 2515.9 | 1456.6 |
|
| 124 |
+
| 5 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 2550.4 | 1438.4 |
|
| 125 |
+
| 6 | 0.0000 | 0.0000 | 0.000000 | 0.0000 | 2638.3 | 1454.1 |
|
| 126 |
+
| 7 | 0.0176 | 0.0312 | 0.000000 | 0.0000 | 6368.2 | 4896.9 |
|
| 127 |
+
| 8 | 0.0723 | 0.0938 | 0.000000 | -0.0000 | 5271.9 | 4233.5 |
|
| 128 |
+
| 9 | 0.0605 | 0.0625 | 0.000000 | -0.0000 | 3170.2 | 2116.5 |
|
| 129 |
+
| 10 | 0.0332 | 0.0469 | 0.000000 | -0.0000 | 2492.1 | 1289.2 |
|
| 130 |
+
| 11 | 0.0781 | 0.0781 | 0.000000 | 0.0000 | 2944.8 | 1801.5 |
|
| 131 |
+
| 12 | 0.0469 | 0.0469 | 0.000000 | 0.0000 | 2861.4 | 1711.3 |
|
| 132 |
+
|
| 133 |
+
### a2_rl_stack_selfdoc_v2_387395
|
| 134 |
+
|
| 135 |
+
| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
|
| 136 |
+
|------|--------|--------|-----|------|---------------|-------------|
|
| 137 |
+
| 13 | 0.0000 | 0.0000 | 0.000000 | 0.0000 | 5267.1 | 4037.0 |
|
| 138 |
+
| 14 | 0.0215 | 0.0312 | 0.000000 | 0.0000 | 2948.7 | 1612.7 |
|
| 139 |
+
|
| 140 |
+
## Timing Analysis
|
| 141 |
+
|
| 142 |
+
### Average Time Breakdown (% of step time)
|
| 143 |
+
|
| 144 |
+
| Component | Avg % of Step Time |
|
| 145 |
+
|-----------|-------------------|
|
| 146 |
+
| wait_for_generation_buffer | 63.7% |
|
| 147 |
+
| run_training | 34.4% |
|
| 148 |
+
| train_critic_and_policy | 29.9% |
|
| 149 |
+
| policy_train | 29.9% |
|
| 150 |
+
| fwd_logprobs_values_reward | 4.4% |
|
| 151 |
+
| sync_weights | 1.7% |
|
| 152 |
+
| save_hf_model | 1.4% |
|
| 153 |
+
| save_checkpoints | 1.3% |
|
| 154 |
+
| convert_to_training_input | 0.2% |
|
| 155 |
+
| cleanup_old_checkpoints | 0.0% |
|
| 156 |
+
| compute_advantages_and_returns | 0.0% |
|
| 157 |
+
|
| 158 |
+
## Cross-Log Comparison
|
| 159 |
+
|
| 160 |
+
| Log | Avg Reward | Pass@8 | Step Time (s) | Gen Wait Time (s) | Avg Tokens | Staleness |
|
| 161 |
+
|-----|------|------|------|------|------|------|
|
| 162 |
+
| a2_rl_stack_selfdoc_v2_387394 | 0.0492 | 0.0573 | 3393.7743 | 2267.2594 | 9228.5729 | 2.5833 |
|
| 163 |
+
| a2_rl_stack_selfdoc_v2_387395 | 0.0107 | 0.0156 | 4107.8990 | 2824.8598 | 10837.9736 | 1.5938 |
|
| 164 |
+
|
| 165 |
+
## vLLM Inference Engine Analysis
|
| 166 |
+
|
| 167 |
+
Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
|
| 168 |
+
|
| 169 |
+
> **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
|
| 170 |
+
> so we typically capture stats from one engine per timestamp. The stats shown are
|
| 171 |
+
> **per-engine** values. Multiply by `num_inference_engines` for cluster-wide estimates.
|
| 172 |
+
|
| 173 |
+
### Summary by Log (Per-Engine Stats)
|
| 174 |
+
|
| 175 |
+
| Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
|
| 176 |
+
|-----|-------------------|-------------------|--------------------------|----------------|------------------|
|
| 177 |
+
| a2_rl_stack_selfdoc_v2_387394 | 4.2 | 0.0 | 81.5 tok/s | 9.1% | 88.9% |
|
| 178 |
+
| a2_rl_stack_selfdoc_v2_387395 | 3.9 | 0.0 | 79.9 tok/s | 10.3% | 90.8% |
|
| 179 |
+
|
| 180 |
+
### Utilization Analysis (Per-Engine)
|
| 181 |
+
|
| 182 |
+
Key indicators of inference engine utilization:
|
| 183 |
+
|
| 184 |
+
- **Running requests/engine**: Concurrent requests being processed by each engine
|
| 185 |
+
- **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
|
| 186 |
+
- **Generation throughput**: Decode tokens/sec per engine
|
| 187 |
+
- An 8B model on an H100 can reach **1000+ tok/s** when saturated
|
| 188 |
+
- If throughput is below 300 tok/s with 0 waiting requests, the engine is **starved for requests**
|
| 189 |
+
|
| 190 |
+
#### a2_rl_stack_selfdoc_v2_387394
|
| 191 |
+
|
| 192 |
+
- **Running requests/engine**: avg=4.2, max=16
|
| 193 |
+
- **Waiting requests**: avg=0.0, max=0
|
| 194 |
+
- **Generation throughput/engine**: avg=81.5 tok/s, max=297.7 tok/s
|
| 195 |
+
- **KV cache usage**: avg=9.1%
|
| 196 |
+
- **Prefix cache hit rate**: avg=88.9%
|
| 197 |
+
- ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 4.2 running)
|
| 198 |
+
- The bottleneck is likely upstream of inference (e.g., environment execution)
|
| 199 |
+
|
| 200 |
+
#### a2_rl_stack_selfdoc_v2_387395
|
| 201 |
+
|
| 202 |
+
- **Running requests/engine**: avg=3.9, max=13
|
| 203 |
+
- **Waiting requests**: avg=0.0, max=0
|
| 204 |
+
- **Generation throughput/engine**: avg=79.9 tok/s, max=283.3 tok/s
|
| 205 |
+
- **KV cache usage**: avg=10.3%
|
| 206 |
+
- **Prefix cache hit rate**: avg=90.8%
|
| 207 |
+
- ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 3.9 running)
|
| 208 |
+
- The bottleneck is likely upstream of inference (e.g., environment execution)
|
| 209 |
+
|
| 210 |
+
## Trial-Level Analysis (from result.json)
|
| 211 |
+
|
| 212 |
+
Total trials parsed: 7891
|
| 213 |
+
|
| 214 |
+
### Turn Count Statistics
|
| 215 |
+
|
| 216 |
+
| Metric | Value |
|
| 217 |
+
|--------|-------|
|
| 218 |
+
| Mean | 6.2 |
|
| 219 |
+
| Median | 5.0 |
|
| 220 |
+
| Std | 3.9 |
|
| 221 |
+
| Min | 1 |
|
| 222 |
+
| Max | 36 |
|
| 223 |
+
| Count | 7891 |
|
| 224 |
+
|
| 225 |
+
### Exception Distribution
|
| 226 |
+
|
| 227 |
+
| Exception Type | Count | % |
|
| 228 |
+
|---------------|-------|---|
|
| 229 |
+
| No exception | 7768 | 98.4% |
|
| 230 |
+
| AgentTimeoutError | 81 | 1.0% |
|
| 231 |
+
| ContextLengthExceededError | 28 | 0.4% |
|
| 232 |
+
| InternalServerError | 7 | 0.1% |
|
| 233 |
+
| CancelledError | 3 | 0.0% |
|
| 234 |
+
| RuntimeError | 1 | 0.0% |
|
| 235 |
+
| DaytonaValidationError | 1 | 0.0% |
|
| 236 |
+
| AgentEnvironmentTimeoutError | 1 | 0.0% |
|
| 237 |
+
| DaytonaError | 1 | 0.0% |
|
| 238 |
+
|
| 239 |
+
### Turn Count by Exception Type
|
| 240 |
+
|
| 241 |
+
| Exception Type | Mean Turns | Median Turns | Count |
|
| 242 |
+
|---------------|-----------|-------------|-------|
|
| 243 |
+
| ContextLengthExceededError | 19.8 | 19.5 | 28 |
|
| 244 |
+
| DaytonaError | 14.0 | 14.0 | 1 |
|
| 245 |
+
| AgentTimeoutError | 13.3 | 14.0 | 81 |
|
| 246 |
+
| DaytonaValidationError | 10.0 | 10.0 | 1 |
|
| 247 |
+
| InternalServerError | 7.4 | 7.0 | 7 |
|
| 248 |
+
| CancelledError | 7.0 | 1.0 | 3 |
|
| 249 |
+
| No exception | 6.0 | 5.0 | 7768 |
|
| 250 |
+
| RuntimeError | 6.0 | 6.0 | 1 |
|
| 251 |
+
| AgentEnvironmentTimeoutError | 3.0 | 3.0 | 1 |
|
| 252 |
+
|
| 253 |
+
### Turn Count by Outcome
|
| 254 |
+
|
| 255 |
+
| Outcome | Mean Turns | Median Turns | Count |
|
| 256 |
+
|---------|-----------|-------------|-------|
|
| 257 |
+
| Success | 2.8 | 2.0 | 313 |
|
| 258 |
+
| Failure | 6.3 | 6.0 | 7564 |
|
| 259 |
+
|
| 260 |
+
### Reward Summary
|
| 261 |
+
|
| 262 |
+
- Mean reward: 0.0397
|
| 263 |
+
- Success rate: 4.0%
|
| 264 |
+
- Trials with reward data: 7877
|
| 265 |
+
|
training_logs/20260428_204143_metrics_table.csv
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaValidationError,batch_errors/total_DaytonaValidationError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,global_step
|
| 2 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,0,0.0,0,0.0,7275.4629,6357.3333,7351.1649,21632,1649,3419.6317,0.0762,-0.0004,0.0106,-0.0,0.2188,-0.0,0.0,1.0,0.0,0.0103,0.0938,0.0762,17.0837,73.4155,363.0786,57.7,857.9687,494.8901,10.7412,0.2127,3.9195,104.1328,802.9923,907.6513,43.3412,4905.1695,53.8667,803.3054,3939.7318,0,1,128,1024,1024,0,0,,,,,,,,1
|
| 3 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,1,1.0,1,1.0,7957.168,4530.8788,8193.2171,27882,1407,3880.4011,0.0645,0.0001,0.0054,0.0,0.2389,0.0,0.0,1.0,0.0,0.0107,0.0781,0.0645,21.7723,73.6329,360.9631,57.9,857.9687,497.0056,0.0153,0.0599,5.1948,103.7265,845.794,949.9143,43.9428,2330.4149,56.3595,846.1277,1318.9172,0,2,64,512,512,0,0,,,,,,,,2
|
| 4 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,2,2.0,2,1.0,9141.6328,3483.3529,9335.9576,31173,1378,4855.3059,0.0332,-0.0006,0.0052,0.0,0.2475,0.0,0.0,1.0,0.0,0.002,0.0469,0.0332,23.295,73.7708,359.9666,58.0,857.9687,498.0021,0.0409,0.0704,5.5762,136.0789,929.3459,1065.8459,36.5989,2676.449,54.5569,929.6963,1550.4301,0,3,64,512,511,1,0,0.015625,1.0,,,,,,3
|
| 5 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,3,3.0,3,1.0,8364.1445,4752.7097,8596.8981,23247,1,4259.9952,0.0605,0.0007,0.0022,-0.0,0.2539,-0.0,0.0,1.0,0.0,0.0037,0.0625,0.0605,23.2993,73.6543,356.3336,58.5,857.9687,501.6351,0.0181,0.117,4.3804,128.8873,865.2484,994.5704,33.9489,2515.8877,60.3128,865.5658,1456.5852,0,4,64,512,512,0,0,,,,,,,,4
|
| 6 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,4,4.0,4,1.0,8814.9668,3657.25,9068.625,26822,1549,4838.5142,0.0469,0.0,0.0,0.0,0.2494,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,23.7327,73.9993,393.292,54.2,857.9687,464.6767,0.0106,0.1039,4.8987,138.3911,914.3701,1053.2169,47.7862,2550.4358,53.8457,914.7216,1438.4277,0,5,64,512,512,0,0,,,35.6986,,,,,5
|
| 7 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,5,5.0,5,1.0,9775.6992,0.0,9775.6992,29538,1766,4947.5396,0.0,0.0,0.0,0.0,0.2604,0.0,0.0,1.0,0.0,0.0,0.0,0.0,23.9927,74.1619,387.3311,54.9,857.9687,470.6376,0.0092,0.07,5.5063,151.9273,971.4513,1123.7893,46.0193,2638.3422,54.8806,971.7915,1454.126,0,6,64,512,512,0,0,,,,,,,,6
|
| 8 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,6,6.0,6,1.0,12587.3262,8771.6667,12655.5984,31728,1795,7162.703,0.0176,0.0002,0.0017,0.0,0.2611,0.0,0.0,1.0,0.0,0.0025,0.0312,0.0176,25.2853,74.2525,394.0407,54.1,857.9687,463.928,0.0091,0.1162,5.7226,225.2383,1227.1218,1452.8258,30.4456,6368.1796,12.6889,1227.4709,4896.8985,0,7,64,512,512,0,0,,,,,,,,7
|
| 9 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,0,0.0,0,0.0,8240.916,5900.9189,8423.1895,28834,1883,4195.3896,0.0723,-0.0008,0.0172,-0.0,0.2536,-0.0,0.0,1.0,0.0,0.0101,0.0938,0.0723,25.3742,74.3453,391.6837,54.3,857.9687,466.285,0.008,0.0717,5.0261,118.7069,857.695,976.8176,30.3715,5271.8876,56.4666,858.0387,4233.5266,1,8,64,512,512,0,0,,,,,,,,8
|
| 10 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,1,1.0,1,1.0,9040.8418,3583.0968,9392.5884,26852,1490,4625.472,0.0605,-0.0,0.0026,-0.0,0.2651,-0.0,0.0,1.0,0.0,0.0031,0.0625,0.0605,25.4032,74.2527,390.5529,54.5,857.9687,467.4158,0.0143,0.0648,5.5196,117.0133,875.2519,992.6904,30.9643,3170.1598,55.3772,875.612,2116.5253,1,9,64,512,512,0,0,,,,,,,,9
|
| 11 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,2,2.0,2,1.0,10337.2324,4228.0,10547.0444,30710,2240,5203.9956,0.0332,0.0002,0.0055,-0.0,0.2663,-0.0,0.0,1.0,0.0,0.0015,0.0469,0.0332,25.5461,74.2853,382.0897,55.5,857.9687,475.879,0.0093,0.0713,5.7217,144.2946,997.5803,1142.3512,51.3102,2492.1314,54.8136,997.985,1289.203,1,10,64,512,512,0,0,,,33.3725,,,,,10
|
| 12 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,3,3.0,3,1.0,9823.8594,5472.55,10192.6144,31032,1842,4939.9625,0.0781,0.0,0.0,0.0,0.2664,0.0,0.0,1.0,0.0,0.0,0.0781,0.0781,26.0123,74.701,377.9515,55.9,857.9687,480.0172,0.013,0.0713,5.5893,124.439,955.5093,1080.4547,51.6951,2944.7977,57.2244,955.9438,1801.4832,1,11,64,512,511,1,1,,,,0.015625,1.0,,,11
|
| 13 |
+
a2_rl_stack_selfdoc_v2_387394,0.0,0,64,512,4,4.0,4,1.0,9383.625,4145.0833,9641.2582,25296,1,5089.0981,0.0469,0.0,0.0,0.0,0.2653,0.0,0.0,1.0,0.0,0.0,0.0469,0.0469,25.8842,74.5652,400.3334,53.3,857.9687,457.6353,0.0091,0.0605,4.7815,144.4199,946.7012,1091.543,51.978,2861.4366,53.809,947.0622,1711.2585,1,12,55,440,439,1,1,,,,,,0.01818181818181818,1.0,12
|
| 14 |
+
a2_rl_stack_selfdoc_v2_387395,0.0,0,64,512,5,2.1875,0,0.4375,10429.4316,0.0,10429.4316,30063,1664,5216.7129,0.0,0.0,0.0,0.0,0.2684,0.0,0.0,1.0,0.0,0.0,0.0,0.0,23.0165,73.8899,371.7734,56.7,857.9687,486.1953,11.322,0.0818,5.7184,157.0005,1013.8685,1171.3008,45.1988,5267.0618,53.0551,1014.2173,4036.9872,1,13,64,512,512,0,0,,,,,,,,13
|
| 15 |
+
a2_rl_stack_selfdoc_v2_387395,0.0,0,64,512,1,1.0,1,1.0,11246.5156,6659.4545,11347.2295,30998,907,6076.7695,0.0215,0.0004,0.0034,0.0,0.2719,0.0,0.0,1.0,0.0,0.0047,0.0312,0.0215,24.594,74.0568,357.858,58.3,857.9687,500.1107,0.0102,0.0736,5.798,187.2758,1083.085,1270.7739,33.2045,2948.7362,59.3865,1083.4237,1612.7324,1,14,17,136,136,0,0,,,,,,,,14
|
training_logs/20260428_204143_reward_vs_steps.png
ADDED
|
Git LFS Details
|
training_logs/20260428_204143_trial_results.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260428_204143_turn_count_distribution.png
ADDED
|
Git LFS Details
|
training_logs/20260428_204143_vllm_metrics_a2_rl_stack_selfdoc_v2_387394.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260428_204143_vllm_metrics_a2_rl_stack_selfdoc_v2_387395.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260428_204143_vllm_metrics_table.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|