Add parsed training metrics and plots

Browse files

Files changed (9) hide show

.gitattributes +1 -0
training_logs/20260428_203615_metrics_a2_rl_stack_pytest_v2_387401.csv +15 -0
training_logs/20260428_203615_metrics_report.md +231 -0
training_logs/20260428_203615_metrics_table.csv +15 -0
training_logs/20260428_203615_reward_vs_steps.png +3 -0
training_logs/20260428_203615_trial_results.csv +0 -0
training_logs/20260428_203615_turn_count_distribution.png +0 -0
training_logs/20260428_203615_vllm_metrics_a2_rl_stack_pytest_v2_387401.csv +0 -0
training_logs/20260428_203615_vllm_metrics_table.csv +0 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+training_logs/20260428_203615_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text

training_logs/20260428_203615_metrics_a2_rl_stack_pytest_v2_387401.csv ADDED Viewed

	@@ -0,0 +1,15 @@

+async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_RuntimeError,batch_errors/total_RuntimeError
+0.0,0,64,512,0,0.0,0,0.0,4741.9902,4480.4132,4822.9386,14849,1,2194.273,0.2363,0.0057,0.1467,0.0,0.1855,0.0,0.0,1.0,0.0,0.0289,0.4375,0.2363,12.8265,73.2355,355.5015,58.6,857.9688,502.4673,10.796,0.1113,2.7561,62.0087,643.9144,706.3755,43.6553,2779.7671,53.5533,644.2551,2017.0819,0,1,128.0,1024.0,1022.0,2.0,0.0,0.015625,2.0,,,
+0.0,0,64,512,1,1.0,1,1.0,5207.4805,4587.3176,5459.6346,15353,1,2543.871,0.2891,0.0032,0.2156,-0.0,0.1818,-0.0003,0.0,1.0,0.0,0.039,0.5,0.2891,13.6703,73.4764,355.181,58.6,857.9688,502.7878,0.0108,0.0549,3.5254,71.9423,683.3507,755.6622,31.4849,1392.6153,52.8988,683.6647,580.5066,0,2,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,
+0.0,0,64,512,2,2.0,2,1.0,5185.8047,4404.2423,5662.6069,18658,1,2655.8627,0.3789,0.0056,0.1026,-0.0,0.1694,-0.0,0.0,1.0,0.0,0.0307,0.4688,0.3789,16.1691,73.7953,352.0549,59.0,857.9688,505.9138,0.0087,0.0759,3.5505,84.1237,702.8472,787.3638,32.8388,1345.9698,52.8027,703.1638,502.2264,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,,,
+0.0,0,64,512,3,3.0,3,1.0,6173.4297,5171.7151,6744.9601,23599,1,2913.9495,0.3633,-0.0048,0.1501,0.0,0.1671,0.0,0.0,1.0,0.0,0.0269,0.5781,0.3633,19.7277,74.2299,348.8676,59.3,857.9688,509.1012,0.0089,0.0556,4.1262,92.6868,765.1808,858.2347,33.0996,2000.5081,53.4025,765.4919,1084.7149,0,4,64.0,512.0,512.0,0.0,0.0,,,,,
+0.0,0,64,512,4,4.0,4,1.0,6649.6289,5533.7975,7170.7765,21118,1405,3018.764,0.3184,0.002,0.0977,-0.0,0.1669,-0.0,0.0,1.0,0.0,0.0231,0.4062,0.3184,20.1003,74.4495,353.0518,58.9,857.9688,504.9169,0.0081,0.052,3.8742,89.0204,756.9617,846.374,33.0429,1451.9746,52.8114,757.3012,548.8843,0,5,64.0,512.0,512.0,0.0,0.0,,,36.8234,,
+0.0,0,64,512,5,5.0,5,1.0,6952.5547,6301.3432,7273.414,20095,1889,2871.2745,0.3301,0.0065,0.1201,-0.0,0.1707,-0.0,0.0,1.0,0.0,0.0208,0.4688,0.3301,20.2417,74.4911,352.5635,58.9,857.9688,505.4053,0.008,0.0486,3.8545,91.1339,754.2028,845.7448,31.6035,1824.3834,56.2481,754.562,918.5016,0,6,64.0,512.0,512.0,0.0,0.0,,,,,
+0.0,0,64,512,6,6.0,6,1.0,7821.9141,6336.3427,8397.6233,28454,2297,3823.9451,0.2793,-0.0032,0.1026,-0.0,0.1755,-0.0,0.0,1.0,0.0,0.0234,0.4062,0.2793,24.1808,74.7747,405.2173,52.8,857.9688,452.7515,0.011,0.0608,4.7397,122.4595,855.6703,978.5429,33.4132,4135.5872,14.0068,856.0222,3138.2634,0,7,64.0,512.0,512.0,0.0,0.0,,,,,
+0.0,0,64,512,0,0.0,0,0.0,5879.5469,5305.2657,6269.3049,14895,1513,2089.5078,0.4043,0.0018,0.096,-0.0,0.1721,-0.0,0.0,1.0,0.0,0.0209,0.5156,0.4043,24.3558,75.0631,401.2662,53.2,857.9688,456.7026,0.01,0.0396,3.3153,69.9986,642.9688,713.3496,32.0209,3348.3309,52.4998,643.311,2579.128,1,8,64.0,512.0,512.0,0.0,0.0,,,,,
+0.0,0,64,512,1,1.0,1,1.0,6734.0,6444.1044,6893.8818,18643,2175,2384.6491,0.3555,0.0017,0.1221,-0.0,0.1782,-0.0,0.0,1.0,0.0,0.0215,0.4375,0.3555,24.6022,75.2357,411.7228,52.0,857.9688,446.246,0.009,0.0439,3.6223,78.3919,702.1173,780.9468,31.0012,1826.3469,52.3499,702.5107,989.3979,1,9,64.0,512.0,509.0,2.0,0.0,0.046875,3.0,,,
+0.0,0,64,512,2,2.0,2,1.0,6511.9746,5525.2532,7336.0108,18605,1,2862.176,0.4551,0.0042,0.1035,-0.0001,0.1794,-0.002,0.0,1.0,0.0,0.0187,0.5469,0.4551,24.6203,75.1738,409.3499,52.3,857.9688,448.6188,0.0111,0.044,3.5639,90.4193,734.5286,825.4112,31.3292,1651.042,52.4615,734.9477,769.5638,1,10,64.0,512.0,512.0,0.0,0.0,,,36.5961,,
+0.0,0,64,512,3,3.0,3,1.0,7150.2969,5831.9757,8037.7941,22262,1340,2965.0407,0.4023,0.0008,0.1053,-0.0,0.1771,-0.0,0.0,1.0,0.0,0.0194,0.5156,0.4023,24.8219,75.2715,408.2212,52.4,857.9688,449.7476,0.0086,0.0515,4.1267,95.6788,762.1538,858.3374,32.4094,2182.5423,51.6537,762.6066,1268.3906,1,11,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,
+0.0,0,64,512,4,4.0,4,1.0,7161.0645,6208.9091,7530.0569,19269,1,2859.9143,0.2793,-0.0043,0.1026,0.0,0.1809,0.0,0.0,1.0,0.0,0.0175,0.3906,0.2793,24.8525,75.2184,415.8303,51.5,857.9688,442.1384,0.0083,0.0535,3.7269,95.4867,753.3097,849.3001,32.3294,1734.4666,56.6358,753.7596,824.7671,1,12,64.0,512.0,509.0,2.0,0.0,0.03125,2.0,,0.015625,1.0
+0.0,0,64,512,5,5.0,5,1.0,7395.9434,6484.1768,7894.5227,19744,1,2920.1255,0.3535,0.0007,0.1193,0.0,0.1803,0.0,0.0,1.0,0.0,0.0186,0.4844,0.3535,24.9543,75.2349,412.6592,51.9,857.9688,445.3096,0.0093,0.05,3.7965,93.9943,736.3408,830.7211,32.1481,1894.2929,59.5623,736.6765,1000.1749,1,13,64.0,512.0,512.0,0.0,0.0,,,,,
+0.0,0,64,512,6,6.0,6,1.0,8946.791,7245.6212,9537.7237,29403,2237,4155.2128,0.2578,0.002,0.073,0.0,0.1889,0.0,0.0,1.0,0.0,0.0133,0.3438,0.2578,26.1139,75.4758,416.4844,51.5,857.9688,441.4843,0.0095,0.0622,4.9616,132.9495,904.4659,1037.814,32.4082,7012.0493,13.8671,904.8018,5955.3685,1,14,,,,,,,,,,

training_logs/20260428_203615_metrics_report.md ADDED Viewed

	@@ -0,0 +1,231 @@

+# SkyRL Training Metrics Analysis
+Generated from 1 log file
+## Overview
+| Log File | Total Steps | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
+|----------|-------------|---------------|---------------------|-------------------|----------------|
+| a2_rl_stack_pytest_v2_387401 | 14 | 14 | 0.3359 | 0.4551 | 34579.9 |
+## Async Metrics
+|                               |       Mean |      Std |   Min |   Max |   Count |
+|:------------------------------|-----------:|---------:|------:|------:|--------:|
+| async/discard_rate            |   0        | 0        |     0 |     0 |      14 |
+| async/discarded_count         |   0        | 0        |     0 |     0 |      14 |
+| async/effective_batch_groups  |  64        | 0        |    64 |    64 |      14 |
+| async/effective_batch_samples | 512        | 0        |   512 |   512 |      14 |
+| async/staleness_max           |   3        | 2.0755   |     0 |     6 |      14 |
+| async/staleness_mean          |   3        | 2.0755   |     0 |     6 |      14 |
+| async/staleness_min           |   3        | 2.0755   |     0 |     6 |      14 |
+| async/staleness_ratio         |   0.857143 | 0.363137 |     0 |     1 |      14 |
+## Generate Metrics
+|                                      |      Mean |      Std |      Min |      Max |   Count |
+|:-------------------------------------|----------:|---------:|---------:|---------:|--------:|
+| generate/avg_num_tokens              |  6608.03  | 1126.45  |  4741.99 |  8946.79 |      14 |
+| generate/avg_tokens_non_zero_rewards |  5704.32  |  854.75  |  4404.24 |  7245.62 |      14 |
+| generate/avg_tokens_zero_rewards     |  7073.66  | 1246.64  |  4822.94 |  9537.72 |      14 |
+| generate/max_num_tokens              | 20353.4   | 4453.55  | 14849    | 29403    |      14 |
+| generate/min_num_tokens              |   918.786 |  993.044 |     1    |  2297    |      14 |
+| generate/std_num_tokens              |  2875.61  |  558.202 |  2089.51 |  4155.21 |      14 |
+## Loss Metrics
+|                             |       Mean |        Std |     Min |    Max |   Count |
+|:----------------------------|-----------:|-----------:|--------:|-------:|--------:|
+| loss/avg_final_rewards      | 0.335943   | 0.0629694  |  0.2363 | 0.4551 |      14 |
+| loss/avg_raw_advantages     | 0.00156429 | 0.00358149 | -0.0048 | 0.0065 |      14 |
+| loss/avg_raw_advantages_abs | 0.118371   | 0.0344238  |  0.073  | 0.2156 |      14 |
+## Policy Metrics
+|                            |         Mean |         Std |     Min |    Max |   Count |
+|:---------------------------|-------------:|------------:|--------:|-------:|--------:|
+| policy/final_loss          | -7.14286e-06 | 2.67261e-05 | -0.0001 | 0      |      14 |
+| policy/policy_entropy      |  0.1767      | 0.00675209  |  0.1669 | 0.1889 |      14 |
+| policy/policy_loss         | -0.000164286 | 0.000534368 | -0.002  | 0      |      14 |
+| policy/policy_lr           |  0           | 0           |  0      | 0      |      14 |
+| policy/policy_update_steps |  1           | 0           |  1      | 1      |      14 |
+| policy/ppo_clip_ratio      |  0           | 0           |  0      | 0      |      14 |
+| policy/raw_grad_norm       |  0.02305     | 0.00651681  |  0.0133 | 0.039  |      14 |
+## Reward Metrics
+|                       |     Mean |       Std |    Min |    Max |   Count |
+|:----------------------|---------:|----------:|-------:|-------:|--------:|
+| reward/avg_pass_at_8  | 0.464286 | 0.0649824 | 0.3438 | 0.5781 |      14 |
+| reward/avg_raw_reward | 0.335943 | 0.0629694 | 0.2363 | 0.4551 |      14 |
+## System Metrics
+|                         |     Mean |          Std |      Min |      Max |   Count |
+|:------------------------|---------:|-------------:|---------:|---------:|--------:|
+| system/process_rss_gb   |  21.517  |  4.48783     |  12.8265 |  26.1139 |      14 |
+| system/process_vms_gb   |  74.6518 |  0.727882    |  73.2355 |  75.4758 |      14 |
+| system/ram_available_gb | 385.569  | 29.6712      | 348.868  | 416.484  |      14 |
+| system/ram_percent      |  55.0643 |  3.46424     |  51.5    |  59.3    |      14 |
+| system/ram_total_gb     | 857.969  |  2.35957e-13 | 857.969  | 857.969  |      14 |
+| system/ram_used_gb      | 472.399  | 29.6712      | 441.484  | 509.101  |      14 |
+## Timing Metrics
+|                                       |         Mean |          Std |       Min |       Max |   Count |
+|:--------------------------------------|-------------:|-------------:|----------:|----------:|--------:|
+| timing/cleanup_old_checkpoints        |    0.779807  |    2.88286   |    0.008  |   10.796  |      14 |
+| timing/compute_advantages_and_returns |    0.0574143 |    0.0179349 |    0.0396 |    0.1113 |      14 |
+| timing/convert_to_training_input      |    3.82427   |    0.55475   |    2.7561 |    4.9616 |      14 |
+| timing/fwd_logprobs_values_reward     |   90.7353    |   18.9282    |   62.0087 |  132.95   |      14 |
+| timing/policy_train                   |  742.715     |   71.7004    |  642.969  |  904.466  |      14 |
+| timing/run_training                   |  833.87      |   90.3807    |  706.375  | 1037.81   |      14 |
+| timing/save_checkpoints               |   33.056     |    3.13253   |   31.0012 |   43.6553 |      14 |
+| timing/step                           | 2469.99      | 1529.45      | 1345.97   | 7012.05   |      14 |
+| timing/sync_weights                   |   48.1967    |   14.6734    |   13.8671 |   59.5623 |      14 |
+| timing/train_critic_and_policy        |  743.077     |   71.7036    |  643.311  |  904.802  |      14 |
+| timing/wait_for_generation_buffer     | 1584.07      | 1485.24      |  502.226  | 5955.37   |      14 |
+| timing/save_hf_model                  |   36.7097    |    0.160725  |   36.5961 |   36.8234 |       2 |
+## Trainer Metrics
+|                     |   Mean |      Std |   Min |   Max |   Count |
+|:--------------------|-------:|---------:|------:|------:|--------:|
+| trainer/epoch       |    0.5 | 0.518875 |     0 |     1 |      14 |
+| trainer/global_step |    7.5 | 4.1833   |     1 |    14 |      14 |
+## Batch Errors Metrics
+|                                         |        Mean |         Std |        Min |         Max |   Count |
+|:----------------------------------------|------------:|------------:|-----------:|------------:|--------:|
+| batch_errors/total_batches              |  68.9231    |  17.7504    |  64        |  128        |      13 |
+| batch_errors/total_instances            | 551.385     | 142.003     | 512        | 1024        |      13 |
+| batch_errors/total_successful           | 550.462     | 141.684     | 509        | 1022        |      13 |
+| batch_errors/total_failed               |   0.692308  |   0.85485   |   0        |    2        |      13 |
+| batch_errors/total_masked               |   0         |   0         |   0        |    0        |      13 |
+| batch_errors/avg_VerifierTimeoutError   |   0.0260417 |   0.0127578 |   0.015625 |    0.046875 |       6 |
+| batch_errors/total_VerifierTimeoutError |   1.83333   |   0.752773  |   1        |    3        |       6 |
+| batch_errors/avg_RuntimeError           |   0.015625  | nan         |   0.015625 |    0.015625 |       1 |
+| batch_errors/total_RuntimeError         |   1         | nan         |   1        |    1        |       1 |
+## Training Progression by Log
+### a2_rl_stack_pytest_v2_387401
+| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
+|------|--------|--------|-----|------|---------------|-------------|
+| 1 | 0.2363 | 0.4375 | 0.000000 | 0.0000 | 2779.8 | 2017.1 |
+| 2 | 0.2891 | 0.5000 | 0.000000 | -0.0000 | 1392.6 | 580.5 |
+| 3 | 0.3789 | 0.4688 | 0.000000 | -0.0000 | 1346.0 | 502.2 |
+| 4 | 0.3633 | 0.5781 | 0.000000 | 0.0000 | 2000.5 | 1084.7 |
+| 5 | 0.3184 | 0.4062 | 0.000000 | -0.0000 | 1452.0 | 548.9 |
+| 6 | 0.3301 | 0.4688 | 0.000000 | -0.0000 | 1824.4 | 918.5 |
+| 7 | 0.2793 | 0.4062 | 0.000000 | -0.0000 | 4135.6 | 3138.3 |
+| 8 | 0.4043 | 0.5156 | 0.000000 | -0.0000 | 3348.3 | 2579.1 |
+| 9 | 0.3555 | 0.4375 | 0.000000 | -0.0000 | 1826.3 | 989.4 |
+| 10 | 0.4551 | 0.5469 | 0.000000 | -0.0001 | 1651.0 | 769.6 |
+| 11 | 0.4023 | 0.5156 | 0.000000 | -0.0000 | 2182.5 | 1268.4 |
+| 12 | 0.2793 | 0.3906 | 0.000000 | 0.0000 | 1734.5 | 824.8 |
+| 13 | 0.3535 | 0.4844 | 0.000000 | 0.0000 | 1894.3 | 1000.2 |
+| 14 | 0.2578 | 0.3438 | 0.000000 | 0.0000 | 7012.0 | 5955.4 |
+## Timing Analysis
+### Average Time Breakdown (% of step time)
+| Component | Avg % of Step Time |
+|-----------|-------------------|
+| wait_for_generation_buffer | 56.5% |
+| run_training | 40.7% |
+| train_critic_and_policy | 36.4% |
+| policy_train | 36.4% |
+| fwd_logprobs_values_reward | 4.4% |
+| sync_weights | 2.6% |
+| save_hf_model | 2.4% |
+| save_checkpoints | 1.6% |
+| convert_to_training_input | 0.2% |
+| cleanup_old_checkpoints | 0.0% |
+| compute_advantages_and_returns | 0.0% |
+## vLLM Inference Engine Analysis
+Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
+> **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
+> so we typically capture stats from one engine per timestamp. The stats shown are
+> **per-engine** values. Multiply by num_inference_engines for cluster-wide estimates.
+### Summary by Log (Per-Engine Stats)
+| Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
+|-----|-------------------|-------------------|--------------------------|----------------|------------------|
+| a2_rl_stack_pytest_v2_387401 | 3.5 | 0.0 | 69.6 tok/s | 5.9% | 85.6% |
+### Utilization Analysis (Per-Engine)
+Key indicators of inference engine utilization:
+- **Running requests/engine**: Concurrent requests being processed by each engine
+- **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
+- **Generation throughput**: Decode tokens/sec per engine
+  - An 8B model on an H100 can reach **1000+ tok/s** when saturated
+  - If throughput is <300 tok/s with 0 waiting requests, the engine is **starved for requests**
+#### a2_rl_stack_pytest_v2_387401
+- **Running requests/engine**: avg=3.5, max=14
+- **Waiting requests**: avg=0.0, max=0
+- **Generation throughput/engine**: avg=69.6 tok/s, max=319.7 tok/s
+- **KV cache usage**: avg=5.9%
+- **Prefix cache hit rate**: avg=85.6%
+- ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 3.5 running)
+  - Bottleneck is likely upstream (environment execution, not inference)
+## Trial-Level Analysis (from result.json)
+Total trials parsed: 7168
+### Turn Count Statistics
+| Metric | Value |
+|--------|-------|
+| Mean | 3.3 |
+| Median | 3.0 |
+| Std | 1.6 |
+| Min | 2 |
+| Max | 19 |
+| Count | 7168 |
+### Exception Distribution
+| Exception Type | Count | % |
+|---------------|-------|---|
+| No exception | 7143 | 99.7% |
+| AgentTimeoutError | 12 | 0.2% |
+| VerifierTimeoutError | 11 | 0.2% |
+| RuntimeError | 1 | 0.0% |
+| ContextLengthExceededError | 1 | 0.0% |
+### Turn Count by Exception Type
+| Exception Type | Mean Turns | Median Turns | Count |
+|---------------|-----------|-------------|-------|
+| ContextLengthExceededError | 15.0 | 15.0 | 1 |
+| AgentTimeoutError | 9.3 | 9.5 | 12 |
+| VerifierTimeoutError | 4.4 | 4.0 | 11 |
+| RuntimeError | 4.0 | 4.0 | 1 |
+| No exception | 3.3 | 3.0 | 7143 |
+### Turn Count by Outcome
+| Outcome | Mean Turns | Median Turns | Count |
+|---------|-----------|-------------|-------|
+| Success | 2.9 | 3.0 | 2408 |
+| Failure | 3.5 | 3.0 | 4748 |
+### Reward Summary
+- Mean reward: 0.3365
+- Success rate: 33.7%
+- Trials with reward data: 7156

training_logs/20260428_203615_metrics_table.csv ADDED Viewed

	@@ -0,0 +1,15 @@

+log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_RuntimeError,batch_errors/total_RuntimeError,global_step
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,0,0.0,0,0.0,4741.9902,4480.4132,4822.9386,14849,1,2194.273,0.2363,0.0057,0.1467,0.0,0.1855,0.0,0.0,1.0,0.0,0.0289,0.4375,0.2363,12.8265,73.2355,355.5015,58.6,857.9688,502.4673,10.796,0.1113,2.7561,62.0087,643.9144,706.3755,43.6553,2779.7671,53.5533,644.2551,2017.0819,0,1,128.0,1024.0,1022.0,2.0,0.0,0.015625,2.0,,,,1
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,1,1.0,1,1.0,5207.4805,4587.3176,5459.6346,15353,1,2543.871,0.2891,0.0032,0.2156,-0.0,0.1818,-0.0003,0.0,1.0,0.0,0.039,0.5,0.2891,13.6703,73.4764,355.181,58.6,857.9688,502.7878,0.0108,0.0549,3.5254,71.9423,683.3507,755.6622,31.4849,1392.6153,52.8988,683.6647,580.5066,0,2,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,,2
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,2,2.0,2,1.0,5185.8047,4404.2423,5662.6069,18658,1,2655.8627,0.3789,0.0056,0.1026,-0.0,0.1694,-0.0,0.0,1.0,0.0,0.0307,0.4688,0.3789,16.1691,73.7953,352.0549,59.0,857.9688,505.9138,0.0087,0.0759,3.5505,84.1237,702.8472,787.3638,32.8388,1345.9698,52.8027,703.1638,502.2264,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,,,,3
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,3,3.0,3,1.0,6173.4297,5171.7151,6744.9601,23599,1,2913.9495,0.3633,-0.0048,0.1501,0.0,0.1671,0.0,0.0,1.0,0.0,0.0269,0.5781,0.3633,19.7277,74.2299,348.8676,59.3,857.9688,509.1012,0.0089,0.0556,4.1262,92.6868,765.1808,858.2347,33.0996,2000.5081,53.4025,765.4919,1084.7149,0,4,64.0,512.0,512.0,0.0,0.0,,,,,,4
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,4,4.0,4,1.0,6649.6289,5533.7975,7170.7765,21118,1405,3018.764,0.3184,0.002,0.0977,-0.0,0.1669,-0.0,0.0,1.0,0.0,0.0231,0.4062,0.3184,20.1003,74.4495,353.0518,58.9,857.9688,504.9169,0.0081,0.052,3.8742,89.0204,756.9617,846.374,33.0429,1451.9746,52.8114,757.3012,548.8843,0,5,64.0,512.0,512.0,0.0,0.0,,,36.8234,,,5
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,5,5.0,5,1.0,6952.5547,6301.3432,7273.414,20095,1889,2871.2745,0.3301,0.0065,0.1201,-0.0,0.1707,-0.0,0.0,1.0,0.0,0.0208,0.4688,0.3301,20.2417,74.4911,352.5635,58.9,857.9688,505.4053,0.008,0.0486,3.8545,91.1339,754.2028,845.7448,31.6035,1824.3834,56.2481,754.562,918.5016,0,6,64.0,512.0,512.0,0.0,0.0,,,,,,6
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,6,6.0,6,1.0,7821.9141,6336.3427,8397.6233,28454,2297,3823.9451,0.2793,-0.0032,0.1026,-0.0,0.1755,-0.0,0.0,1.0,0.0,0.0234,0.4062,0.2793,24.1808,74.7747,405.2173,52.8,857.9688,452.7515,0.011,0.0608,4.7397,122.4595,855.6703,978.5429,33.4132,4135.5872,14.0068,856.0222,3138.2634,0,7,64.0,512.0,512.0,0.0,0.0,,,,,,7
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,0,0.0,0,0.0,5879.5469,5305.2657,6269.3049,14895,1513,2089.5078,0.4043,0.0018,0.096,-0.0,0.1721,-0.0,0.0,1.0,0.0,0.0209,0.5156,0.4043,24.3558,75.0631,401.2662,53.2,857.9688,456.7026,0.01,0.0396,3.3153,69.9986,642.9688,713.3496,32.0209,3348.3309,52.4998,643.311,2579.128,1,8,64.0,512.0,512.0,0.0,0.0,,,,,,8
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,1,1.0,1,1.0,6734.0,6444.1044,6893.8818,18643,2175,2384.6491,0.3555,0.0017,0.1221,-0.0,0.1782,-0.0,0.0,1.0,0.0,0.0215,0.4375,0.3555,24.6022,75.2357,411.7228,52.0,857.9688,446.246,0.009,0.0439,3.6223,78.3919,702.1173,780.9468,31.0012,1826.3469,52.3499,702.5107,989.3979,1,9,64.0,512.0,509.0,2.0,0.0,0.046875,3.0,,,,9
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,2,2.0,2,1.0,6511.9746,5525.2532,7336.0108,18605,1,2862.176,0.4551,0.0042,0.1035,-0.0001,0.1794,-0.002,0.0,1.0,0.0,0.0187,0.5469,0.4551,24.6203,75.1738,409.3499,52.3,857.9688,448.6188,0.0111,0.044,3.5639,90.4193,734.5286,825.4112,31.3292,1651.042,52.4615,734.9477,769.5638,1,10,64.0,512.0,512.0,0.0,0.0,,,36.5961,,,10
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,3,3.0,3,1.0,7150.2969,5831.9757,8037.7941,22262,1340,2965.0407,0.4023,0.0008,0.1053,-0.0,0.1771,-0.0,0.0,1.0,0.0,0.0194,0.5156,0.4023,24.8219,75.2715,408.2212,52.4,857.9688,449.7476,0.0086,0.0515,4.1267,95.6788,762.1538,858.3374,32.4094,2182.5423,51.6537,762.6066,1268.3906,1,11,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,,11
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,4,4.0,4,1.0,7161.0645,6208.9091,7530.0569,19269,1,2859.9143,0.2793,-0.0043,0.1026,0.0,0.1809,0.0,0.0,1.0,0.0,0.0175,0.3906,0.2793,24.8525,75.2184,415.8303,51.5,857.9688,442.1384,0.0083,0.0535,3.7269,95.4867,753.3097,849.3001,32.3294,1734.4666,56.6358,753.7596,824.7671,1,12,64.0,512.0,509.0,2.0,0.0,0.03125,2.0,,0.015625,1.0,12
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,5,5.0,5,1.0,7395.9434,6484.1768,7894.5227,19744,1,2920.1255,0.3535,0.0007,0.1193,0.0,0.1803,0.0,0.0,1.0,0.0,0.0186,0.4844,0.3535,24.9543,75.2349,412.6592,51.9,857.9688,445.3096,0.0093,0.05,3.7965,93.9943,736.3408,830.7211,32.1481,1894.2929,59.5623,736.6765,1000.1749,1,13,64.0,512.0,512.0,0.0,0.0,,,,,,13
+a2_rl_stack_pytest_v2_387401,0.0,0,64,512,6,6.0,6,1.0,8946.791,7245.6212,9537.7237,29403,2237,4155.2128,0.2578,0.002,0.073,0.0,0.1889,0.0,0.0,1.0,0.0,0.0133,0.3438,0.2578,26.1139,75.4758,416.4844,51.5,857.9688,441.4843,0.0095,0.0622,4.9616,132.9495,904.4659,1037.814,32.4082,7012.0493,13.8671,904.8018,5955.3685,1,14,,,,,,,,,,,14

training_logs/20260428_203615_reward_vs_steps.png ADDED Viewed

Git LFS Details

SHA256: b76373f5ca18051354f9b4d1e962ecf1aa8d1aa5b0b6e5bf39148f0cc4f30874
Pointer size: 131 Bytes
Size of remote file: 145 kB

training_logs/20260428_203615_trial_results.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

training_logs/20260428_203615_turn_count_distribution.png ADDED Viewed

training_logs/20260428_203615_vllm_metrics_a2_rl_stack_pytest_v2_387401.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

training_logs/20260428_203615_vllm_metrics_table.csv ADDED Viewed

The diff for this file is too large to render. See raw diff