Text Classification
Transformers
PyTorch
English
electra
reward-model
reward_model
RLHF
Inference Endpoints
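The log below is the Trainer's `trainer_state.json` for this reward model: interleaved training-loss entries (every 10 steps) and evaluation entries on the webgpt, hfsummary, and gptsynthetic sets (every 250 steps). A minimal sketch for inspecting it, assuming the file is saved locally under its standard name `trainer_state.json` (the filename is an assumption, not stated in the log itself):

```python
# Hypothetical helper: summarize the training log below.
# Assumes trainer_state.json is in the current directory.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; eval entries carry per-dataset metrics.
train_points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_keys = (
    "eval_webgpt_accuracy",
    "eval_hfsummary_accuracy",
    "eval_gptsynthetic_accuracy",
)
eval_points = {
    k: [(e["step"], e[k]) for e in state["log_history"] if k in e]
    for k in eval_keys
}

print(f"{len(train_points)} loss entries, last global step {state['global_step']}")
for name, pts in eval_points.items():
    step, acc = pts[-1]
    print(f"{name}: {acc:.4f} at step {step}")
```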
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8507242605603262,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.62e-07,
"loss": 0.6957,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.42e-07,
"loss": 0.6958,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 5.22e-07,
"loss": 0.6916,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 7.02e-07,
"loss": 0.6959,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 8.820000000000001e-07,
"loss": 0.6927,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 1.062e-06,
"loss": 0.6921,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 1.2420000000000001e-06,
"loss": 0.691,
"step": 70
},
{
"epoch": 0.04,
"learning_rate": 1.4220000000000001e-06,
"loss": 0.6892,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 1.6019999999999999e-06,
"loss": 0.6881,
"step": 90
},
{
"epoch": 0.05,
"learning_rate": 1.782e-06,
"loss": 0.6857,
"step": 100
},
{
"epoch": 0.05,
"learning_rate": 1.962e-06,
"loss": 0.6836,
"step": 110
},
{
"epoch": 0.06,
"learning_rate": 2.142e-06,
"loss": 0.6802,
"step": 120
},
{
"epoch": 0.06,
"learning_rate": 2.322e-06,
"loss": 0.6714,
"step": 130
},
{
"epoch": 0.06,
"learning_rate": 2.4840000000000003e-06,
"loss": 0.6661,
"step": 140
},
{
"epoch": 0.07,
"learning_rate": 2.664e-06,
"loss": 0.6599,
"step": 150
},
{
"epoch": 0.07,
"learning_rate": 2.826e-06,
"loss": 0.6524,
"step": 160
},
{
"epoch": 0.08,
"learning_rate": 3.006e-06,
"loss": 0.6362,
"step": 170
},
{
"epoch": 0.08,
"learning_rate": 3.168e-06,
"loss": 0.632,
"step": 180
},
{
"epoch": 0.09,
"learning_rate": 3.348e-06,
"loss": 0.6301,
"step": 190
},
{
"epoch": 0.09,
"learning_rate": 3.5280000000000004e-06,
"loss": 0.6272,
"step": 200
},
{
"epoch": 0.1,
"learning_rate": 3.708e-06,
"loss": 0.6172,
"step": 210
},
{
"epoch": 0.1,
"learning_rate": 3.87e-06,
"loss": 0.6146,
"step": 220
},
{
"epoch": 0.11,
"learning_rate": 4.05e-06,
"loss": 0.606,
"step": 230
},
{
"epoch": 0.11,
"learning_rate": 4.23e-06,
"loss": 0.6194,
"step": 240
},
{
"epoch": 0.12,
"learning_rate": 4.392e-06,
"loss": 0.5863,
"step": 250
},
{
"epoch": 0.12,
"eval_webgpt_accuracy": 0.547752808988764,
"eval_webgpt_loss": 0.7199520468711853,
"eval_webgpt_runtime": 152.9037,
"eval_webgpt_samples_per_second": 25.611,
"eval_webgpt_steps_per_second": 2.564,
"step": 250
},
{
"epoch": 0.12,
"eval_hfsummary_accuracy": 0.630263277211861,
"eval_hfsummary_loss": 0.6598580479621887,
"eval_hfsummary_runtime": 2471.5424,
"eval_hfsummary_samples_per_second": 13.386,
"eval_hfsummary_steps_per_second": 1.339,
"step": 250
},
{
"epoch": 0.12,
"eval_gptsynthetic_accuracy": 0.9978883861236802,
"eval_gptsynthetic_loss": 0.09524902701377869,
"eval_gptsynthetic_runtime": 116.7014,
"eval_gptsynthetic_samples_per_second": 28.406,
"eval_gptsynthetic_steps_per_second": 2.845,
"step": 250
},
{
"epoch": 0.12,
"learning_rate": 4.5720000000000004e-06,
"loss": 0.5818,
"step": 260
},
{
"epoch": 0.12,
"learning_rate": 4.752e-06,
"loss": 0.5817,
"step": 270
},
{
"epoch": 0.13,
"learning_rate": 4.932e-06,
"loss": 0.5713,
"step": 280
},
{
"epoch": 0.13,
"learning_rate": 5.1119999999999995e-06,
"loss": 0.5787,
"step": 290
},
{
"epoch": 0.14,
"learning_rate": 5.2919999999999995e-06,
"loss": 0.566,
"step": 300
},
{
"epoch": 0.14,
"learning_rate": 5.472e-06,
"loss": 0.5765,
"step": 310
},
{
"epoch": 0.15,
"learning_rate": 5.652e-06,
"loss": 0.5286,
"step": 320
},
{
"epoch": 0.15,
"learning_rate": 5.814000000000001e-06,
"loss": 0.5644,
"step": 330
},
{
"epoch": 0.16,
"learning_rate": 5.9940000000000005e-06,
"loss": 0.5278,
"step": 340
},
{
"epoch": 0.16,
"learning_rate": 6.1740000000000005e-06,
"loss": 0.5376,
"step": 350
},
{
"epoch": 0.17,
"learning_rate": 6.354e-06,
"loss": 0.5396,
"step": 360
},
{
"epoch": 0.17,
"learning_rate": 6.534e-06,
"loss": 0.517,
"step": 370
},
{
"epoch": 0.18,
"learning_rate": 6.7140000000000004e-06,
"loss": 0.5001,
"step": 380
},
{
"epoch": 0.18,
"learning_rate": 6.894e-06,
"loss": 0.5069,
"step": 390
},
{
"epoch": 0.19,
"learning_rate": 7.074e-06,
"loss": 0.5102,
"step": 400
},
{
"epoch": 0.19,
"learning_rate": 7.254e-06,
"loss": 0.5105,
"step": 410
},
{
"epoch": 0.19,
"learning_rate": 7.4339999999999995e-06,
"loss": 0.5332,
"step": 420
},
{
"epoch": 0.2,
"learning_rate": 7.614e-06,
"loss": 0.5076,
"step": 430
},
{
"epoch": 0.2,
"learning_rate": 7.794e-06,
"loss": 0.5088,
"step": 440
},
{
"epoch": 0.21,
"learning_rate": 7.974e-06,
"loss": 0.4968,
"step": 450
},
{
"epoch": 0.21,
"learning_rate": 8.154e-06,
"loss": 0.4902,
"step": 460
},
{
"epoch": 0.22,
"learning_rate": 8.334e-06,
"loss": 0.5097,
"step": 470
},
{
"epoch": 0.22,
"learning_rate": 8.514e-06,
"loss": 0.5038,
"step": 480
},
{
"epoch": 0.23,
"learning_rate": 8.694e-06,
"loss": 0.5082,
"step": 490
},
{
"epoch": 0.23,
"learning_rate": 8.874e-06,
"loss": 0.4887,
"step": 500
},
{
"epoch": 0.23,
"eval_webgpt_accuracy": 0.5623084780388151,
"eval_webgpt_loss": 0.7031316161155701,
"eval_webgpt_runtime": 152.8509,
"eval_webgpt_samples_per_second": 25.62,
"eval_webgpt_steps_per_second": 2.565,
"step": 500
},
{
"epoch": 0.23,
"eval_hfsummary_accuracy": 0.6744249312335641,
"eval_hfsummary_loss": 0.6107630729675293,
"eval_hfsummary_runtime": 2471.6908,
"eval_hfsummary_samples_per_second": 13.385,
"eval_hfsummary_steps_per_second": 1.339,
"step": 500
},
{
"epoch": 0.23,
"eval_gptsynthetic_accuracy": 0.9987933634992459,
"eval_gptsynthetic_loss": 0.023942505940794945,
"eval_gptsynthetic_runtime": 116.8138,
"eval_gptsynthetic_samples_per_second": 28.379,
"eval_gptsynthetic_steps_per_second": 2.842,
"step": 500
},
{
"epoch": 0.24,
"learning_rate": 8.99293563579278e-06,
"loss": 0.4875,
"step": 510
},
{
"epoch": 0.24,
"learning_rate": 8.969387755102041e-06,
"loss": 0.483,
"step": 520
},
{
"epoch": 0.25,
"learning_rate": 8.945839874411304e-06,
"loss": 0.5057,
"step": 530
},
{
"epoch": 0.25,
"learning_rate": 8.922291993720566e-06,
"loss": 0.4923,
"step": 540
},
{
"epoch": 0.25,
"learning_rate": 8.898744113029827e-06,
"loss": 0.4623,
"step": 550
},
{
"epoch": 0.26,
"learning_rate": 8.87519623233909e-06,
"loss": 0.4735,
"step": 560
},
{
"epoch": 0.26,
"learning_rate": 8.851648351648352e-06,
"loss": 0.4516,
"step": 570
},
{
"epoch": 0.27,
"learning_rate": 8.828100470957614e-06,
"loss": 0.4542,
"step": 580
},
{
"epoch": 0.27,
"learning_rate": 8.804552590266875e-06,
"loss": 0.465,
"step": 590
},
{
"epoch": 0.28,
"learning_rate": 8.781004709576138e-06,
"loss": 0.4577,
"step": 600
},
{
"epoch": 0.28,
"learning_rate": 8.7574568288854e-06,
"loss": 0.4994,
"step": 610
},
{
"epoch": 0.29,
"learning_rate": 8.733908948194663e-06,
"loss": 0.4565,
"step": 620
},
{
"epoch": 0.29,
"learning_rate": 8.710361067503925e-06,
"loss": 0.4617,
"step": 630
},
{
"epoch": 0.3,
"learning_rate": 8.686813186813188e-06,
"loss": 0.4598,
"step": 640
},
{
"epoch": 0.3,
"learning_rate": 8.663265306122449e-06,
"loss": 0.4441,
"step": 650
},
{
"epoch": 0.31,
"learning_rate": 8.639717425431711e-06,
"loss": 0.4935,
"step": 660
},
{
"epoch": 0.31,
"learning_rate": 8.616169544740974e-06,
"loss": 0.4646,
"step": 670
},
{
"epoch": 0.31,
"learning_rate": 8.592621664050236e-06,
"loss": 0.4771,
"step": 680
},
{
"epoch": 0.32,
"learning_rate": 8.569073783359497e-06,
"loss": 0.4754,
"step": 690
},
{
"epoch": 0.32,
"learning_rate": 8.54552590266876e-06,
"loss": 0.4684,
"step": 700
},
{
"epoch": 0.33,
"learning_rate": 8.521978021978022e-06,
"loss": 0.4691,
"step": 710
},
{
"epoch": 0.33,
"learning_rate": 8.498430141287285e-06,
"loss": 0.4807,
"step": 720
},
{
"epoch": 0.34,
"learning_rate": 8.474882260596547e-06,
"loss": 0.4597,
"step": 730
},
{
"epoch": 0.34,
"learning_rate": 8.45133437990581e-06,
"loss": 0.455,
"step": 740
},
{
"epoch": 0.35,
"learning_rate": 8.42778649921507e-06,
"loss": 0.4561,
"step": 750
},
{
"epoch": 0.35,
"eval_webgpt_accuracy": 0.5702247191011236,
"eval_webgpt_loss": 0.6994287371635437,
"eval_webgpt_runtime": 152.9386,
"eval_webgpt_samples_per_second": 25.605,
"eval_webgpt_steps_per_second": 2.563,
"step": 750
},
{
"epoch": 0.35,
"eval_hfsummary_accuracy": 0.6827373575552399,
"eval_hfsummary_loss": 0.60725337266922,
"eval_hfsummary_runtime": 2470.4498,
"eval_hfsummary_samples_per_second": 13.391,
"eval_hfsummary_steps_per_second": 1.339,
"step": 750
},
{
"epoch": 0.35,
"eval_gptsynthetic_accuracy": 0.9975867269984917,
"eval_gptsynthetic_loss": 0.01110268384218216,
"eval_gptsynthetic_runtime": 116.8749,
"eval_gptsynthetic_samples_per_second": 28.364,
"eval_gptsynthetic_steps_per_second": 2.841,
"step": 750
},
{
"epoch": 0.35,
"learning_rate": 8.404238618524333e-06,
"loss": 0.4578,
"step": 760
},
{
"epoch": 0.36,
"learning_rate": 8.380690737833595e-06,
"loss": 0.4798,
"step": 770
},
{
"epoch": 0.36,
"learning_rate": 8.359497645211932e-06,
"loss": 0.4402,
"step": 780
},
{
"epoch": 0.37,
"learning_rate": 8.335949764521193e-06,
"loss": 0.4632,
"step": 790
},
{
"epoch": 0.37,
"learning_rate": 8.312401883830455e-06,
"loss": 0.4505,
"step": 800
},
{
"epoch": 0.37,
"learning_rate": 8.288854003139718e-06,
"loss": 0.4641,
"step": 810
},
{
"epoch": 0.38,
"learning_rate": 8.26530612244898e-06,
"loss": 0.4373,
"step": 820
},
{
"epoch": 0.38,
"learning_rate": 8.241758241758241e-06,
"loss": 0.4192,
"step": 830
},
{
"epoch": 0.39,
"learning_rate": 8.218210361067504e-06,
"loss": 0.4671,
"step": 840
},
{
"epoch": 0.39,
"learning_rate": 8.194662480376766e-06,
"loss": 0.4481,
"step": 850
},
{
"epoch": 0.4,
"learning_rate": 8.171114599686029e-06,
"loss": 0.4481,
"step": 860
},
{
"epoch": 0.4,
"learning_rate": 8.147566718995291e-06,
"loss": 0.4317,
"step": 870
},
{
"epoch": 0.41,
"learning_rate": 8.124018838304554e-06,
"loss": 0.4517,
"step": 880
},
{
"epoch": 0.41,
"learning_rate": 8.100470957613814e-06,
"loss": 0.4282,
"step": 890
},
{
"epoch": 0.42,
"learning_rate": 8.076923076923077e-06,
"loss": 0.4543,
"step": 900
},
{
"epoch": 0.42,
"learning_rate": 8.05337519623234e-06,
"loss": 0.4303,
"step": 910
},
{
"epoch": 0.43,
"learning_rate": 8.029827315541602e-06,
"loss": 0.4423,
"step": 920
},
{
"epoch": 0.43,
"learning_rate": 8.006279434850863e-06,
"loss": 0.4624,
"step": 930
},
{
"epoch": 0.43,
"learning_rate": 7.982731554160125e-06,
"loss": 0.4318,
"step": 940
},
{
"epoch": 0.44,
"learning_rate": 7.959183673469388e-06,
"loss": 0.4374,
"step": 950
},
{
"epoch": 0.44,
"learning_rate": 7.93563579277865e-06,
"loss": 0.4271,
"step": 960
},
{
"epoch": 0.45,
"learning_rate": 7.912087912087913e-06,
"loss": 0.4535,
"step": 970
},
{
"epoch": 0.45,
"learning_rate": 7.888540031397175e-06,
"loss": 0.4393,
"step": 980
},
{
"epoch": 0.46,
"learning_rate": 7.864992150706436e-06,
"loss": 0.4513,
"step": 990
},
{
"epoch": 0.46,
"learning_rate": 7.841444270015699e-06,
"loss": 0.4259,
"step": 1000
},
{
"epoch": 0.46,
"eval_webgpt_accuracy": 0.5720122574055159,
"eval_webgpt_loss": 0.6880369782447815,
"eval_webgpt_runtime": 153.0376,
"eval_webgpt_samples_per_second": 25.588,
"eval_webgpt_steps_per_second": 2.561,
"step": 1000
},
{
"epoch": 0.46,
"eval_hfsummary_accuracy": 0.6844905238339933,
"eval_hfsummary_loss": 0.6170333027839661,
"eval_hfsummary_runtime": 2474.9308,
"eval_hfsummary_samples_per_second": 13.367,
"eval_hfsummary_steps_per_second": 1.337,
"step": 1000
},
{
"epoch": 0.46,
"eval_gptsynthetic_accuracy": 0.9978883861236802,
"eval_gptsynthetic_loss": 0.014577150344848633,
"eval_gptsynthetic_runtime": 116.8198,
"eval_gptsynthetic_samples_per_second": 28.377,
"eval_gptsynthetic_steps_per_second": 2.842,
"step": 1000
},
{
"epoch": 0.47,
"learning_rate": 7.817896389324961e-06,
"loss": 0.449,
"step": 1010
},
{
"epoch": 0.47,
"learning_rate": 7.794348508634224e-06,
"loss": 0.4274,
"step": 1020
},
{
"epoch": 0.48,
"learning_rate": 7.770800627943485e-06,
"loss": 0.4014,
"step": 1030
},
{
"epoch": 0.48,
"learning_rate": 7.747252747252747e-06,
"loss": 0.4356,
"step": 1040
},
{
"epoch": 0.49,
"learning_rate": 7.72370486656201e-06,
"loss": 0.4378,
"step": 1050
},
{
"epoch": 0.49,
"learning_rate": 7.700156985871272e-06,
"loss": 0.4298,
"step": 1060
},
{
"epoch": 0.49,
"learning_rate": 7.676609105180535e-06,
"loss": 0.4514,
"step": 1070
},
{
"epoch": 0.5,
"learning_rate": 7.653061224489796e-06,
"loss": 0.4526,
"step": 1080
},
{
"epoch": 0.5,
"learning_rate": 7.629513343799058e-06,
"loss": 0.4192,
"step": 1090
},
{
"epoch": 0.51,
"learning_rate": 7.60596546310832e-06,
"loss": 0.4433,
"step": 1100
},
{
"epoch": 0.51,
"learning_rate": 7.582417582417582e-06,
"loss": 0.4199,
"step": 1110
},
{
"epoch": 0.52,
"learning_rate": 7.558869701726845e-06,
"loss": 0.4419,
"step": 1120
},
{
"epoch": 0.52,
"learning_rate": 7.535321821036107e-06,
"loss": 0.4163,
"step": 1130
},
{
"epoch": 0.53,
"learning_rate": 7.511773940345369e-06,
"loss": 0.411,
"step": 1140
},
{
"epoch": 0.53,
"learning_rate": 7.4882260596546314e-06,
"loss": 0.4492,
"step": 1150
},
{
"epoch": 0.54,
"learning_rate": 7.464678178963893e-06,
"loss": 0.454,
"step": 1160
},
{
"epoch": 0.54,
"learning_rate": 7.441130298273156e-06,
"loss": 0.4311,
"step": 1170
},
{
"epoch": 0.55,
"learning_rate": 7.417582417582417e-06,
"loss": 0.4368,
"step": 1180
},
{
"epoch": 0.55,
"learning_rate": 7.39403453689168e-06,
"loss": 0.4264,
"step": 1190
},
{
"epoch": 0.56,
"learning_rate": 7.3704866562009415e-06,
"loss": 0.4163,
"step": 1200
},
{
"epoch": 0.56,
"learning_rate": 7.346938775510204e-06,
"loss": 0.4214,
"step": 1210
},
{
"epoch": 0.56,
"learning_rate": 7.3233908948194665e-06,
"loss": 0.4263,
"step": 1220
},
{
"epoch": 0.57,
"learning_rate": 7.299843014128729e-06,
"loss": 0.4233,
"step": 1230
},
{
"epoch": 0.57,
"learning_rate": 7.276295133437991e-06,
"loss": 0.4331,
"step": 1240
},
{
"epoch": 0.58,
"learning_rate": 7.252747252747253e-06,
"loss": 0.4241,
"step": 1250
},
{
"epoch": 0.58,
"eval_webgpt_accuracy": 0.5806945863125639,
"eval_webgpt_loss": 0.6766389608383179,
"eval_webgpt_runtime": 153.0189,
"eval_webgpt_samples_per_second": 25.592,
"eval_webgpt_steps_per_second": 2.562,
"step": 1250
},
{
"epoch": 0.58,
"eval_hfsummary_accuracy": 0.6927122691412508,
"eval_hfsummary_loss": 0.6117472648620605,
"eval_hfsummary_runtime": 2473.4013,
"eval_hfsummary_samples_per_second": 13.376,
"eval_hfsummary_steps_per_second": 1.338,
"step": 1250
},
{
"epoch": 0.58,
"eval_gptsynthetic_accuracy": 0.9987933634992459,
"eval_gptsynthetic_loss": 0.007306403946131468,
"eval_gptsynthetic_runtime": 116.8034,
"eval_gptsynthetic_samples_per_second": 28.381,
"eval_gptsynthetic_steps_per_second": 2.842,
"step": 1250
},
{
"epoch": 0.58,
"learning_rate": 7.229199372056515e-06,
"loss": 0.3976,
"step": 1260
},
{
"epoch": 0.59,
"learning_rate": 7.205651491365777e-06,
"loss": 0.4221,
"step": 1270
},
{
"epoch": 0.59,
"learning_rate": 7.182103610675039e-06,
"loss": 0.4225,
"step": 1280
},
{
"epoch": 0.6,
"learning_rate": 7.158555729984302e-06,
"loss": 0.3961,
"step": 1290
},
{
"epoch": 0.6,
"learning_rate": 7.135007849293563e-06,
"loss": 0.4348,
"step": 1300
},
{
"epoch": 0.61,
"learning_rate": 7.111459968602826e-06,
"loss": 0.4146,
"step": 1310
},
{
"epoch": 0.61,
"learning_rate": 7.087912087912088e-06,
"loss": 0.4122,
"step": 1320
},
{
"epoch": 0.62,
"learning_rate": 7.064364207221351e-06,
"loss": 0.4126,
"step": 1330
},