nid989's picture
Upload trainer_state.json
96faccd
raw history blame
No virus
31.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.25033952297803896,
"global_step": 40000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.984353779813873e-05,
"loss": 2.5206,
"step": 500
},
{
"epoch": 0.0,
"eval_bleu": 38.3843,
"eval_gen_len": 23.8603,
"eval_loss": 1.8556830883026123,
"eval_runtime": 1197.8495,
"eval_samples_per_second": 2.515,
"eval_steps_per_second": 0.629,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.9687075596277455e-05,
"loss": 1.9572,
"step": 1000
},
{
"epoch": 0.01,
"eval_bleu": 22.3733,
"eval_gen_len": 18.9091,
"eval_loss": 2.9118094444274902,
"eval_runtime": 1006.4293,
"eval_samples_per_second": 2.994,
"eval_steps_per_second": 0.749,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.953061339441618e-05,
"loss": 1.7653,
"step": 1500
},
{
"epoch": 0.01,
"eval_bleu": 38.119,
"eval_gen_len": 23.6728,
"eval_loss": 1.9158709049224854,
"eval_runtime": 1104.4939,
"eval_samples_per_second": 2.728,
"eval_steps_per_second": 0.683,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.937415119255491e-05,
"loss": 1.715,
"step": 2000
},
{
"epoch": 0.01,
"eval_bleu": 37.8451,
"eval_gen_len": 23.7046,
"eval_loss": 1.9664654731750488,
"eval_runtime": 1119.4046,
"eval_samples_per_second": 2.692,
"eval_steps_per_second": 0.674,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 4.921768899069363e-05,
"loss": 1.7164,
"step": 2500
},
{
"epoch": 0.02,
"eval_bleu": 36.315,
"eval_gen_len": 23.1172,
"eval_loss": 1.8593918085098267,
"eval_runtime": 1061.9031,
"eval_samples_per_second": 2.837,
"eval_steps_per_second": 0.71,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 4.906122678883235e-05,
"loss": 1.616,
"step": 3000
},
{
"epoch": 0.02,
"eval_bleu": 37.6941,
"eval_gen_len": 23.617,
"eval_loss": 1.816091537475586,
"eval_runtime": 1096.7235,
"eval_samples_per_second": 2.747,
"eval_steps_per_second": 0.688,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 4.890476458697108e-05,
"loss": 1.6131,
"step": 3500
},
{
"epoch": 0.02,
"eval_bleu": 34.7085,
"eval_gen_len": 22.4826,
"eval_loss": 1.9029065370559692,
"eval_runtime": 1058.2776,
"eval_samples_per_second": 2.847,
"eval_steps_per_second": 0.712,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 4.8748302385109805e-05,
"loss": 1.5981,
"step": 4000
},
{
"epoch": 0.03,
"eval_bleu": 37.0045,
"eval_gen_len": 24.0611,
"eval_loss": 1.9932270050048828,
"eval_runtime": 1166.939,
"eval_samples_per_second": 2.582,
"eval_steps_per_second": 0.646,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 4.859184018324853e-05,
"loss": 1.5898,
"step": 4500
},
{
"epoch": 0.03,
"eval_bleu": 38.2992,
"eval_gen_len": 24.2121,
"eval_loss": 1.874466896057129,
"eval_runtime": 1133.0065,
"eval_samples_per_second": 2.659,
"eval_steps_per_second": 0.665,
"step": 4500
},
{
"epoch": 0.03,
"learning_rate": 4.843537798138726e-05,
"loss": 1.5582,
"step": 5000
},
{
"epoch": 0.03,
"eval_bleu": 36.9851,
"eval_gen_len": 23.4202,
"eval_loss": 1.8227670192718506,
"eval_runtime": 1093.0431,
"eval_samples_per_second": 2.757,
"eval_steps_per_second": 0.69,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 4.8278915779525984e-05,
"loss": 1.7726,
"step": 5500
},
{
"epoch": 0.03,
"eval_bleu": 32.3628,
"eval_gen_len": 22.3083,
"eval_loss": 2.096940755844116,
"eval_runtime": 1059.9352,
"eval_samples_per_second": 2.843,
"eval_steps_per_second": 0.711,
"step": 5500
},
{
"epoch": 0.04,
"learning_rate": 4.812245357766471e-05,
"loss": 1.7102,
"step": 6000
},
{
"epoch": 0.04,
"eval_bleu": 36.1748,
"eval_gen_len": 24.1942,
"eval_loss": 2.018200397491455,
"eval_runtime": 1129.5162,
"eval_samples_per_second": 2.668,
"eval_steps_per_second": 0.668,
"step": 6000
},
{
"epoch": 0.04,
"learning_rate": 4.7965991375803436e-05,
"loss": 1.521,
"step": 6500
},
{
"epoch": 0.04,
"eval_bleu": 32.2516,
"eval_gen_len": 28.4076,
"eval_loss": 2.057112693786621,
"eval_runtime": 1610.6798,
"eval_samples_per_second": 1.871,
"eval_steps_per_second": 0.468,
"step": 6500
},
{
"epoch": 0.04,
"learning_rate": 4.780952917394216e-05,
"loss": 1.6184,
"step": 7000
},
{
"epoch": 0.04,
"eval_bleu": 37.8637,
"eval_gen_len": 23.6844,
"eval_loss": 1.8763169050216675,
"eval_runtime": 1099.6674,
"eval_samples_per_second": 2.74,
"eval_steps_per_second": 0.686,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 4.765306697208088e-05,
"loss": 1.5384,
"step": 7500
},
{
"epoch": 0.05,
"eval_bleu": 37.7753,
"eval_gen_len": 23.8984,
"eval_loss": 1.8554104566574097,
"eval_runtime": 1111.1157,
"eval_samples_per_second": 2.712,
"eval_steps_per_second": 0.679,
"step": 7500
},
{
"epoch": 0.05,
"learning_rate": 4.749660477021961e-05,
"loss": 1.5485,
"step": 8000
},
{
"epoch": 0.05,
"eval_bleu": 37.1929,
"eval_gen_len": 24.0408,
"eval_loss": 1.9456806182861328,
"eval_runtime": 1136.0111,
"eval_samples_per_second": 2.652,
"eval_steps_per_second": 0.664,
"step": 8000
},
{
"epoch": 0.05,
"learning_rate": 4.7340142568358334e-05,
"loss": 1.4872,
"step": 8500
},
{
"epoch": 0.05,
"eval_bleu": 35.4276,
"eval_gen_len": 23.2008,
"eval_loss": 1.8643290996551514,
"eval_runtime": 1078.1924,
"eval_samples_per_second": 2.794,
"eval_steps_per_second": 0.699,
"step": 8500
},
{
"epoch": 0.06,
"learning_rate": 4.718368036649706e-05,
"loss": 1.4412,
"step": 9000
},
{
"epoch": 0.06,
"eval_bleu": 35.7002,
"eval_gen_len": 23.613,
"eval_loss": 1.910874366760254,
"eval_runtime": 1134.7549,
"eval_samples_per_second": 2.655,
"eval_steps_per_second": 0.664,
"step": 9000
},
{
"epoch": 0.06,
"learning_rate": 4.702721816463579e-05,
"loss": 1.4483,
"step": 9500
},
{
"epoch": 0.06,
"eval_bleu": 37.6828,
"eval_gen_len": 24.0441,
"eval_loss": 1.8838558197021484,
"eval_runtime": 1126.6767,
"eval_samples_per_second": 2.674,
"eval_steps_per_second": 0.669,
"step": 9500
},
{
"epoch": 0.06,
"learning_rate": 4.687075596277451e-05,
"loss": 1.4523,
"step": 10000
},
{
"epoch": 0.06,
"eval_bleu": 36.2473,
"eval_gen_len": 23.3561,
"eval_loss": 1.940147042274475,
"eval_runtime": 1087.7551,
"eval_samples_per_second": 2.77,
"eval_steps_per_second": 0.693,
"step": 10000
},
{
"epoch": 0.07,
"learning_rate": 4.671429376091324e-05,
"loss": 1.4405,
"step": 10500
},
{
"epoch": 0.07,
"eval_bleu": 35.6564,
"eval_gen_len": 22.9851,
"eval_loss": 1.8357064723968506,
"eval_runtime": 1082.157,
"eval_samples_per_second": 2.784,
"eval_steps_per_second": 0.697,
"step": 10500
},
{
"epoch": 0.07,
"learning_rate": 4.6557831559051965e-05,
"loss": 1.3966,
"step": 11000
},
{
"epoch": 0.07,
"eval_bleu": 37.8601,
"eval_gen_len": 24.1235,
"eval_loss": 1.8839383125305176,
"eval_runtime": 1137.6484,
"eval_samples_per_second": 2.648,
"eval_steps_per_second": 0.663,
"step": 11000
},
{
"epoch": 0.07,
"learning_rate": 4.640136935719069e-05,
"loss": 1.415,
"step": 11500
},
{
"epoch": 0.07,
"eval_bleu": 35.9576,
"eval_gen_len": 23.4434,
"eval_loss": 1.868781566619873,
"eval_runtime": 1187.6496,
"eval_samples_per_second": 2.537,
"eval_steps_per_second": 0.635,
"step": 11500
},
{
"epoch": 0.08,
"learning_rate": 4.624490715532942e-05,
"loss": 1.4306,
"step": 12000
},
{
"epoch": 0.08,
"eval_bleu": 37.8482,
"eval_gen_len": 24.2154,
"eval_loss": 1.8083645105361938,
"eval_runtime": 1247.7304,
"eval_samples_per_second": 2.415,
"eval_steps_per_second": 0.604,
"step": 12000
},
{
"epoch": 0.08,
"learning_rate": 4.6088444953468144e-05,
"loss": 1.3337,
"step": 12500
},
{
"epoch": 0.08,
"eval_bleu": 37.4047,
"eval_gen_len": 23.692,
"eval_loss": 1.798228144645691,
"eval_runtime": 1200.6209,
"eval_samples_per_second": 2.51,
"eval_steps_per_second": 0.628,
"step": 12500
},
{
"epoch": 0.08,
"learning_rate": 4.593198275160687e-05,
"loss": 1.4188,
"step": 13000
},
{
"epoch": 0.08,
"eval_bleu": 36.76,
"eval_gen_len": 23.5446,
"eval_loss": 1.8160576820373535,
"eval_runtime": 1178.8918,
"eval_samples_per_second": 2.556,
"eval_steps_per_second": 0.64,
"step": 13000
},
{
"epoch": 0.08,
"learning_rate": 4.5775520549745596e-05,
"loss": 1.3576,
"step": 13500
},
{
"epoch": 0.08,
"eval_bleu": 37.1149,
"eval_gen_len": 23.6963,
"eval_loss": 1.8919538259506226,
"eval_runtime": 1218.0527,
"eval_samples_per_second": 2.474,
"eval_steps_per_second": 0.619,
"step": 13500
},
{
"epoch": 0.09,
"learning_rate": 4.561905834788432e-05,
"loss": 1.4464,
"step": 14000
},
{
"epoch": 0.09,
"eval_bleu": 38.2523,
"eval_gen_len": 24.2449,
"eval_loss": 1.8881698846817017,
"eval_runtime": 1185.5541,
"eval_samples_per_second": 2.541,
"eval_steps_per_second": 0.636,
"step": 14000
},
{
"epoch": 0.09,
"learning_rate": 4.546259614602305e-05,
"loss": 1.3548,
"step": 14500
},
{
"epoch": 0.09,
"eval_bleu": 37.0608,
"eval_gen_len": 23.6077,
"eval_loss": 1.8448089361190796,
"eval_runtime": 1093.1892,
"eval_samples_per_second": 2.756,
"eval_steps_per_second": 0.69,
"step": 14500
},
{
"epoch": 0.09,
"learning_rate": 4.5306133944161775e-05,
"loss": 1.3779,
"step": 15000
},
{
"epoch": 0.09,
"eval_bleu": 37.2558,
"eval_gen_len": 23.5888,
"eval_loss": 1.868304967880249,
"eval_runtime": 1104.5613,
"eval_samples_per_second": 2.728,
"eval_steps_per_second": 0.683,
"step": 15000
},
{
"epoch": 0.1,
"learning_rate": 4.51496717423005e-05,
"loss": 1.3565,
"step": 15500
},
{
"epoch": 0.1,
"eval_bleu": 38.3579,
"eval_gen_len": 24.3107,
"eval_loss": 1.9323915243148804,
"eval_runtime": 1143.8823,
"eval_samples_per_second": 2.634,
"eval_steps_per_second": 0.659,
"step": 15500
},
{
"epoch": 0.1,
"learning_rate": 4.499320954043923e-05,
"loss": 1.3474,
"step": 16000
},
{
"epoch": 0.1,
"eval_bleu": 37.9665,
"eval_gen_len": 23.8453,
"eval_loss": 1.8365575075149536,
"eval_runtime": 1107.7008,
"eval_samples_per_second": 2.72,
"eval_steps_per_second": 0.681,
"step": 16000
},
{
"epoch": 0.1,
"learning_rate": 4.4836747338577954e-05,
"loss": 1.3492,
"step": 16500
},
{
"epoch": 0.1,
"eval_bleu": 37.6124,
"eval_gen_len": 23.7939,
"eval_loss": 1.8358746767044067,
"eval_runtime": 1110.7352,
"eval_samples_per_second": 2.713,
"eval_steps_per_second": 0.679,
"step": 16500
},
{
"epoch": 0.11,
"learning_rate": 4.468028513671668e-05,
"loss": 1.3347,
"step": 17000
},
{
"epoch": 0.11,
"eval_bleu": 37.7852,
"eval_gen_len": 23.8994,
"eval_loss": 1.7998509407043457,
"eval_runtime": 1113.2105,
"eval_samples_per_second": 2.707,
"eval_steps_per_second": 0.677,
"step": 17000
},
{
"epoch": 0.11,
"learning_rate": 4.45238229348554e-05,
"loss": 1.3361,
"step": 17500
},
{
"epoch": 0.11,
"eval_bleu": 37.3385,
"eval_gen_len": 23.6077,
"eval_loss": 1.849636197090149,
"eval_runtime": 1105.7834,
"eval_samples_per_second": 2.725,
"eval_steps_per_second": 0.682,
"step": 17500
},
{
"epoch": 0.11,
"learning_rate": 4.4367360732994125e-05,
"loss": 1.3204,
"step": 18000
},
{
"epoch": 0.11,
"eval_bleu": 36.8887,
"eval_gen_len": 23.385,
"eval_loss": 1.8504753112792969,
"eval_runtime": 1074.3508,
"eval_samples_per_second": 2.804,
"eval_steps_per_second": 0.702,
"step": 18000
},
{
"epoch": 0.12,
"learning_rate": 4.421089853113285e-05,
"loss": 1.3234,
"step": 18500
},
{
"epoch": 0.12,
"eval_bleu": 36.6707,
"eval_gen_len": 23.2629,
"eval_loss": 1.8434782028198242,
"eval_runtime": 1075.4498,
"eval_samples_per_second": 2.802,
"eval_steps_per_second": 0.701,
"step": 18500
},
{
"epoch": 0.12,
"learning_rate": 4.405443632927158e-05,
"loss": 1.3189,
"step": 19000
},
{
"epoch": 0.12,
"eval_bleu": 36.6831,
"eval_gen_len": 23.6163,
"eval_loss": 1.8203562498092651,
"eval_runtime": 1116.9517,
"eval_samples_per_second": 2.698,
"eval_steps_per_second": 0.675,
"step": 19000
},
{
"epoch": 0.12,
"learning_rate": 4.3897974127410304e-05,
"loss": 1.2984,
"step": 19500
},
{
"epoch": 0.12,
"eval_bleu": 36.9382,
"eval_gen_len": 23.699,
"eval_loss": 1.7791001796722412,
"eval_runtime": 1129.6793,
"eval_samples_per_second": 2.667,
"eval_steps_per_second": 0.667,
"step": 19500
},
{
"epoch": 0.13,
"learning_rate": 4.374151192554903e-05,
"loss": 1.2913,
"step": 20000
},
{
"epoch": 0.13,
"eval_bleu": 38.155,
"eval_gen_len": 24.1394,
"eval_loss": 1.8548424243927002,
"eval_runtime": 1126.2716,
"eval_samples_per_second": 2.675,
"eval_steps_per_second": 0.669,
"step": 20000
},
{
"epoch": 0.13,
"learning_rate": 4.3585049723687756e-05,
"loss": 1.3222,
"step": 20500
},
{
"epoch": 0.13,
"eval_bleu": 37.9634,
"eval_gen_len": 23.9492,
"eval_loss": 1.8165639638900757,
"eval_runtime": 1117.1297,
"eval_samples_per_second": 2.697,
"eval_steps_per_second": 0.675,
"step": 20500
},
{
"epoch": 0.13,
"learning_rate": 4.342858752182648e-05,
"loss": 1.3223,
"step": 21000
},
{
"epoch": 0.13,
"eval_bleu": 37.6942,
"eval_gen_len": 24.0,
"eval_loss": 1.8118115663528442,
"eval_runtime": 1118.3371,
"eval_samples_per_second": 2.694,
"eval_steps_per_second": 0.674,
"step": 21000
},
{
"epoch": 0.13,
"learning_rate": 4.327212531996521e-05,
"loss": 1.3134,
"step": 21500
},
{
"epoch": 0.13,
"eval_bleu": 37.7975,
"eval_gen_len": 23.9004,
"eval_loss": 1.8295111656188965,
"eval_runtime": 1106.5041,
"eval_samples_per_second": 2.723,
"eval_steps_per_second": 0.681,
"step": 21500
},
{
"epoch": 0.14,
"learning_rate": 4.3115663118103935e-05,
"loss": 1.3045,
"step": 22000
},
{
"epoch": 0.14,
"eval_bleu": 37.7737,
"eval_gen_len": 23.9373,
"eval_loss": 1.8213391304016113,
"eval_runtime": 1127.6192,
"eval_samples_per_second": 2.672,
"eval_steps_per_second": 0.669,
"step": 22000
},
{
"epoch": 0.14,
"learning_rate": 4.2959200916242654e-05,
"loss": 1.3111,
"step": 22500
},
{
"epoch": 0.14,
"eval_bleu": 37.8615,
"eval_gen_len": 23.9519,
"eval_loss": 1.8346147537231445,
"eval_runtime": 1143.7572,
"eval_samples_per_second": 2.634,
"eval_steps_per_second": 0.659,
"step": 22500
},
{
"epoch": 0.14,
"learning_rate": 4.280273871438138e-05,
"loss": 1.3346,
"step": 23000
},
{
"epoch": 0.14,
"eval_bleu": 38.1234,
"eval_gen_len": 24.1049,
"eval_loss": 1.7679733037948608,
"eval_runtime": 1113.7045,
"eval_samples_per_second": 2.705,
"eval_steps_per_second": 0.677,
"step": 23000
},
{
"epoch": 0.15,
"learning_rate": 4.264627651252011e-05,
"loss": 1.2851,
"step": 23500
},
{
"epoch": 0.15,
"eval_bleu": 38.1328,
"eval_gen_len": 24.1069,
"eval_loss": 1.801455020904541,
"eval_runtime": 1136.226,
"eval_samples_per_second": 2.652,
"eval_steps_per_second": 0.664,
"step": 23500
},
{
"epoch": 0.15,
"learning_rate": 4.248981431065883e-05,
"loss": 1.2732,
"step": 24000
},
{
"epoch": 0.15,
"eval_bleu": 37.8569,
"eval_gen_len": 23.8579,
"eval_loss": 1.8218289613723755,
"eval_runtime": 1124.2084,
"eval_samples_per_second": 2.68,
"eval_steps_per_second": 0.671,
"step": 24000
},
{
"epoch": 0.15,
"learning_rate": 4.233335210879756e-05,
"loss": 1.2666,
"step": 24500
},
{
"epoch": 0.15,
"eval_bleu": 37.3143,
"eval_gen_len": 24.2327,
"eval_loss": 1.8298362493515015,
"eval_runtime": 1169.4328,
"eval_samples_per_second": 2.576,
"eval_steps_per_second": 0.645,
"step": 24500
},
{
"epoch": 0.16,
"learning_rate": 4.2176889906936285e-05,
"loss": 1.248,
"step": 25000
},
{
"epoch": 0.16,
"eval_bleu": 37.4488,
"eval_gen_len": 24.0846,
"eval_loss": 1.7926667928695679,
"eval_runtime": 1136.492,
"eval_samples_per_second": 2.651,
"eval_steps_per_second": 0.663,
"step": 25000
},
{
"epoch": 0.16,
"learning_rate": 4.202042770507501e-05,
"loss": 1.2586,
"step": 25500
},
{
"epoch": 0.16,
"eval_bleu": 36.9098,
"eval_gen_len": 23.5061,
"eval_loss": 1.8259001970291138,
"eval_runtime": 1094.3744,
"eval_samples_per_second": 2.753,
"eval_steps_per_second": 0.689,
"step": 25500
},
{
"epoch": 0.16,
"learning_rate": 4.186396550321374e-05,
"loss": 1.2305,
"step": 26000
},
{
"epoch": 0.16,
"eval_bleu": 37.6963,
"eval_gen_len": 23.9618,
"eval_loss": 1.7810018062591553,
"eval_runtime": 1112.8793,
"eval_samples_per_second": 2.707,
"eval_steps_per_second": 0.678,
"step": 26000
},
{
"epoch": 0.17,
"learning_rate": 4.1707503301352464e-05,
"loss": 1.2435,
"step": 26500
},
{
"epoch": 0.17,
"eval_bleu": 38.219,
"eval_gen_len": 24.2695,
"eval_loss": 1.8444660902023315,
"eval_runtime": 1136.4697,
"eval_samples_per_second": 2.651,
"eval_steps_per_second": 0.663,
"step": 26500
},
{
"epoch": 0.17,
"learning_rate": 4.155104109949119e-05,
"loss": 1.2681,
"step": 27000
},
{
"epoch": 0.17,
"eval_bleu": 38.0339,
"eval_gen_len": 23.8065,
"eval_loss": 1.8042678833007812,
"eval_runtime": 1110.1648,
"eval_samples_per_second": 2.714,
"eval_steps_per_second": 0.679,
"step": 27000
},
{
"epoch": 0.17,
"learning_rate": 4.139457889762991e-05,
"loss": 1.2581,
"step": 27500
},
{
"epoch": 0.17,
"eval_bleu": 36.7337,
"eval_gen_len": 23.6279,
"eval_loss": 1.7898603677749634,
"eval_runtime": 1101.1993,
"eval_samples_per_second": 2.736,
"eval_steps_per_second": 0.685,
"step": 27500
},
{
"epoch": 0.18,
"learning_rate": 4.1238116695768636e-05,
"loss": 1.2476,
"step": 28000
},
{
"epoch": 0.18,
"eval_bleu": 37.5418,
"eval_gen_len": 23.7527,
"eval_loss": 1.851403832435608,
"eval_runtime": 1100.3622,
"eval_samples_per_second": 2.738,
"eval_steps_per_second": 0.685,
"step": 28000
},
{
"epoch": 0.18,
"learning_rate": 4.108165449390736e-05,
"loss": 1.2778,
"step": 28500
},
{
"epoch": 0.18,
"eval_bleu": 36.9893,
"eval_gen_len": 23.8487,
"eval_loss": 1.7635945081710815,
"eval_runtime": 1113.9066,
"eval_samples_per_second": 2.705,
"eval_steps_per_second": 0.677,
"step": 28500
},
{
"epoch": 0.18,
"learning_rate": 4.092519229204609e-05,
"loss": 1.2335,
"step": 29000
},
{
"epoch": 0.18,
"eval_bleu": 38.4148,
"eval_gen_len": 24.0743,
"eval_loss": 1.7612364292144775,
"eval_runtime": 1118.162,
"eval_samples_per_second": 2.695,
"eval_steps_per_second": 0.674,
"step": 29000
},
{
"epoch": 0.18,
"learning_rate": 4.0768730090184814e-05,
"loss": 1.2229,
"step": 29500
},
{
"epoch": 0.18,
"eval_bleu": 38.387,
"eval_gen_len": 24.1324,
"eval_loss": 1.724638819694519,
"eval_runtime": 1133.5566,
"eval_samples_per_second": 2.658,
"eval_steps_per_second": 0.665,
"step": 29500
},
{
"epoch": 0.19,
"learning_rate": 4.061226788832354e-05,
"loss": 1.198,
"step": 30000
},
{
"epoch": 0.19,
"eval_bleu": 38.2916,
"eval_gen_len": 23.918,
"eval_loss": 1.7657166719436646,
"eval_runtime": 1258.8163,
"eval_samples_per_second": 2.394,
"eval_steps_per_second": 0.599,
"step": 30000
},
{
"epoch": 0.19,
"learning_rate": 4.045580568646227e-05,
"loss": 1.281,
"step": 30500
},
{
"epoch": 0.19,
"eval_bleu": 36.5976,
"eval_gen_len": 23.24,
"eval_loss": 1.758962631225586,
"eval_runtime": 1321.212,
"eval_samples_per_second": 2.28,
"eval_steps_per_second": 0.571,
"step": 30500
},
{
"epoch": 0.19,
"learning_rate": 4.029934348460099e-05,
"loss": 1.231,
"step": 31000
},
{
"epoch": 0.19,
"eval_bleu": 37.8396,
"eval_gen_len": 23.7385,
"eval_loss": 1.733566164970398,
"eval_runtime": 1419.0837,
"eval_samples_per_second": 2.123,
"eval_steps_per_second": 0.531,
"step": 31000
},
{
"epoch": 0.2,
"learning_rate": 4.014288128273972e-05,
"loss": 1.2014,
"step": 31500
},
{
"epoch": 0.2,
"eval_bleu": 37.8945,
"eval_gen_len": 23.8025,
"eval_loss": 1.7839374542236328,
"eval_runtime": 1450.5502,
"eval_samples_per_second": 2.077,
"eval_steps_per_second": 0.52,
"step": 31500
},
{
"epoch": 0.2,
"learning_rate": 3.9986419080878445e-05,
"loss": 1.2295,
"step": 32000
},
{
"epoch": 0.2,
"eval_bleu": 37.9001,
"eval_gen_len": 23.92,
"eval_loss": 1.757741928100586,
"eval_runtime": 1476.1175,
"eval_samples_per_second": 2.041,
"eval_steps_per_second": 0.511,
"step": 32000
},
{
"epoch": 0.2,
"learning_rate": 3.9829956879017165e-05,
"loss": 1.2154,
"step": 32500
},
{
"epoch": 0.2,
"eval_bleu": 37.5283,
"eval_gen_len": 23.537,
"eval_loss": 1.80224609375,
"eval_runtime": 1473.4024,
"eval_samples_per_second": 2.045,
"eval_steps_per_second": 0.512,
"step": 32500
},
{
"epoch": 0.21,
"learning_rate": 3.967349467715589e-05,
"loss": 1.1956,
"step": 33000
},
{
"epoch": 0.21,
"eval_bleu": 38.5709,
"eval_gen_len": 24.0189,
"eval_loss": 1.7815015316009521,
"eval_runtime": 1398.0131,
"eval_samples_per_second": 2.155,
"eval_steps_per_second": 0.539,
"step": 33000
},
{
"epoch": 0.21,
"learning_rate": 3.951703247529462e-05,
"loss": 1.2045,
"step": 33500
},
{
"epoch": 0.21,
"eval_bleu": 37.2706,
"eval_gen_len": 23.2522,
"eval_loss": 1.7592459917068481,
"eval_runtime": 1445.8551,
"eval_samples_per_second": 2.084,
"eval_steps_per_second": 0.521,
"step": 33500
},
{
"epoch": 0.21,
"learning_rate": 3.9360570273433344e-05,
"loss": 1.1796,
"step": 34000
},
{
"epoch": 0.21,
"eval_bleu": 38.5198,
"eval_gen_len": 24.078,
"eval_loss": 1.8075897693634033,
"eval_runtime": 1528.5032,
"eval_samples_per_second": 1.971,
"eval_steps_per_second": 0.493,
"step": 34000
},
{
"epoch": 0.22,
"learning_rate": 3.920410807157207e-05,
"loss": 1.226,
"step": 34500
},
{
"epoch": 0.22,
"eval_bleu": 38.1729,
"eval_gen_len": 23.9038,
"eval_loss": 1.731508493423462,
"eval_runtime": 1501.9126,
"eval_samples_per_second": 2.006,
"eval_steps_per_second": 0.502,
"step": 34500
},
{
"epoch": 0.22,
"learning_rate": 3.9047645869710796e-05,
"loss": 1.2304,
"step": 35000
},
{
"epoch": 0.22,
"eval_bleu": 37.8875,
"eval_gen_len": 23.8493,
"eval_loss": 1.7690067291259766,
"eval_runtime": 1584.8349,
"eval_samples_per_second": 1.901,
"eval_steps_per_second": 0.476,
"step": 35000
},
{
"epoch": 0.22,
"learning_rate": 3.889118366784952e-05,
"loss": 1.197,
"step": 35500
},
{
"epoch": 0.22,
"eval_bleu": 37.7553,
"eval_gen_len": 23.9353,
"eval_loss": 1.7940903902053833,
"eval_runtime": 1636.4218,
"eval_samples_per_second": 1.841,
"eval_steps_per_second": 0.461,
"step": 35500
},
{
"epoch": 0.23,
"learning_rate": 3.873472146598825e-05,
"loss": 1.1844,
"step": 36000
},
{
"epoch": 0.23,
"eval_bleu": 38.3847,
"eval_gen_len": 23.9668,
"eval_loss": 1.7370097637176514,
"eval_runtime": 1586.621,
"eval_samples_per_second": 1.899,
"eval_steps_per_second": 0.475,
"step": 36000
},
{
"epoch": 0.23,
"learning_rate": 3.8578259264126975e-05,
"loss": 1.2099,
"step": 36500
},
{
"epoch": 0.23,
"eval_bleu": 38.4795,
"eval_gen_len": 23.932,
"eval_loss": 1.7586994171142578,
"eval_runtime": 1530.6865,
"eval_samples_per_second": 1.968,
"eval_steps_per_second": 0.493,
"step": 36500
},
{
"epoch": 0.23,
"learning_rate": 3.84217970622657e-05,
"loss": 1.1798,
"step": 37000
},
{
"epoch": 0.23,
"eval_bleu": 37.463,
"eval_gen_len": 23.6664,
"eval_loss": 1.752753496170044,
"eval_runtime": 1408.4249,
"eval_samples_per_second": 2.139,
"eval_steps_per_second": 0.535,
"step": 37000
},
{
"epoch": 0.23,
"learning_rate": 3.826533486040442e-05,
"loss": 1.1959,
"step": 37500
},
{
"epoch": 0.23,
"eval_bleu": 38.1287,
"eval_gen_len": 23.7693,
"eval_loss": 1.7170414924621582,
"eval_runtime": 1482.0293,
"eval_samples_per_second": 2.033,
"eval_steps_per_second": 0.509,
"step": 37500
},
{
"epoch": 0.24,
"learning_rate": 3.8108872658543146e-05,
"loss": 1.2061,
"step": 38000
},
{
"epoch": 0.24,
"eval_bleu": 38.8039,
"eval_gen_len": 23.9973,
"eval_loss": 1.731540322303772,
"eval_runtime": 1484.0478,
"eval_samples_per_second": 2.03,
"eval_steps_per_second": 0.508,
"step": 38000
},
{
"epoch": 0.24,
"learning_rate": 3.795241045668187e-05,
"loss": 1.1606,
"step": 38500
},
{
"epoch": 0.24,
"eval_bleu": 37.93,
"eval_gen_len": 23.8347,
"eval_loss": 1.7204748392105103,
"eval_runtime": 1527.1349,
"eval_samples_per_second": 1.973,
"eval_steps_per_second": 0.494,
"step": 38500
},
{
"epoch": 0.24,
"learning_rate": 3.77959482548206e-05,
"loss": 1.1605,
"step": 39000
},
{
"epoch": 0.24,
"eval_bleu": 38.2637,
"eval_gen_len": 24.2688,
"eval_loss": 1.7610867023468018,
"eval_runtime": 1550.9436,
"eval_samples_per_second": 1.943,
"eval_steps_per_second": 0.486,
"step": 39000
},
{
"epoch": 0.25,
"learning_rate": 3.7639486052959325e-05,
"loss": 1.1792,
"step": 39500
},
{
"epoch": 0.25,
"eval_bleu": 38.3329,
"eval_gen_len": 24.088,
"eval_loss": 1.728115200996399,
"eval_runtime": 1598.5717,
"eval_samples_per_second": 1.885,
"eval_steps_per_second": 0.472,
"step": 39500
},
{
"epoch": 0.25,
"learning_rate": 3.748302385109805e-05,
"loss": 1.1613,
"step": 40000
},
{
"epoch": 0.25,
"eval_bleu": 38.7357,
"eval_gen_len": 24.1019,
"eval_loss": 1.7274950742721558,
"eval_runtime": 1552.6289,
"eval_samples_per_second": 1.941,
"eval_steps_per_second": 0.486,
"step": 40000
}
],
"max_steps": 159783,
"num_train_epochs": 1,
"total_flos": 6683673699483648.0,
"trial_name": null,
"trial_params": null
}