{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.25033952297803896, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.984353779813873e-05, "loss": 2.5206, "step": 500 }, { "epoch": 0.0, "eval_bleu": 38.3843, "eval_gen_len": 23.8603, "eval_loss": 1.8556830883026123, "eval_runtime": 1197.8495, "eval_samples_per_second": 2.515, "eval_steps_per_second": 0.629, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.9687075596277455e-05, "loss": 1.9572, "step": 1000 }, { "epoch": 0.01, "eval_bleu": 22.3733, "eval_gen_len": 18.9091, "eval_loss": 2.9118094444274902, "eval_runtime": 1006.4293, "eval_samples_per_second": 2.994, "eval_steps_per_second": 0.749, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.953061339441618e-05, "loss": 1.7653, "step": 1500 }, { "epoch": 0.01, "eval_bleu": 38.119, "eval_gen_len": 23.6728, "eval_loss": 1.9158709049224854, "eval_runtime": 1104.4939, "eval_samples_per_second": 2.728, "eval_steps_per_second": 0.683, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.937415119255491e-05, "loss": 1.715, "step": 2000 }, { "epoch": 0.01, "eval_bleu": 37.8451, "eval_gen_len": 23.7046, "eval_loss": 1.9664654731750488, "eval_runtime": 1119.4046, "eval_samples_per_second": 2.692, "eval_steps_per_second": 0.674, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.921768899069363e-05, "loss": 1.7164, "step": 2500 }, { "epoch": 0.02, "eval_bleu": 36.315, "eval_gen_len": 23.1172, "eval_loss": 1.8593918085098267, "eval_runtime": 1061.9031, "eval_samples_per_second": 2.837, "eval_steps_per_second": 0.71, "step": 2500 }, { "epoch": 0.02, "learning_rate": 4.906122678883235e-05, "loss": 1.616, "step": 3000 }, { "epoch": 0.02, "eval_bleu": 37.6941, "eval_gen_len": 23.617, "eval_loss": 1.816091537475586, "eval_runtime": 1096.7235, "eval_samples_per_second": 2.747, "eval_steps_per_second": 0.688, "step": 3000 }, { "epoch": 0.02, "learning_rate": 4.890476458697108e-05, "loss": 1.6131, "step": 3500 }, { "epoch": 0.02, "eval_bleu": 34.7085, "eval_gen_len": 22.4826, "eval_loss": 1.9029065370559692, "eval_runtime": 1058.2776, "eval_samples_per_second": 2.847, "eval_steps_per_second": 0.712, "step": 3500 }, { "epoch": 0.03, "learning_rate": 4.8748302385109805e-05, "loss": 1.5981, "step": 4000 }, { "epoch": 0.03, "eval_bleu": 37.0045, "eval_gen_len": 24.0611, "eval_loss": 1.9932270050048828, "eval_runtime": 1166.939, "eval_samples_per_second": 2.582, "eval_steps_per_second": 0.646, "step": 4000 }, { "epoch": 0.03, "learning_rate": 4.859184018324853e-05, "loss": 1.5898, "step": 4500 }, { "epoch": 0.03, "eval_bleu": 38.2992, "eval_gen_len": 24.2121, "eval_loss": 1.874466896057129, "eval_runtime": 1133.0065, "eval_samples_per_second": 2.659, "eval_steps_per_second": 0.665, "step": 4500 }, { "epoch": 0.03, "learning_rate": 4.843537798138726e-05, "loss": 1.5582, "step": 5000 }, { "epoch": 0.03, "eval_bleu": 36.9851, "eval_gen_len": 23.4202, "eval_loss": 1.8227670192718506, "eval_runtime": 1093.0431, "eval_samples_per_second": 2.757, "eval_steps_per_second": 0.69, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.8278915779525984e-05, "loss": 1.7726, "step": 5500 }, { "epoch": 0.03, "eval_bleu": 32.3628, "eval_gen_len": 22.3083, "eval_loss": 2.096940755844116, "eval_runtime": 1059.9352, "eval_samples_per_second": 2.843, "eval_steps_per_second": 0.711, "step": 5500 }, { "epoch": 0.04, "learning_rate": 4.812245357766471e-05, "loss": 1.7102, "step": 6000 }, { "epoch": 0.04, "eval_bleu": 36.1748, "eval_gen_len": 24.1942, "eval_loss": 2.018200397491455, "eval_runtime": 1129.5162, "eval_samples_per_second": 2.668, "eval_steps_per_second": 0.668, "step": 6000 }, { "epoch": 0.04, "learning_rate": 4.7965991375803436e-05, "loss": 1.521, "step": 6500 }, { "epoch": 0.04, "eval_bleu": 32.2516, "eval_gen_len": 28.4076, "eval_loss": 2.057112693786621, "eval_runtime": 1610.6798, "eval_samples_per_second": 1.871, "eval_steps_per_second": 0.468, "step": 6500 }, { "epoch": 0.04, "learning_rate": 4.780952917394216e-05, "loss": 1.6184, "step": 7000 }, { "epoch": 0.04, "eval_bleu": 37.8637, "eval_gen_len": 23.6844, "eval_loss": 1.8763169050216675, "eval_runtime": 1099.6674, "eval_samples_per_second": 2.74, "eval_steps_per_second": 0.686, "step": 7000 }, { "epoch": 0.05, "learning_rate": 4.765306697208088e-05, "loss": 1.5384, "step": 7500 }, { "epoch": 0.05, "eval_bleu": 37.7753, "eval_gen_len": 23.8984, "eval_loss": 1.8554104566574097, "eval_runtime": 1111.1157, "eval_samples_per_second": 2.712, "eval_steps_per_second": 0.679, "step": 7500 }, { "epoch": 0.05, "learning_rate": 4.749660477021961e-05, "loss": 1.5485, "step": 8000 }, { "epoch": 0.05, "eval_bleu": 37.1929, "eval_gen_len": 24.0408, "eval_loss": 1.9456806182861328, "eval_runtime": 1136.0111, "eval_samples_per_second": 2.652, "eval_steps_per_second": 0.664, "step": 8000 }, { "epoch": 0.05, "learning_rate": 4.7340142568358334e-05, "loss": 1.4872, "step": 8500 }, { "epoch": 0.05, "eval_bleu": 35.4276, "eval_gen_len": 23.2008, "eval_loss": 1.8643290996551514, "eval_runtime": 1078.1924, "eval_samples_per_second": 2.794, "eval_steps_per_second": 0.699, "step": 8500 }, { "epoch": 0.06, "learning_rate": 4.718368036649706e-05, "loss": 1.4412, "step": 9000 }, { "epoch": 0.06, "eval_bleu": 35.7002, "eval_gen_len": 23.613, "eval_loss": 1.910874366760254, "eval_runtime": 1134.7549, "eval_samples_per_second": 2.655, "eval_steps_per_second": 0.664, "step": 9000 }, { "epoch": 0.06, "learning_rate": 4.702721816463579e-05, "loss": 1.4483, "step": 9500 }, { "epoch": 0.06, "eval_bleu": 37.6828, "eval_gen_len": 24.0441, "eval_loss": 1.8838558197021484, "eval_runtime": 1126.6767, "eval_samples_per_second": 2.674, "eval_steps_per_second": 0.669, "step": 9500 }, { "epoch": 0.06, "learning_rate": 4.687075596277451e-05, "loss": 1.4523, "step": 10000 }, { "epoch": 0.06, "eval_bleu": 36.2473, "eval_gen_len": 23.3561, "eval_loss": 1.940147042274475, "eval_runtime": 1087.7551, "eval_samples_per_second": 2.77, "eval_steps_per_second": 0.693, "step": 10000 }, { "epoch": 0.07, "learning_rate": 4.671429376091324e-05, "loss": 1.4405, "step": 10500 }, { "epoch": 0.07, "eval_bleu": 35.6564, "eval_gen_len": 22.9851, "eval_loss": 1.8357064723968506, "eval_runtime": 1082.157, "eval_samples_per_second": 2.784, "eval_steps_per_second": 0.697, "step": 10500 }, { "epoch": 0.07, "learning_rate": 4.6557831559051965e-05, "loss": 1.3966, "step": 11000 }, { "epoch": 0.07, "eval_bleu": 37.8601, "eval_gen_len": 24.1235, "eval_loss": 1.8839383125305176, "eval_runtime": 1137.6484, "eval_samples_per_second": 2.648, "eval_steps_per_second": 0.663, "step": 11000 }, { "epoch": 0.07, "learning_rate": 4.640136935719069e-05, "loss": 1.415, "step": 11500 }, { "epoch": 0.07, "eval_bleu": 35.9576, "eval_gen_len": 23.4434, "eval_loss": 1.868781566619873, "eval_runtime": 1187.6496, "eval_samples_per_second": 2.537, "eval_steps_per_second": 0.635, "step": 11500 }, { "epoch": 0.08, "learning_rate": 4.624490715532942e-05, "loss": 1.4306, "step": 12000 }, { "epoch": 0.08, "eval_bleu": 37.8482, "eval_gen_len": 24.2154, "eval_loss": 1.8083645105361938, "eval_runtime": 1247.7304, "eval_samples_per_second": 2.415, "eval_steps_per_second": 0.604, "step": 12000 }, { "epoch": 0.08, "learning_rate": 4.6088444953468144e-05, "loss": 1.3337, "step": 12500 }, { "epoch": 0.08, "eval_bleu": 37.4047, "eval_gen_len": 23.692, "eval_loss": 1.798228144645691, "eval_runtime": 1200.6209, "eval_samples_per_second": 2.51, "eval_steps_per_second": 0.628, "step": 12500 }, { "epoch": 0.08, "learning_rate": 4.593198275160687e-05, "loss": 1.4188, "step": 13000 }, { "epoch": 0.08, "eval_bleu": 36.76, "eval_gen_len": 23.5446, "eval_loss": 1.8160576820373535, "eval_runtime": 1178.8918, "eval_samples_per_second": 2.556, "eval_steps_per_second": 0.64, "step": 13000 }, { "epoch": 0.08, "learning_rate": 4.5775520549745596e-05, "loss": 1.3576, "step": 13500 }, { "epoch": 0.08, "eval_bleu": 37.1149, "eval_gen_len": 23.6963, "eval_loss": 1.8919538259506226, "eval_runtime": 1218.0527, "eval_samples_per_second": 2.474, "eval_steps_per_second": 0.619, "step": 13500 }, { "epoch": 0.09, "learning_rate": 4.561905834788432e-05, "loss": 1.4464, "step": 14000 }, { "epoch": 0.09, "eval_bleu": 38.2523, "eval_gen_len": 24.2449, "eval_loss": 1.8881698846817017, "eval_runtime": 1185.5541, "eval_samples_per_second": 2.541, "eval_steps_per_second": 0.636, "step": 14000 }, { "epoch": 0.09, "learning_rate": 4.546259614602305e-05, "loss": 1.3548, "step": 14500 }, { "epoch": 0.09, "eval_bleu": 37.0608, "eval_gen_len": 23.6077, "eval_loss": 1.8448089361190796, "eval_runtime": 1093.1892, "eval_samples_per_second": 2.756, "eval_steps_per_second": 0.69, "step": 14500 }, { "epoch": 0.09, "learning_rate": 4.5306133944161775e-05, "loss": 1.3779, "step": 15000 }, { "epoch": 0.09, "eval_bleu": 37.2558, "eval_gen_len": 23.5888, "eval_loss": 1.868304967880249, "eval_runtime": 1104.5613, "eval_samples_per_second": 2.728, "eval_steps_per_second": 0.683, "step": 15000 }, { "epoch": 0.1, "learning_rate": 4.51496717423005e-05, "loss": 1.3565, "step": 15500 }, { "epoch": 0.1, "eval_bleu": 38.3579, "eval_gen_len": 24.3107, "eval_loss": 1.9323915243148804, "eval_runtime": 1143.8823, "eval_samples_per_second": 2.634, "eval_steps_per_second": 0.659, "step": 15500 }, { "epoch": 0.1, "learning_rate": 4.499320954043923e-05, "loss": 1.3474, "step": 16000 }, { "epoch": 0.1, "eval_bleu": 37.9665, "eval_gen_len": 23.8453, "eval_loss": 1.8365575075149536, "eval_runtime": 1107.7008, "eval_samples_per_second": 2.72, "eval_steps_per_second": 0.681, "step": 16000 }, { "epoch": 0.1, "learning_rate": 4.4836747338577954e-05, "loss": 1.3492, "step": 16500 }, { "epoch": 0.1, "eval_bleu": 37.6124, "eval_gen_len": 23.7939, "eval_loss": 1.8358746767044067, "eval_runtime": 1110.7352, "eval_samples_per_second": 2.713, "eval_steps_per_second": 0.679, "step": 16500 }, { "epoch": 0.11, "learning_rate": 4.468028513671668e-05, "loss": 1.3347, "step": 17000 }, { "epoch": 0.11, "eval_bleu": 37.7852, "eval_gen_len": 23.8994, "eval_loss": 1.7998509407043457, "eval_runtime": 1113.2105, "eval_samples_per_second": 2.707, "eval_steps_per_second": 0.677, "step": 17000 }, { "epoch": 0.11, "learning_rate": 4.45238229348554e-05, "loss": 1.3361, "step": 17500 }, { "epoch": 0.11, "eval_bleu": 37.3385, "eval_gen_len": 23.6077, "eval_loss": 1.849636197090149, "eval_runtime": 1105.7834, "eval_samples_per_second": 2.725, "eval_steps_per_second": 0.682, "step": 17500 }, { "epoch": 0.11, "learning_rate": 4.4367360732994125e-05, "loss": 1.3204, "step": 18000 }, { "epoch": 0.11, "eval_bleu": 36.8887, "eval_gen_len": 23.385, "eval_loss": 1.8504753112792969, "eval_runtime": 1074.3508, "eval_samples_per_second": 2.804, "eval_steps_per_second": 0.702, "step": 18000 }, { "epoch": 0.12, "learning_rate": 4.421089853113285e-05, "loss": 1.3234, "step": 18500 }, { "epoch": 0.12, "eval_bleu": 36.6707, "eval_gen_len": 23.2629, "eval_loss": 1.8434782028198242, "eval_runtime": 1075.4498, "eval_samples_per_second": 2.802, "eval_steps_per_second": 0.701, "step": 18500 }, { "epoch": 0.12, "learning_rate": 4.405443632927158e-05, "loss": 1.3189, "step": 19000 }, { "epoch": 0.12, "eval_bleu": 36.6831, "eval_gen_len": 23.6163, "eval_loss": 1.8203562498092651, "eval_runtime": 1116.9517, "eval_samples_per_second": 2.698, "eval_steps_per_second": 0.675, "step": 19000 }, { "epoch": 0.12, "learning_rate": 4.3897974127410304e-05, "loss": 1.2984, "step": 19500 }, { "epoch": 0.12, "eval_bleu": 36.9382, "eval_gen_len": 23.699, "eval_loss": 1.7791001796722412, "eval_runtime": 1129.6793, "eval_samples_per_second": 2.667, "eval_steps_per_second": 0.667, "step": 19500 }, { "epoch": 0.13, "learning_rate": 4.374151192554903e-05, "loss": 1.2913, "step": 20000 }, { "epoch": 0.13, "eval_bleu": 38.155, "eval_gen_len": 24.1394, "eval_loss": 1.8548424243927002, "eval_runtime": 1126.2716, "eval_samples_per_second": 2.675, "eval_steps_per_second": 0.669, "step": 20000 }, { "epoch": 0.13, "learning_rate": 4.3585049723687756e-05, "loss": 1.3222, "step": 20500 }, { "epoch": 0.13, "eval_bleu": 37.9634, "eval_gen_len": 23.9492, "eval_loss": 1.8165639638900757, "eval_runtime": 1117.1297, "eval_samples_per_second": 2.697, "eval_steps_per_second": 0.675, "step": 20500 }, { "epoch": 0.13, "learning_rate": 4.342858752182648e-05, "loss": 1.3223, "step": 21000 }, { "epoch": 0.13, "eval_bleu": 37.6942, "eval_gen_len": 24.0, "eval_loss": 1.8118115663528442, "eval_runtime": 1118.3371, "eval_samples_per_second": 2.694, "eval_steps_per_second": 0.674, "step": 21000 }, { "epoch": 0.13, "learning_rate": 4.327212531996521e-05, "loss": 1.3134, "step": 21500 }, { "epoch": 0.13, "eval_bleu": 37.7975, "eval_gen_len": 23.9004, "eval_loss": 1.8295111656188965, "eval_runtime": 1106.5041, "eval_samples_per_second": 2.723, "eval_steps_per_second": 0.681, "step": 21500 }, { "epoch": 0.14, "learning_rate": 4.3115663118103935e-05, "loss": 1.3045, "step": 22000 }, { "epoch": 0.14, "eval_bleu": 37.7737, "eval_gen_len": 23.9373, "eval_loss": 1.8213391304016113, "eval_runtime": 1127.6192, "eval_samples_per_second": 2.672, "eval_steps_per_second": 0.669, "step": 22000 }, { "epoch": 0.14, "learning_rate": 4.2959200916242654e-05, "loss": 1.3111, "step": 22500 }, { "epoch": 0.14, "eval_bleu": 37.8615, "eval_gen_len": 23.9519, "eval_loss": 1.8346147537231445, "eval_runtime": 1143.7572, "eval_samples_per_second": 2.634, "eval_steps_per_second": 0.659, "step": 22500 }, { "epoch": 0.14, "learning_rate": 4.280273871438138e-05, "loss": 1.3346, "step": 23000 }, { "epoch": 0.14, "eval_bleu": 38.1234, "eval_gen_len": 24.1049, "eval_loss": 1.7679733037948608, "eval_runtime": 1113.7045, "eval_samples_per_second": 2.705, "eval_steps_per_second": 0.677, "step": 23000 }, { "epoch": 0.15, "learning_rate": 4.264627651252011e-05, "loss": 1.2851, "step": 23500 }, { "epoch": 0.15, "eval_bleu": 38.1328, "eval_gen_len": 24.1069, "eval_loss": 1.801455020904541, "eval_runtime": 1136.226, "eval_samples_per_second": 2.652, "eval_steps_per_second": 0.664, "step": 23500 }, { "epoch": 0.15, "learning_rate": 4.248981431065883e-05, "loss": 1.2732, "step": 24000 }, { "epoch": 0.15, "eval_bleu": 37.8569, "eval_gen_len": 23.8579, "eval_loss": 1.8218289613723755, "eval_runtime": 1124.2084, "eval_samples_per_second": 2.68, "eval_steps_per_second": 0.671, "step": 24000 }, { "epoch": 0.15, "learning_rate": 4.233335210879756e-05, "loss": 1.2666, "step": 24500 }, { "epoch": 0.15, "eval_bleu": 37.3143, "eval_gen_len": 24.2327, "eval_loss": 1.8298362493515015, "eval_runtime": 1169.4328, "eval_samples_per_second": 2.576, "eval_steps_per_second": 0.645, "step": 24500 }, { "epoch": 0.16, "learning_rate": 4.2176889906936285e-05, "loss": 1.248, "step": 25000 }, { "epoch": 0.16, "eval_bleu": 37.4488, "eval_gen_len": 24.0846, "eval_loss": 1.7926667928695679, "eval_runtime": 1136.492, "eval_samples_per_second": 2.651, "eval_steps_per_second": 0.663, "step": 25000 }, { "epoch": 0.16, "learning_rate": 4.202042770507501e-05, "loss": 1.2586, "step": 25500 }, { "epoch": 0.16, "eval_bleu": 36.9098, "eval_gen_len": 23.5061, "eval_loss": 1.8259001970291138, "eval_runtime": 1094.3744, "eval_samples_per_second": 2.753, "eval_steps_per_second": 0.689, "step": 25500 }, { "epoch": 0.16, "learning_rate": 4.186396550321374e-05, "loss": 1.2305, "step": 26000 }, { "epoch": 0.16, "eval_bleu": 37.6963, "eval_gen_len": 23.9618, "eval_loss": 1.7810018062591553, "eval_runtime": 1112.8793, "eval_samples_per_second": 2.707, "eval_steps_per_second": 0.678, "step": 26000 }, { "epoch": 0.17, "learning_rate": 4.1707503301352464e-05, "loss": 1.2435, "step": 26500 }, { "epoch": 0.17, "eval_bleu": 38.219, "eval_gen_len": 24.2695, "eval_loss": 1.8444660902023315, "eval_runtime": 1136.4697, "eval_samples_per_second": 2.651, "eval_steps_per_second": 0.663, "step": 26500 }, { "epoch": 0.17, "learning_rate": 4.155104109949119e-05, "loss": 1.2681, "step": 27000 }, { "epoch": 0.17, "eval_bleu": 38.0339, "eval_gen_len": 23.8065, "eval_loss": 1.8042678833007812, "eval_runtime": 1110.1648, "eval_samples_per_second": 2.714, "eval_steps_per_second": 0.679, "step": 27000 }, { "epoch": 0.17, "learning_rate": 4.139457889762991e-05, "loss": 1.2581, "step": 27500 }, { "epoch": 0.17, "eval_bleu": 36.7337, "eval_gen_len": 23.6279, "eval_loss": 1.7898603677749634, "eval_runtime": 1101.1993, "eval_samples_per_second": 2.736, "eval_steps_per_second": 0.685, "step": 27500 }, { "epoch": 0.18, "learning_rate": 4.1238116695768636e-05, "loss": 1.2476, "step": 28000 }, { "epoch": 0.18, "eval_bleu": 37.5418, "eval_gen_len": 23.7527, "eval_loss": 1.851403832435608, "eval_runtime": 1100.3622, "eval_samples_per_second": 2.738, "eval_steps_per_second": 0.685, "step": 28000 }, { "epoch": 0.18, "learning_rate": 4.108165449390736e-05, "loss": 1.2778, "step": 28500 }, { "epoch": 0.18, "eval_bleu": 36.9893, "eval_gen_len": 23.8487, "eval_loss": 1.7635945081710815, "eval_runtime": 1113.9066, "eval_samples_per_second": 2.705, "eval_steps_per_second": 0.677, "step": 28500 }, { "epoch": 0.18, "learning_rate": 4.092519229204609e-05, "loss": 1.2335, "step": 29000 }, { "epoch": 0.18, "eval_bleu": 38.4148, "eval_gen_len": 24.0743, "eval_loss": 1.7612364292144775, "eval_runtime": 1118.162, "eval_samples_per_second": 2.695, "eval_steps_per_second": 0.674, "step": 29000 }, { "epoch": 0.18, "learning_rate": 4.0768730090184814e-05, "loss": 1.2229, "step": 29500 }, { "epoch": 0.18, "eval_bleu": 38.387, "eval_gen_len": 24.1324, "eval_loss": 1.724638819694519, "eval_runtime": 1133.5566, "eval_samples_per_second": 2.658, "eval_steps_per_second": 0.665, "step": 29500 }, { "epoch": 0.19, "learning_rate": 4.061226788832354e-05, "loss": 1.198, "step": 30000 }, { "epoch": 0.19, "eval_bleu": 38.2916, "eval_gen_len": 23.918, "eval_loss": 1.7657166719436646, "eval_runtime": 1258.8163, "eval_samples_per_second": 2.394, "eval_steps_per_second": 0.599, "step": 30000 }, { "epoch": 0.19, "learning_rate": 4.045580568646227e-05, "loss": 1.281, "step": 30500 }, { "epoch": 0.19, "eval_bleu": 36.5976, "eval_gen_len": 23.24, "eval_loss": 1.758962631225586, "eval_runtime": 1321.212, "eval_samples_per_second": 2.28, "eval_steps_per_second": 0.571, "step": 30500 }, { "epoch": 0.19, "learning_rate": 4.029934348460099e-05, "loss": 1.231, "step": 31000 }, { "epoch": 0.19, "eval_bleu": 37.8396, "eval_gen_len": 23.7385, "eval_loss": 1.733566164970398, "eval_runtime": 1419.0837, "eval_samples_per_second": 2.123, "eval_steps_per_second": 0.531, "step": 31000 }, { "epoch": 0.2, "learning_rate": 4.014288128273972e-05, "loss": 1.2014, "step": 31500 }, { "epoch": 0.2, "eval_bleu": 37.8945, "eval_gen_len": 23.8025, "eval_loss": 1.7839374542236328, "eval_runtime": 1450.5502, "eval_samples_per_second": 2.077, "eval_steps_per_second": 0.52, "step": 31500 }, { "epoch": 0.2, "learning_rate": 3.9986419080878445e-05, "loss": 1.2295, "step": 32000 }, { "epoch": 0.2, "eval_bleu": 37.9001, "eval_gen_len": 23.92, "eval_loss": 1.757741928100586, "eval_runtime": 1476.1175, "eval_samples_per_second": 2.041, "eval_steps_per_second": 0.511, "step": 32000 }, { "epoch": 0.2, "learning_rate": 3.9829956879017165e-05, "loss": 1.2154, "step": 32500 }, { "epoch": 0.2, "eval_bleu": 37.5283, "eval_gen_len": 23.537, "eval_loss": 1.80224609375, "eval_runtime": 1473.4024, "eval_samples_per_second": 2.045, "eval_steps_per_second": 0.512, "step": 32500 }, { "epoch": 0.21, "learning_rate": 3.967349467715589e-05, "loss": 1.1956, "step": 33000 }, { "epoch": 0.21, "eval_bleu": 38.5709, "eval_gen_len": 24.0189, "eval_loss": 1.7815015316009521, "eval_runtime": 1398.0131, "eval_samples_per_second": 2.155, "eval_steps_per_second": 0.539, "step": 33000 }, { "epoch": 0.21, "learning_rate": 3.951703247529462e-05, "loss": 1.2045, "step": 33500 }, { "epoch": 0.21, "eval_bleu": 37.2706, "eval_gen_len": 23.2522, "eval_loss": 1.7592459917068481, "eval_runtime": 1445.8551, "eval_samples_per_second": 2.084, "eval_steps_per_second": 0.521, "step": 33500 }, { "epoch": 0.21, "learning_rate": 3.9360570273433344e-05, "loss": 1.1796, "step": 34000 }, { "epoch": 0.21, "eval_bleu": 38.5198, "eval_gen_len": 24.078, "eval_loss": 1.8075897693634033, "eval_runtime": 1528.5032, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.493, "step": 34000 }, { "epoch": 0.22, "learning_rate": 3.920410807157207e-05, "loss": 1.226, "step": 34500 }, { "epoch": 0.22, "eval_bleu": 38.1729, "eval_gen_len": 23.9038, "eval_loss": 1.731508493423462, "eval_runtime": 1501.9126, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.502, "step": 34500 }, { "epoch": 0.22, "learning_rate": 3.9047645869710796e-05, "loss": 1.2304, "step": 35000 }, { "epoch": 0.22, "eval_bleu": 37.8875, "eval_gen_len": 23.8493, "eval_loss": 1.7690067291259766, "eval_runtime": 1584.8349, "eval_samples_per_second": 1.901, "eval_steps_per_second": 0.476, "step": 35000 }, { "epoch": 0.22, "learning_rate": 3.889118366784952e-05, "loss": 1.197, "step": 35500 }, { "epoch": 0.22, "eval_bleu": 37.7553, "eval_gen_len": 23.9353, "eval_loss": 1.7940903902053833, "eval_runtime": 1636.4218, "eval_samples_per_second": 1.841, "eval_steps_per_second": 0.461, "step": 35500 }, { "epoch": 0.23, "learning_rate": 3.873472146598825e-05, "loss": 1.1844, "step": 36000 }, { "epoch": 0.23, "eval_bleu": 38.3847, "eval_gen_len": 23.9668, "eval_loss": 1.7370097637176514, "eval_runtime": 1586.621, "eval_samples_per_second": 1.899, "eval_steps_per_second": 0.475, "step": 36000 }, { "epoch": 0.23, "learning_rate": 3.8578259264126975e-05, "loss": 1.2099, "step": 36500 }, { "epoch": 0.23, "eval_bleu": 38.4795, "eval_gen_len": 23.932, "eval_loss": 1.7586994171142578, "eval_runtime": 1530.6865, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.493, "step": 36500 }, { "epoch": 0.23, "learning_rate": 3.84217970622657e-05, "loss": 1.1798, "step": 37000 }, { "epoch": 0.23, "eval_bleu": 37.463, "eval_gen_len": 23.6664, "eval_loss": 1.752753496170044, "eval_runtime": 1408.4249, "eval_samples_per_second": 2.139, "eval_steps_per_second": 0.535, "step": 37000 }, { "epoch": 0.23, "learning_rate": 3.826533486040442e-05, "loss": 1.1959, "step": 37500 }, { "epoch": 0.23, "eval_bleu": 38.1287, "eval_gen_len": 23.7693, "eval_loss": 1.7170414924621582, "eval_runtime": 1482.0293, "eval_samples_per_second": 2.033, "eval_steps_per_second": 0.509, "step": 37500 }, { "epoch": 0.24, "learning_rate": 3.8108872658543146e-05, "loss": 1.2061, "step": 38000 }, { "epoch": 0.24, "eval_bleu": 38.8039, "eval_gen_len": 23.9973, "eval_loss": 1.731540322303772, "eval_runtime": 1484.0478, "eval_samples_per_second": 2.03, "eval_steps_per_second": 0.508, "step": 38000 }, { "epoch": 0.24, "learning_rate": 3.795241045668187e-05, "loss": 1.1606, "step": 38500 }, { "epoch": 0.24, "eval_bleu": 37.93, "eval_gen_len": 23.8347, "eval_loss": 1.7204748392105103, "eval_runtime": 1527.1349, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.494, "step": 38500 }, { "epoch": 0.24, "learning_rate": 3.77959482548206e-05, "loss": 1.1605, "step": 39000 }, { "epoch": 0.24, "eval_bleu": 38.2637, "eval_gen_len": 24.2688, "eval_loss": 1.7610867023468018, "eval_runtime": 1550.9436, "eval_samples_per_second": 1.943, "eval_steps_per_second": 0.486, "step": 39000 }, { "epoch": 0.25, "learning_rate": 3.7639486052959325e-05, "loss": 1.1792, "step": 39500 }, { "epoch": 0.25, "eval_bleu": 38.3329, "eval_gen_len": 24.088, "eval_loss": 1.728115200996399, "eval_runtime": 1598.5717, "eval_samples_per_second": 1.885, "eval_steps_per_second": 0.472, "step": 39500 }, { "epoch": 0.25, "learning_rate": 3.748302385109805e-05, "loss": 1.1613, "step": 40000 }, { "epoch": 0.25, "eval_bleu": 38.7357, "eval_gen_len": 24.1019, "eval_loss": 1.7274950742721558, "eval_runtime": 1552.6289, "eval_samples_per_second": 1.941, "eval_steps_per_second": 0.486, "step": 40000 } ], "max_steps": 159783, "num_train_epochs": 1, "total_flos": 6683673699483648.0, "trial_name": null, "trial_params": null }