{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 59688, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00019832462136442838, "loss": 38.657, "step": 500 }, { "epoch": 0.05, "learning_rate": 0.00019664924272885673, "loss": 31.308, "step": 1000 }, { "epoch": 0.05, "eval_loss": 31.739517211914062, "eval_runtime": 0.2575, "eval_samples_per_second": 194.153, "eval_steps_per_second": 7.766, "step": 1000 }, { "epoch": 0.08, "learning_rate": 0.00019497386409328508, "loss": 29.927, "step": 1500 }, { "epoch": 0.1, "learning_rate": 0.00019329848545771345, "loss": 29.1683, "step": 2000 }, { "epoch": 0.1, "eval_loss": 30.288894653320312, "eval_runtime": 0.2122, "eval_samples_per_second": 235.624, "eval_steps_per_second": 9.425, "step": 2000 }, { "epoch": 0.13, "learning_rate": 0.00019162310682214182, "loss": 28.6403, "step": 2500 }, { "epoch": 0.15, "learning_rate": 0.0001899477281865702, "loss": 28.258, "step": 3000 }, { "epoch": 0.15, "eval_loss": 29.558307647705078, "eval_runtime": 0.2125, "eval_samples_per_second": 235.261, "eval_steps_per_second": 9.41, "step": 3000 }, { "epoch": 0.18, "learning_rate": 0.00018827234955099854, "loss": 27.9285, "step": 3500 }, { "epoch": 0.2, "learning_rate": 0.0001865969709154269, "loss": 27.6974, "step": 4000 }, { "epoch": 0.2, "eval_loss": 29.129697799682617, "eval_runtime": 0.213, "eval_samples_per_second": 234.762, "eval_steps_per_second": 9.39, "step": 4000 }, { "epoch": 0.23, "learning_rate": 0.00018492159227985526, "loss": 27.492, "step": 4500 }, { "epoch": 0.25, "learning_rate": 0.00018324621364428363, "loss": 27.3712, "step": 5000 }, { "epoch": 0.25, "eval_loss": 28.81637954711914, "eval_runtime": 0.2107, "eval_samples_per_second": 237.354, "eval_steps_per_second": 9.494, "step": 5000 }, { "epoch": 0.28, "learning_rate": 0.00018157083500871198, "loss": 27.204, "step": 5500 }, { "epoch": 0.3, "learning_rate": 0.00017989545637314033, "loss": 27.0678, "step": 6000 }, { "epoch": 0.3, "eval_loss": 28.593141555786133, "eval_runtime": 0.2097, "eval_samples_per_second": 238.454, "eval_steps_per_second": 9.538, "step": 6000 }, { "epoch": 0.33, "learning_rate": 0.0001782200777375687, "loss": 26.9572, "step": 6500 }, { "epoch": 0.35, "learning_rate": 0.00017654469910199707, "loss": 26.8814, "step": 7000 }, { "epoch": 0.35, "eval_loss": 28.419679641723633, "eval_runtime": 0.2117, "eval_samples_per_second": 236.237, "eval_steps_per_second": 9.449, "step": 7000 }, { "epoch": 0.38, "learning_rate": 0.00017486932046642542, "loss": 26.784, "step": 7500 }, { "epoch": 0.4, "learning_rate": 0.0001731939418308538, "loss": 26.6886, "step": 8000 }, { "epoch": 0.4, "eval_loss": 28.238792419433594, "eval_runtime": 0.2269, "eval_samples_per_second": 220.371, "eval_steps_per_second": 8.815, "step": 8000 }, { "epoch": 0.43, "learning_rate": 0.00017151856319528214, "loss": 26.6235, "step": 8500 }, { "epoch": 0.45, "learning_rate": 0.0001698431845597105, "loss": 26.545, "step": 9000 }, { "epoch": 0.45, "eval_loss": 28.171510696411133, "eval_runtime": 0.2133, "eval_samples_per_second": 234.396, "eval_steps_per_second": 9.376, "step": 9000 }, { "epoch": 0.48, "learning_rate": 0.00016816780592413886, "loss": 26.4933, "step": 9500 }, { "epoch": 0.5, "learning_rate": 0.00016649242728856723, "loss": 26.4678, "step": 10000 }, { "epoch": 0.5, "eval_loss": 28.05875015258789, "eval_runtime": 0.2106, "eval_samples_per_second": 237.401, "eval_steps_per_second": 9.496, "step": 10000 }, { "epoch": 0.53, "learning_rate": 0.00016481704865299558, "loss": 26.3908, "step": 10500 }, { "epoch": 0.55, "learning_rate": 0.00016314167001742395, "loss": 26.3369, "step": 11000 }, { "epoch": 0.55, "eval_loss": 27.978954315185547, "eval_runtime": 0.2203, "eval_samples_per_second": 226.942, "eval_steps_per_second": 9.078, "step": 11000 }, { "epoch": 0.58, "learning_rate": 0.0001614662913818523, "loss": 26.3059, "step": 11500 }, { "epoch": 0.6, "learning_rate": 0.00015979091274628067, "loss": 26.2555, "step": 12000 }, { "epoch": 0.6, "eval_loss": 27.862077713012695, "eval_runtime": 0.2131, "eval_samples_per_second": 234.646, "eval_steps_per_second": 9.386, "step": 12000 }, { "epoch": 0.63, "learning_rate": 0.00015811553411070902, "loss": 26.197, "step": 12500 }, { "epoch": 0.65, "learning_rate": 0.0001564401554751374, "loss": 26.1535, "step": 13000 }, { "epoch": 0.65, "eval_loss": 27.81878662109375, "eval_runtime": 0.2128, "eval_samples_per_second": 234.945, "eval_steps_per_second": 9.398, "step": 13000 }, { "epoch": 0.68, "learning_rate": 0.00015476477683956574, "loss": 26.1223, "step": 13500 }, { "epoch": 0.7, "learning_rate": 0.0001530893982039941, "loss": 26.0879, "step": 14000 }, { "epoch": 0.7, "eval_loss": 27.750713348388672, "eval_runtime": 0.2338, "eval_samples_per_second": 213.838, "eval_steps_per_second": 8.554, "step": 14000 }, { "epoch": 0.73, "learning_rate": 0.00015141401956842248, "loss": 26.0771, "step": 14500 }, { "epoch": 0.75, "learning_rate": 0.00014973864093285083, "loss": 26.0283, "step": 15000 }, { "epoch": 0.75, "eval_loss": 27.70425796508789, "eval_runtime": 0.2114, "eval_samples_per_second": 236.495, "eval_steps_per_second": 9.46, "step": 15000 }, { "epoch": 0.78, "learning_rate": 0.00014806326229727917, "loss": 26.0089, "step": 15500 }, { "epoch": 0.8, "learning_rate": 0.00014638788366170755, "loss": 25.9742, "step": 16000 }, { "epoch": 0.8, "eval_loss": 27.637338638305664, "eval_runtime": 0.2146, "eval_samples_per_second": 233.03, "eval_steps_per_second": 9.321, "step": 16000 }, { "epoch": 0.83, "learning_rate": 0.00014471250502613592, "loss": 25.9742, "step": 16500 }, { "epoch": 0.85, "learning_rate": 0.00014303712639056427, "loss": 25.9445, "step": 17000 }, { "epoch": 0.85, "eval_loss": 27.6143798828125, "eval_runtime": 0.2101, "eval_samples_per_second": 237.929, "eval_steps_per_second": 9.517, "step": 17000 }, { "epoch": 0.88, "learning_rate": 0.00014136174775499264, "loss": 25.9146, "step": 17500 }, { "epoch": 0.9, "learning_rate": 0.000139686369119421, "loss": 25.8983, "step": 18000 }, { "epoch": 0.9, "eval_loss": 27.574024200439453, "eval_runtime": 0.2119, "eval_samples_per_second": 236.006, "eval_steps_per_second": 9.44, "step": 18000 }, { "epoch": 0.93, "learning_rate": 0.00013801099048384936, "loss": 25.8699, "step": 18500 }, { "epoch": 0.95, "learning_rate": 0.00013633561184827773, "loss": 25.8736, "step": 19000 }, { "epoch": 0.95, "eval_loss": 27.524921417236328, "eval_runtime": 0.2136, "eval_samples_per_second": 234.075, "eval_steps_per_second": 9.363, "step": 19000 }, { "epoch": 0.98, "learning_rate": 0.00013466023321270608, "loss": 25.8176, "step": 19500 }, { "epoch": 1.01, "learning_rate": 0.00013298485457713443, "loss": 25.8017, "step": 20000 }, { "epoch": 1.01, "eval_loss": 27.505229949951172, "eval_runtime": 0.2283, "eval_samples_per_second": 218.987, "eval_steps_per_second": 8.759, "step": 20000 }, { "epoch": 1.03, "learning_rate": 0.0001313094759415628, "loss": 25.7503, "step": 20500 }, { "epoch": 1.06, "learning_rate": 0.00012963409730599117, "loss": 25.7469, "step": 21000 }, { "epoch": 1.06, "eval_loss": 27.50472640991211, "eval_runtime": 0.2116, "eval_samples_per_second": 236.284, "eval_steps_per_second": 9.451, "step": 21000 }, { "epoch": 1.08, "learning_rate": 0.00012795871867041952, "loss": 25.7176, "step": 21500 }, { "epoch": 1.11, "learning_rate": 0.00012628334003484786, "loss": 25.6876, "step": 22000 }, { "epoch": 1.11, "eval_loss": 27.477006912231445, "eval_runtime": 0.215, "eval_samples_per_second": 232.548, "eval_steps_per_second": 9.302, "step": 22000 }, { "epoch": 1.13, "learning_rate": 0.00012460796139927624, "loss": 25.7203, "step": 22500 }, { "epoch": 1.16, "learning_rate": 0.0001229325827637046, "loss": 25.6757, "step": 23000 }, { "epoch": 1.16, "eval_loss": 27.425168991088867, "eval_runtime": 0.2125, "eval_samples_per_second": 235.312, "eval_steps_per_second": 9.412, "step": 23000 }, { "epoch": 1.18, "learning_rate": 0.00012125720412813297, "loss": 25.6867, "step": 23500 }, { "epoch": 1.21, "learning_rate": 0.00011958182549256132, "loss": 25.6558, "step": 24000 }, { "epoch": 1.21, "eval_loss": 27.394458770751953, "eval_runtime": 0.2106, "eval_samples_per_second": 237.425, "eval_steps_per_second": 9.497, "step": 24000 }, { "epoch": 1.23, "learning_rate": 0.00011790644685698968, "loss": 25.6432, "step": 24500 }, { "epoch": 1.26, "learning_rate": 0.00011623106822141805, "loss": 25.6223, "step": 25000 }, { "epoch": 1.26, "eval_loss": 27.391796112060547, "eval_runtime": 0.2101, "eval_samples_per_second": 237.937, "eval_steps_per_second": 9.517, "step": 25000 }, { "epoch": 1.28, "learning_rate": 0.00011455568958584641, "loss": 25.6129, "step": 25500 }, { "epoch": 1.31, "learning_rate": 0.00011288031095027476, "loss": 25.6234, "step": 26000 }, { "epoch": 1.31, "eval_loss": 27.368698120117188, "eval_runtime": 0.2109, "eval_samples_per_second": 237.043, "eval_steps_per_second": 9.482, "step": 26000 }, { "epoch": 1.33, "learning_rate": 0.00011120493231470313, "loss": 25.5979, "step": 26500 }, { "epoch": 1.36, "learning_rate": 0.00010952955367913149, "loss": 25.623, "step": 27000 }, { "epoch": 1.36, "eval_loss": 27.350811004638672, "eval_runtime": 0.2251, "eval_samples_per_second": 222.106, "eval_steps_per_second": 8.884, "step": 27000 }, { "epoch": 1.38, "learning_rate": 0.00010785417504355986, "loss": 25.5917, "step": 27500 }, { "epoch": 1.41, "learning_rate": 0.00010617879640798821, "loss": 25.5728, "step": 28000 }, { "epoch": 1.41, "eval_loss": 27.310701370239258, "eval_runtime": 0.2112, "eval_samples_per_second": 236.707, "eval_steps_per_second": 9.468, "step": 28000 }, { "epoch": 1.43, "learning_rate": 0.00010450341777241657, "loss": 25.5796, "step": 28500 }, { "epoch": 1.46, "learning_rate": 0.00010282803913684493, "loss": 25.5648, "step": 29000 }, { "epoch": 1.46, "eval_loss": 27.295310974121094, "eval_runtime": 0.217, "eval_samples_per_second": 230.466, "eval_steps_per_second": 9.219, "step": 29000 }, { "epoch": 1.48, "learning_rate": 0.0001011526605012733, "loss": 25.5587, "step": 29500 }, { "epoch": 1.51, "learning_rate": 9.947728186570166e-05, "loss": 25.5318, "step": 30000 }, { "epoch": 1.51, "eval_loss": 27.264293670654297, "eval_runtime": 0.2116, "eval_samples_per_second": 236.263, "eval_steps_per_second": 9.451, "step": 30000 }, { "epoch": 1.53, "learning_rate": 9.780190323013e-05, "loss": 25.5344, "step": 30500 }, { "epoch": 1.56, "learning_rate": 9.612652459455838e-05, "loss": 25.5212, "step": 31000 }, { "epoch": 1.56, "eval_loss": 27.232440948486328, "eval_runtime": 0.2137, "eval_samples_per_second": 233.953, "eval_steps_per_second": 9.358, "step": 31000 }, { "epoch": 1.58, "learning_rate": 9.445114595898673e-05, "loss": 25.5354, "step": 31500 }, { "epoch": 1.61, "learning_rate": 9.27757673234151e-05, "loss": 25.5099, "step": 32000 }, { "epoch": 1.61, "eval_loss": 27.240272521972656, "eval_runtime": 0.2111, "eval_samples_per_second": 236.859, "eval_steps_per_second": 9.474, "step": 32000 }, { "epoch": 1.63, "learning_rate": 9.110038868784345e-05, "loss": 25.5093, "step": 32500 }, { "epoch": 1.66, "learning_rate": 8.942501005227182e-05, "loss": 25.497, "step": 33000 }, { "epoch": 1.66, "eval_loss": 27.228918075561523, "eval_runtime": 0.2125, "eval_samples_per_second": 235.337, "eval_steps_per_second": 9.413, "step": 33000 }, { "epoch": 1.68, "learning_rate": 8.774963141670018e-05, "loss": 25.5092, "step": 33500 }, { "epoch": 1.71, "learning_rate": 8.607425278112854e-05, "loss": 25.4787, "step": 34000 }, { "epoch": 1.71, "eval_loss": 27.228187561035156, "eval_runtime": 0.2095, "eval_samples_per_second": 238.654, "eval_steps_per_second": 9.546, "step": 34000 }, { "epoch": 1.73, "learning_rate": 8.439887414555691e-05, "loss": 25.4684, "step": 34500 }, { "epoch": 1.76, "learning_rate": 8.272349550998526e-05, "loss": 25.483, "step": 35000 }, { "epoch": 1.76, "eval_loss": 27.22612762451172, "eval_runtime": 0.2116, "eval_samples_per_second": 236.31, "eval_steps_per_second": 9.452, "step": 35000 }, { "epoch": 1.78, "learning_rate": 8.104811687441363e-05, "loss": 25.4463, "step": 35500 }, { "epoch": 1.81, "learning_rate": 7.937273823884198e-05, "loss": 25.4702, "step": 36000 }, { "epoch": 1.81, "eval_loss": 27.186805725097656, "eval_runtime": 0.2143, "eval_samples_per_second": 233.331, "eval_steps_per_second": 9.333, "step": 36000 }, { "epoch": 1.83, "learning_rate": 7.769735960327035e-05, "loss": 25.4289, "step": 36500 }, { "epoch": 1.86, "learning_rate": 7.60219809676987e-05, "loss": 25.4306, "step": 37000 }, { "epoch": 1.86, "eval_loss": 27.194353103637695, "eval_runtime": 0.2096, "eval_samples_per_second": 238.518, "eval_steps_per_second": 9.541, "step": 37000 }, { "epoch": 1.88, "learning_rate": 7.434660233212707e-05, "loss": 25.4421, "step": 37500 }, { "epoch": 1.91, "learning_rate": 7.267122369655543e-05, "loss": 25.4518, "step": 38000 }, { "epoch": 1.91, "eval_loss": 27.154298782348633, "eval_runtime": 0.22, "eval_samples_per_second": 227.255, "eval_steps_per_second": 9.09, "step": 38000 }, { "epoch": 1.94, "learning_rate": 7.099584506098379e-05, "loss": 25.4411, "step": 38500 }, { "epoch": 1.96, "learning_rate": 6.932046642541215e-05, "loss": 25.4264, "step": 39000 }, { "epoch": 1.96, "eval_loss": 27.156774520874023, "eval_runtime": 0.2232, "eval_samples_per_second": 224.006, "eval_steps_per_second": 8.96, "step": 39000 }, { "epoch": 1.99, "learning_rate": 6.764508778984051e-05, "loss": 25.4384, "step": 39500 }, { "epoch": 2.01, "learning_rate": 6.596970915426887e-05, "loss": 25.4168, "step": 40000 }, { "epoch": 2.01, "eval_loss": 27.140100479125977, "eval_runtime": 0.2133, "eval_samples_per_second": 234.392, "eval_steps_per_second": 9.376, "step": 40000 }, { "epoch": 2.04, "learning_rate": 6.429433051869723e-05, "loss": 25.4029, "step": 40500 }, { "epoch": 2.06, "learning_rate": 6.261895188312559e-05, "loss": 25.386, "step": 41000 }, { "epoch": 2.06, "eval_loss": 27.135889053344727, "eval_runtime": 0.2136, "eval_samples_per_second": 234.042, "eval_steps_per_second": 9.362, "step": 41000 }, { "epoch": 2.09, "learning_rate": 6.0943573247553954e-05, "loss": 25.3667, "step": 41500 }, { "epoch": 2.11, "learning_rate": 5.926819461198231e-05, "loss": 25.3798, "step": 42000 }, { "epoch": 2.11, "eval_loss": 27.12567138671875, "eval_runtime": 0.2109, "eval_samples_per_second": 237.075, "eval_steps_per_second": 9.483, "step": 42000 }, { "epoch": 2.14, "learning_rate": 5.7592815976410674e-05, "loss": 25.3643, "step": 42500 }, { "epoch": 2.16, "learning_rate": 5.5917437340839026e-05, "loss": 25.3643, "step": 43000 }, { "epoch": 2.16, "eval_loss": 27.113182067871094, "eval_runtime": 0.2121, "eval_samples_per_second": 235.709, "eval_steps_per_second": 9.428, "step": 43000 }, { "epoch": 2.19, "learning_rate": 5.424205870526739e-05, "loss": 25.3653, "step": 43500 }, { "epoch": 2.21, "learning_rate": 5.256668006969575e-05, "loss": 25.3378, "step": 44000 }, { "epoch": 2.21, "eval_loss": 27.1131649017334, "eval_runtime": 0.2131, "eval_samples_per_second": 234.625, "eval_steps_per_second": 9.385, "step": 44000 }, { "epoch": 2.24, "learning_rate": 5.089130143412412e-05, "loss": 25.3598, "step": 44500 }, { "epoch": 2.26, "learning_rate": 4.921592279855248e-05, "loss": 25.3431, "step": 45000 }, { "epoch": 2.26, "eval_loss": 27.10150909423828, "eval_runtime": 0.3343, "eval_samples_per_second": 149.553, "eval_steps_per_second": 5.982, "step": 45000 }, { "epoch": 2.29, "learning_rate": 4.754054416298084e-05, "loss": 25.3568, "step": 45500 }, { "epoch": 2.31, "learning_rate": 4.58651655274092e-05, "loss": 25.3384, "step": 46000 }, { "epoch": 2.31, "eval_loss": 27.091062545776367, "eval_runtime": 0.2132, "eval_samples_per_second": 234.536, "eval_steps_per_second": 9.381, "step": 46000 }, { "epoch": 2.34, "learning_rate": 4.418978689183756e-05, "loss": 25.3337, "step": 46500 }, { "epoch": 2.36, "learning_rate": 4.251440825626592e-05, "loss": 25.3516, "step": 47000 }, { "epoch": 2.36, "eval_loss": 27.0788631439209, "eval_runtime": 0.2156, "eval_samples_per_second": 231.865, "eval_steps_per_second": 9.275, "step": 47000 }, { "epoch": 2.39, "learning_rate": 4.083902962069428e-05, "loss": 25.3368, "step": 47500 }, { "epoch": 2.41, "learning_rate": 3.9163650985122644e-05, "loss": 25.3104, "step": 48000 }, { "epoch": 2.41, "eval_loss": 27.085359573364258, "eval_runtime": 0.2126, "eval_samples_per_second": 235.191, "eval_steps_per_second": 9.408, "step": 48000 }, { "epoch": 2.44, "learning_rate": 3.7488272349551004e-05, "loss": 25.3357, "step": 48500 }, { "epoch": 2.46, "learning_rate": 3.581289371397936e-05, "loss": 25.3254, "step": 49000 }, { "epoch": 2.46, "eval_loss": 27.069896697998047, "eval_runtime": 0.2134, "eval_samples_per_second": 234.3, "eval_steps_per_second": 9.372, "step": 49000 }, { "epoch": 2.49, "learning_rate": 3.413751507840772e-05, "loss": 25.3286, "step": 49500 }, { "epoch": 2.51, "learning_rate": 3.246213644283608e-05, "loss": 25.3307, "step": 50000 }, { "epoch": 2.51, "eval_loss": 27.073589324951172, "eval_runtime": 0.2151, "eval_samples_per_second": 232.429, "eval_steps_per_second": 9.297, "step": 50000 }, { "epoch": 2.54, "learning_rate": 3.078675780726444e-05, "loss": 25.3197, "step": 50500 }, { "epoch": 2.56, "learning_rate": 2.9111379171692806e-05, "loss": 25.3153, "step": 51000 }, { "epoch": 2.56, "eval_loss": 27.077919006347656, "eval_runtime": 0.267, "eval_samples_per_second": 187.287, "eval_steps_per_second": 7.491, "step": 51000 }, { "epoch": 2.59, "learning_rate": 2.7436000536121165e-05, "loss": 25.3012, "step": 51500 }, { "epoch": 2.61, "learning_rate": 2.5760621900549525e-05, "loss": 25.3149, "step": 52000 }, { "epoch": 2.61, "eval_loss": 27.066099166870117, "eval_runtime": 0.2122, "eval_samples_per_second": 235.617, "eval_steps_per_second": 9.425, "step": 52000 }, { "epoch": 2.64, "learning_rate": 2.4085243264977885e-05, "loss": 25.2943, "step": 52500 }, { "epoch": 2.66, "learning_rate": 2.2409864629406248e-05, "loss": 25.3251, "step": 53000 }, { "epoch": 2.66, "eval_loss": 27.05815887451172, "eval_runtime": 0.2117, "eval_samples_per_second": 236.215, "eval_steps_per_second": 9.449, "step": 53000 }, { "epoch": 2.69, "learning_rate": 2.0734485993834608e-05, "loss": 25.2907, "step": 53500 }, { "epoch": 2.71, "learning_rate": 1.9059107358262967e-05, "loss": 25.3185, "step": 54000 }, { "epoch": 2.71, "eval_loss": 27.06610107421875, "eval_runtime": 0.2143, "eval_samples_per_second": 233.359, "eval_steps_per_second": 9.334, "step": 54000 }, { "epoch": 2.74, "learning_rate": 1.7383728722691327e-05, "loss": 25.3016, "step": 54500 }, { "epoch": 2.76, "learning_rate": 1.570835008711969e-05, "loss": 25.3045, "step": 55000 }, { "epoch": 2.76, "eval_loss": 27.05499267578125, "eval_runtime": 0.2149, "eval_samples_per_second": 232.706, "eval_steps_per_second": 9.308, "step": 55000 }, { "epoch": 2.79, "learning_rate": 1.403297145154805e-05, "loss": 25.2932, "step": 55500 }, { "epoch": 2.81, "learning_rate": 1.2357592815976411e-05, "loss": 25.2952, "step": 56000 }, { "epoch": 2.81, "eval_loss": 27.059736251831055, "eval_runtime": 0.2105, "eval_samples_per_second": 237.516, "eval_steps_per_second": 9.501, "step": 56000 }, { "epoch": 2.84, "learning_rate": 1.0682214180404771e-05, "loss": 25.2941, "step": 56500 }, { "epoch": 2.86, "learning_rate": 9.006835544833132e-06, "loss": 25.2863, "step": 57000 }, { "epoch": 2.86, "eval_loss": 27.0577392578125, "eval_runtime": 0.231, "eval_samples_per_second": 216.423, "eval_steps_per_second": 8.657, "step": 57000 }, { "epoch": 2.89, "learning_rate": 7.331456909261493e-06, "loss": 25.2943, "step": 57500 }, { "epoch": 2.92, "learning_rate": 5.656078273689854e-06, "loss": 25.2966, "step": 58000 }, { "epoch": 2.92, "eval_loss": 27.05270004272461, "eval_runtime": 0.2335, "eval_samples_per_second": 214.093, "eval_steps_per_second": 8.564, "step": 58000 }, { "epoch": 2.94, "learning_rate": 3.980699638118215e-06, "loss": 25.2804, "step": 58500 }, { "epoch": 2.97, "learning_rate": 2.3053210025465755e-06, "loss": 25.2995, "step": 59000 }, { "epoch": 2.97, "eval_loss": 27.049020767211914, "eval_runtime": 0.2618, "eval_samples_per_second": 190.997, "eval_steps_per_second": 7.64, "step": 59000 }, { "epoch": 2.99, "learning_rate": 6.299423669749365e-07, "loss": 25.2862, "step": 59500 } ], "max_steps": 59688, "num_train_epochs": 3, "total_flos": 3.715405206616277e+18, "trial_name": null, "trial_params": null }