{ "best_metric": 4.0290679931640625, "best_model_checkpoint": "./models/gpt_trinity_test/checkpoint-84000", "epoch": 4.0, "global_step": 84736, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 3.533160254897333e-06, "loss": 3.5765, "step": 1000 }, { "epoch": 0.05, "eval_loss": 4.124706745147705, "eval_runtime": 5.5178, "eval_samples_per_second": 77.749, "eval_steps_per_second": 38.965, "step": 1000 }, { "epoch": 0.09, "learning_rate": 7.073400991267406e-06, "loss": 3.19, "step": 2000 }, { "epoch": 0.09, "eval_loss": 4.057796955108643, "eval_runtime": 5.5495, "eval_samples_per_second": 77.305, "eval_steps_per_second": 38.742, "step": 2000 }, { "epoch": 0.14, "learning_rate": 1.061364172763748e-05, "loss": 3.1177, "step": 3000 }, { "epoch": 0.14, "eval_loss": 4.070754051208496, "eval_runtime": 5.5223, "eval_samples_per_second": 77.685, "eval_steps_per_second": 38.933, "step": 3000 }, { "epoch": 0.19, "learning_rate": 1.4153882464007553e-05, "loss": 3.1116, "step": 4000 }, { "epoch": 0.19, "eval_loss": 4.065377712249756, "eval_runtime": 5.5251, "eval_samples_per_second": 77.646, "eval_steps_per_second": 38.913, "step": 4000 }, { "epoch": 0.24, "learning_rate": 1.7694123200377628e-05, "loss": 3.0777, "step": 5000 }, { "epoch": 0.24, "eval_loss": 4.085710525512695, "eval_runtime": 5.5077, "eval_samples_per_second": 77.89, "eval_steps_per_second": 39.036, "step": 5000 }, { "epoch": 0.28, "learning_rate": 2.12343639367477e-05, "loss": 3.1105, "step": 6000 }, { "epoch": 0.28, "eval_loss": 4.112701416015625, "eval_runtime": 5.4987, "eval_samples_per_second": 78.018, "eval_steps_per_second": 39.1, "step": 6000 }, { "epoch": 0.33, "learning_rate": 2.4771064432381404e-05, "loss": 3.1018, "step": 7000 }, { "epoch": 0.33, "eval_loss": 4.141016483306885, "eval_runtime": 5.5233, "eval_samples_per_second": 77.671, "eval_steps_per_second": 38.926, "step": 7000 }, { "epoch": 0.38, "learning_rate": 2.8311305168751475e-05, "loss": 3.0728, "step": 8000 }, { "epoch": 0.38, "eval_loss": 4.183428764343262, "eval_runtime": 5.5153, "eval_samples_per_second": 77.783, "eval_steps_per_second": 38.982, "step": 8000 }, { "epoch": 0.42, "learning_rate": 2.9794261886653903e-05, "loss": 3.1248, "step": 9000 }, { "epoch": 0.42, "eval_loss": 4.205781936645508, "eval_runtime": 5.4948, "eval_samples_per_second": 78.073, "eval_steps_per_second": 39.128, "step": 9000 }, { "epoch": 0.47, "learning_rate": 2.9400881172799034e-05, "loss": 3.1035, "step": 10000 }, { "epoch": 0.47, "eval_loss": 4.20481538772583, "eval_runtime": 5.5171, "eval_samples_per_second": 77.758, "eval_steps_per_second": 38.97, "step": 10000 }, { "epoch": 0.52, "learning_rate": 2.9007893839658023e-05, "loss": 3.0943, "step": 11000 }, { "epoch": 0.52, "eval_loss": 4.189157009124756, "eval_runtime": 5.5205, "eval_samples_per_second": 77.71, "eval_steps_per_second": 38.946, "step": 11000 }, { "epoch": 0.57, "learning_rate": 2.8614513125803154e-05, "loss": 3.0724, "step": 12000 }, { "epoch": 0.57, "eval_loss": 4.206292629241943, "eval_runtime": 5.5117, "eval_samples_per_second": 77.834, "eval_steps_per_second": 39.008, "step": 12000 }, { "epoch": 0.61, "learning_rate": 2.822113241194828e-05, "loss": 3.0517, "step": 13000 }, { "epoch": 0.61, "eval_loss": 4.19225549697876, "eval_runtime": 5.5111, "eval_samples_per_second": 77.843, "eval_steps_per_second": 39.012, "step": 13000 }, { "epoch": 0.66, "learning_rate": 2.782814507880727e-05, "loss": 3.0372, "step": 14000 }, { "epoch": 0.66, "eval_loss": 4.2112298011779785, "eval_runtime": 5.4935, "eval_samples_per_second": 78.093, "eval_steps_per_second": 39.138, "step": 14000 }, { "epoch": 0.71, "learning_rate": 2.7434764364952404e-05, "loss": 3.0235, "step": 15000 }, { "epoch": 0.71, "eval_loss": 4.204270362854004, "eval_runtime": 5.5028, "eval_samples_per_second": 77.961, "eval_steps_per_second": 39.071, "step": 15000 }, { "epoch": 0.76, "learning_rate": 2.704177703181139e-05, "loss": 3.0329, "step": 16000 }, { "epoch": 0.76, "eval_loss": 4.163009166717529, "eval_runtime": 5.5278, "eval_samples_per_second": 77.608, "eval_steps_per_second": 38.894, "step": 16000 }, { "epoch": 0.8, "learning_rate": 2.664839631795652e-05, "loss": 3.0171, "step": 17000 }, { "epoch": 0.8, "eval_loss": 4.163072109222412, "eval_runtime": 5.4985, "eval_samples_per_second": 78.021, "eval_steps_per_second": 39.102, "step": 17000 }, { "epoch": 0.85, "learning_rate": 2.625501560410165e-05, "loss": 2.9997, "step": 18000 }, { "epoch": 0.85, "eval_loss": 4.156316757202148, "eval_runtime": 5.5028, "eval_samples_per_second": 77.961, "eval_steps_per_second": 39.071, "step": 18000 }, { "epoch": 0.9, "learning_rate": 2.5861634890246782e-05, "loss": 2.9913, "step": 19000 }, { "epoch": 0.9, "eval_loss": 4.161618232727051, "eval_runtime": 5.5279, "eval_samples_per_second": 77.607, "eval_steps_per_second": 38.894, "step": 19000 }, { "epoch": 0.94, "learning_rate": 2.546904093781962e-05, "loss": 2.9579, "step": 20000 }, { "epoch": 0.94, "eval_loss": 4.149376392364502, "eval_runtime": 5.5138, "eval_samples_per_second": 77.805, "eval_steps_per_second": 38.993, "step": 20000 }, { "epoch": 0.99, "learning_rate": 2.507605360467861e-05, "loss": 2.9576, "step": 21000 }, { "epoch": 0.99, "eval_loss": 4.136674880981445, "eval_runtime": 5.5162, "eval_samples_per_second": 77.77, "eval_steps_per_second": 38.976, "step": 21000 }, { "epoch": 1.04, "learning_rate": 2.468267289082374e-05, "loss": 2.7461, "step": 22000 }, { "epoch": 1.04, "eval_loss": 4.15926456451416, "eval_runtime": 5.5064, "eval_samples_per_second": 77.909, "eval_steps_per_second": 39.045, "step": 22000 }, { "epoch": 1.09, "learning_rate": 2.4289292176968872e-05, "loss": 2.7637, "step": 23000 }, { "epoch": 1.09, "eval_loss": 4.1452813148498535, "eval_runtime": 5.5159, "eval_samples_per_second": 77.775, "eval_steps_per_second": 38.978, "step": 23000 }, { "epoch": 1.13, "learning_rate": 2.3895911463114e-05, "loss": 2.741, "step": 24000 }, { "epoch": 1.13, "eval_loss": 4.162442207336426, "eval_runtime": 5.5028, "eval_samples_per_second": 77.961, "eval_steps_per_second": 39.071, "step": 24000 }, { "epoch": 1.18, "learning_rate": 2.3502530749259134e-05, "loss": 2.7514, "step": 25000 }, { "epoch": 1.18, "eval_loss": 4.135677814483643, "eval_runtime": 5.525, "eval_samples_per_second": 77.647, "eval_steps_per_second": 38.914, "step": 25000 }, { "epoch": 1.23, "learning_rate": 2.3109150035404265e-05, "loss": 2.755, "step": 26000 }, { "epoch": 1.23, "eval_loss": 4.152438163757324, "eval_runtime": 5.5218, "eval_samples_per_second": 77.692, "eval_steps_per_second": 38.937, "step": 26000 }, { "epoch": 1.27, "learning_rate": 2.2715769321549396e-05, "loss": 2.7365, "step": 27000 }, { "epoch": 1.27, "eval_loss": 4.139864444732666, "eval_runtime": 5.5425, "eval_samples_per_second": 77.401, "eval_steps_per_second": 38.791, "step": 27000 }, { "epoch": 1.32, "learning_rate": 2.232278198840838e-05, "loss": 2.7356, "step": 28000 }, { "epoch": 1.32, "eval_loss": 4.1285014152526855, "eval_runtime": 5.5006, "eval_samples_per_second": 77.991, "eval_steps_per_second": 39.087, "step": 28000 }, { "epoch": 1.37, "learning_rate": 2.1929401274553512e-05, "loss": 2.7386, "step": 29000 }, { "epoch": 1.37, "eval_loss": 4.128605365753174, "eval_runtime": 5.5111, "eval_samples_per_second": 77.843, "eval_steps_per_second": 39.012, "step": 29000 }, { "epoch": 1.42, "learning_rate": 2.15364139414125e-05, "loss": 2.7489, "step": 30000 }, { "epoch": 1.42, "eval_loss": 4.123142719268799, "eval_runtime": 5.5126, "eval_samples_per_second": 77.821, "eval_steps_per_second": 39.001, "step": 30000 }, { "epoch": 1.46, "learning_rate": 2.114303322755763e-05, "loss": 2.7518, "step": 31000 }, { "epoch": 1.46, "eval_loss": 4.110356330871582, "eval_runtime": 5.5003, "eval_samples_per_second": 77.995, "eval_steps_per_second": 39.089, "step": 31000 }, { "epoch": 1.51, "learning_rate": 2.0750045894416617e-05, "loss": 2.7317, "step": 32000 }, { "epoch": 1.51, "eval_loss": 4.120232582092285, "eval_runtime": 5.4956, "eval_samples_per_second": 78.063, "eval_steps_per_second": 39.122, "step": 32000 }, { "epoch": 1.56, "learning_rate": 2.0356665180561748e-05, "loss": 2.7378, "step": 33000 }, { "epoch": 1.56, "eval_loss": 4.113248825073242, "eval_runtime": 5.4916, "eval_samples_per_second": 78.119, "eval_steps_per_second": 39.151, "step": 33000 }, { "epoch": 1.6, "learning_rate": 1.9963677847420733e-05, "loss": 2.7309, "step": 34000 }, { "epoch": 1.6, "eval_loss": 4.104722499847412, "eval_runtime": 5.5185, "eval_samples_per_second": 77.739, "eval_steps_per_second": 38.96, "step": 34000 }, { "epoch": 1.65, "learning_rate": 1.9570297133565864e-05, "loss": 2.7791, "step": 35000 }, { "epoch": 1.65, "eval_loss": 4.09762716293335, "eval_runtime": 5.5051, "eval_samples_per_second": 77.928, "eval_steps_per_second": 39.055, "step": 35000 }, { "epoch": 1.7, "learning_rate": 1.9176916419710998e-05, "loss": 2.7427, "step": 36000 }, { "epoch": 1.7, "eval_loss": 4.087375164031982, "eval_runtime": 5.502, "eval_samples_per_second": 77.972, "eval_steps_per_second": 39.077, "step": 36000 }, { "epoch": 1.75, "learning_rate": 1.8784322467283838e-05, "loss": 2.7184, "step": 37000 }, { "epoch": 1.75, "eval_loss": 4.0953216552734375, "eval_runtime": 5.4993, "eval_samples_per_second": 78.01, "eval_steps_per_second": 39.096, "step": 37000 }, { "epoch": 1.79, "learning_rate": 1.8390941753428972e-05, "loss": 2.7107, "step": 38000 }, { "epoch": 1.79, "eval_loss": 4.09627103805542, "eval_runtime": 5.5281, "eval_samples_per_second": 77.604, "eval_steps_per_second": 38.892, "step": 38000 }, { "epoch": 1.84, "learning_rate": 1.7997561039574103e-05, "loss": 2.7122, "step": 39000 }, { "epoch": 1.84, "eval_loss": 4.0841288566589355, "eval_runtime": 5.5134, "eval_samples_per_second": 77.811, "eval_steps_per_second": 38.996, "step": 39000 }, { "epoch": 1.89, "learning_rate": 1.760418032571923e-05, "loss": 2.7172, "step": 40000 }, { "epoch": 1.89, "eval_loss": 4.085172176361084, "eval_runtime": 5.5109, "eval_samples_per_second": 77.845, "eval_steps_per_second": 39.013, "step": 40000 }, { "epoch": 1.94, "learning_rate": 1.721119299257822e-05, "loss": 2.7126, "step": 41000 }, { "epoch": 1.94, "eval_loss": 4.063177108764648, "eval_runtime": 5.484, "eval_samples_per_second": 78.227, "eval_steps_per_second": 39.205, "step": 41000 }, { "epoch": 1.98, "learning_rate": 1.681781227872335e-05, "loss": 2.7063, "step": 42000 }, { "epoch": 1.98, "eval_loss": 4.064344882965088, "eval_runtime": 5.5064, "eval_samples_per_second": 77.909, "eval_steps_per_second": 39.045, "step": 42000 }, { "epoch": 2.03, "learning_rate": 1.642443156486848e-05, "loss": 2.5311, "step": 43000 }, { "epoch": 2.03, "eval_loss": 4.084793567657471, "eval_runtime": 5.5081, "eval_samples_per_second": 77.885, "eval_steps_per_second": 39.033, "step": 43000 }, { "epoch": 2.08, "learning_rate": 1.6031444231727466e-05, "loss": 2.4496, "step": 44000 }, { "epoch": 2.08, "eval_loss": 4.094330310821533, "eval_runtime": 5.4892, "eval_samples_per_second": 78.153, "eval_steps_per_second": 39.167, "step": 44000 }, { "epoch": 2.12, "learning_rate": 1.563845689858645e-05, "loss": 2.4597, "step": 45000 }, { "epoch": 2.12, "eval_loss": 4.079880714416504, "eval_runtime": 5.4837, "eval_samples_per_second": 78.232, "eval_steps_per_second": 39.207, "step": 45000 }, { "epoch": 2.17, "learning_rate": 1.5245076184731584e-05, "loss": 2.4472, "step": 46000 }, { "epoch": 2.17, "eval_loss": 4.080161094665527, "eval_runtime": 5.4997, "eval_samples_per_second": 78.005, "eval_steps_per_second": 39.093, "step": 46000 }, { "epoch": 2.22, "learning_rate": 1.4851695470876715e-05, "loss": 2.4628, "step": 47000 }, { "epoch": 2.22, "eval_loss": 4.088040828704834, "eval_runtime": 5.492, "eval_samples_per_second": 78.114, "eval_steps_per_second": 39.148, "step": 47000 }, { "epoch": 2.27, "learning_rate": 1.4458314757021846e-05, "loss": 2.4508, "step": 48000 }, { "epoch": 2.27, "eval_loss": 4.079111576080322, "eval_runtime": 5.4843, "eval_samples_per_second": 78.224, "eval_steps_per_second": 39.203, "step": 48000 }, { "epoch": 2.31, "learning_rate": 1.4065327423880831e-05, "loss": 2.4743, "step": 49000 }, { "epoch": 2.31, "eval_loss": 4.076536655426025, "eval_runtime": 5.5106, "eval_samples_per_second": 77.85, "eval_steps_per_second": 39.016, "step": 49000 }, { "epoch": 2.36, "learning_rate": 1.3671946710025964e-05, "loss": 2.4692, "step": 50000 }, { "epoch": 2.36, "eval_loss": 4.073933124542236, "eval_runtime": 5.4811, "eval_samples_per_second": 78.269, "eval_steps_per_second": 39.226, "step": 50000 }, { "epoch": 2.41, "learning_rate": 1.327895937688495e-05, "loss": 2.4651, "step": 51000 }, { "epoch": 2.41, "eval_loss": 4.068994522094727, "eval_runtime": 5.4881, "eval_samples_per_second": 78.168, "eval_steps_per_second": 39.175, "step": 51000 }, { "epoch": 2.45, "learning_rate": 1.2885578663030082e-05, "loss": 2.4885, "step": 52000 }, { "epoch": 2.45, "eval_loss": 4.072272300720215, "eval_runtime": 5.4901, "eval_samples_per_second": 78.141, "eval_steps_per_second": 39.162, "step": 52000 }, { "epoch": 2.5, "learning_rate": 1.2492591329889067e-05, "loss": 2.5023, "step": 53000 }, { "epoch": 2.5, "eval_loss": 4.06748104095459, "eval_runtime": 5.5201, "eval_samples_per_second": 77.716, "eval_steps_per_second": 38.948, "step": 53000 }, { "epoch": 2.55, "learning_rate": 1.2099210616034198e-05, "loss": 2.4651, "step": 54000 }, { "epoch": 2.55, "eval_loss": 4.064865589141846, "eval_runtime": 5.5135, "eval_samples_per_second": 77.809, "eval_steps_per_second": 38.995, "step": 54000 }, { "epoch": 2.6, "learning_rate": 1.1706223282893185e-05, "loss": 2.4774, "step": 55000 }, { "epoch": 2.6, "eval_loss": 4.06947660446167, "eval_runtime": 5.4995, "eval_samples_per_second": 78.006, "eval_steps_per_second": 39.094, "step": 55000 }, { "epoch": 2.64, "learning_rate": 1.1312842569038316e-05, "loss": 2.4717, "step": 56000 }, { "epoch": 2.64, "eval_loss": 4.055931091308594, "eval_runtime": 5.4978, "eval_samples_per_second": 78.032, "eval_steps_per_second": 39.107, "step": 56000 }, { "epoch": 2.69, "learning_rate": 1.0919461855183447e-05, "loss": 2.4856, "step": 57000 }, { "epoch": 2.69, "eval_loss": 4.051236629486084, "eval_runtime": 5.5101, "eval_samples_per_second": 77.857, "eval_steps_per_second": 39.019, "step": 57000 }, { "epoch": 2.74, "learning_rate": 1.0526474522042433e-05, "loss": 2.4572, "step": 58000 }, { "epoch": 2.74, "eval_loss": 4.0473432540893555, "eval_runtime": 5.5015, "eval_samples_per_second": 77.979, "eval_steps_per_second": 39.08, "step": 58000 }, { "epoch": 2.79, "learning_rate": 1.0133093808187564e-05, "loss": 2.486, "step": 59000 }, { "epoch": 2.79, "eval_loss": 4.043802261352539, "eval_runtime": 5.52, "eval_samples_per_second": 77.717, "eval_steps_per_second": 38.949, "step": 59000 }, { "epoch": 2.83, "learning_rate": 9.74010647504655e-06, "loss": 2.449, "step": 60000 }, { "epoch": 2.83, "eval_loss": 4.038473606109619, "eval_runtime": 5.5095, "eval_samples_per_second": 77.866, "eval_steps_per_second": 39.024, "step": 60000 }, { "epoch": 2.88, "learning_rate": 9.34672576119168e-06, "loss": 2.456, "step": 61000 }, { "epoch": 2.88, "eval_loss": 4.035487651824951, "eval_runtime": 5.5078, "eval_samples_per_second": 77.889, "eval_steps_per_second": 39.035, "step": 61000 }, { "epoch": 2.93, "learning_rate": 8.953345047336813e-06, "loss": 2.4802, "step": 62000 }, { "epoch": 2.93, "eval_loss": 4.0377516746521, "eval_runtime": 5.5124, "eval_samples_per_second": 77.824, "eval_steps_per_second": 39.003, "step": 62000 }, { "epoch": 2.97, "learning_rate": 8.559964333481944e-06, "loss": 2.4635, "step": 63000 }, { "epoch": 2.97, "eval_loss": 4.03075647354126, "eval_runtime": 5.5127, "eval_samples_per_second": 77.82, "eval_steps_per_second": 39.001, "step": 63000 }, { "epoch": 3.02, "learning_rate": 8.16697700034093e-06, "loss": 2.3742, "step": 64000 }, { "epoch": 3.02, "eval_loss": 4.048806667327881, "eval_runtime": 5.5265, "eval_samples_per_second": 77.626, "eval_steps_per_second": 38.904, "step": 64000 }, { "epoch": 3.07, "learning_rate": 7.77359628648606e-06, "loss": 2.2371, "step": 65000 }, { "epoch": 3.07, "eval_loss": 4.057917594909668, "eval_runtime": 5.5266, "eval_samples_per_second": 77.625, "eval_steps_per_second": 38.903, "step": 65000 }, { "epoch": 3.12, "learning_rate": 7.380608953345048e-06, "loss": 2.2496, "step": 66000 }, { "epoch": 3.12, "eval_loss": 4.062958717346191, "eval_runtime": 5.4861, "eval_samples_per_second": 78.197, "eval_steps_per_second": 39.19, "step": 66000 }, { "epoch": 3.16, "learning_rate": 6.987228239490178e-06, "loss": 2.2758, "step": 67000 }, { "epoch": 3.16, "eval_loss": 4.05156135559082, "eval_runtime": 5.5178, "eval_samples_per_second": 77.748, "eval_steps_per_second": 38.965, "step": 67000 }, { "epoch": 3.21, "learning_rate": 6.594240906349165e-06, "loss": 2.2489, "step": 68000 }, { "epoch": 3.21, "eval_loss": 4.058535575866699, "eval_runtime": 5.4932, "eval_samples_per_second": 78.097, "eval_steps_per_second": 39.14, "step": 68000 }, { "epoch": 3.26, "learning_rate": 6.200860192494296e-06, "loss": 2.2374, "step": 69000 }, { "epoch": 3.26, "eval_loss": 4.071547031402588, "eval_runtime": 5.5054, "eval_samples_per_second": 77.923, "eval_steps_per_second": 39.052, "step": 69000 }, { "epoch": 3.3, "learning_rate": 5.807872859353283e-06, "loss": 2.2862, "step": 70000 }, { "epoch": 3.3, "eval_loss": 4.050685405731201, "eval_runtime": 5.492, "eval_samples_per_second": 78.114, "eval_steps_per_second": 39.148, "step": 70000 }, { "epoch": 3.35, "learning_rate": 5.414492145498413e-06, "loss": 2.2502, "step": 71000 }, { "epoch": 3.35, "eval_loss": 4.051216125488281, "eval_runtime": 5.5012, "eval_samples_per_second": 77.983, "eval_steps_per_second": 39.082, "step": 71000 }, { "epoch": 3.4, "learning_rate": 5.021504812357399e-06, "loss": 2.238, "step": 72000 }, { "epoch": 3.4, "eval_loss": 4.054477214813232, "eval_runtime": 5.4933, "eval_samples_per_second": 78.095, "eval_steps_per_second": 39.139, "step": 72000 }, { "epoch": 3.45, "learning_rate": 4.628517479216386e-06, "loss": 2.2407, "step": 73000 }, { "epoch": 3.45, "eval_loss": 4.045898914337158, "eval_runtime": 5.483, "eval_samples_per_second": 78.241, "eval_steps_per_second": 39.212, "step": 73000 }, { "epoch": 3.49, "learning_rate": 4.235136765361517e-06, "loss": 2.2529, "step": 74000 }, { "epoch": 3.49, "eval_loss": 4.04516077041626, "eval_runtime": 5.4909, "eval_samples_per_second": 78.129, "eval_steps_per_second": 39.156, "step": 74000 }, { "epoch": 3.54, "learning_rate": 3.841756051506649e-06, "loss": 2.2453, "step": 75000 }, { "epoch": 3.54, "eval_loss": 4.045854091644287, "eval_runtime": 5.4906, "eval_samples_per_second": 78.134, "eval_steps_per_second": 39.158, "step": 75000 }, { "epoch": 3.59, "learning_rate": 3.4483753376517792e-06, "loss": 2.2314, "step": 76000 }, { "epoch": 3.59, "eval_loss": 4.041632175445557, "eval_runtime": 5.4892, "eval_samples_per_second": 78.153, "eval_steps_per_second": 39.168, "step": 76000 }, { "epoch": 3.63, "learning_rate": 3.0553880045107656e-06, "loss": 2.2408, "step": 77000 }, { "epoch": 3.63, "eval_loss": 4.037862300872803, "eval_runtime": 5.5221, "eval_samples_per_second": 77.687, "eval_steps_per_second": 38.934, "step": 77000 }, { "epoch": 3.68, "learning_rate": 2.662007290655897e-06, "loss": 2.2497, "step": 78000 }, { "epoch": 3.68, "eval_loss": 4.034841060638428, "eval_runtime": 5.5069, "eval_samples_per_second": 77.903, "eval_steps_per_second": 39.042, "step": 78000 }, { "epoch": 3.73, "learning_rate": 2.269413338228738e-06, "loss": 2.2475, "step": 79000 }, { "epoch": 3.73, "eval_loss": 4.037409782409668, "eval_runtime": 5.525, "eval_samples_per_second": 77.647, "eval_steps_per_second": 38.914, "step": 79000 }, { "epoch": 3.78, "learning_rate": 1.876032624373869e-06, "loss": 2.2376, "step": 80000 }, { "epoch": 3.78, "eval_loss": 4.03186559677124, "eval_runtime": 5.5186, "eval_samples_per_second": 77.737, "eval_steps_per_second": 38.959, "step": 80000 }, { "epoch": 3.82, "learning_rate": 1.4826519105190004e-06, "loss": 2.244, "step": 81000 }, { "epoch": 3.82, "eval_loss": 4.033052921295166, "eval_runtime": 5.5134, "eval_samples_per_second": 77.81, "eval_steps_per_second": 38.996, "step": 81000 }, { "epoch": 3.87, "learning_rate": 1.0892711966641317e-06, "loss": 2.2611, "step": 82000 }, { "epoch": 3.87, "eval_loss": 4.030561923980713, "eval_runtime": 5.5018, "eval_samples_per_second": 77.974, "eval_steps_per_second": 39.078, "step": 82000 }, { "epoch": 3.92, "learning_rate": 6.962838635231176e-07, "loss": 2.237, "step": 83000 }, { "epoch": 3.92, "eval_loss": 4.030078411102295, "eval_runtime": 5.5145, "eval_samples_per_second": 77.795, "eval_steps_per_second": 38.988, "step": 83000 }, { "epoch": 3.97, "learning_rate": 3.0290314966824894e-07, "loss": 2.2337, "step": 84000 }, { "epoch": 3.97, "eval_loss": 4.0290679931640625, "eval_runtime": 5.5016, "eval_samples_per_second": 77.978, "eval_steps_per_second": 39.08, "step": 84000 }, { "epoch": 4.0, "step": 84736, "total_flos": 4.4280638078976e+16, "train_loss": 2.6336010373971255, "train_runtime": 9376.0618, "train_samples_per_second": 18.075, "train_steps_per_second": 9.037 } ], "max_steps": 84736, "num_train_epochs": 4, "total_flos": 4.4280638078976e+16, "trial_name": null, "trial_params": null }