{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "global_step": 58268, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 4.957094803322579e-05, "loss": 2.4741, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.914189606645157e-05, "loss": 2.4661, "step": 1000 }, { "epoch": 0.18, "learning_rate": 4.871284409967736e-05, "loss": 2.4505, "step": 1500 }, { "epoch": 0.24, "learning_rate": 4.828379213290314e-05, "loss": 2.4463, "step": 2000 }, { "epoch": 0.3, "learning_rate": 4.785474016612893e-05, "loss": 2.4309, "step": 2500 }, { "epoch": 0.36, "learning_rate": 4.742568819935471e-05, "loss": 2.4286, "step": 3000 }, { "epoch": 0.42, "learning_rate": 4.699663623258049e-05, "loss": 2.4326, "step": 3500 }, { "epoch": 0.48, "learning_rate": 4.656758426580627e-05, "loss": 2.424, "step": 4000 }, { "epoch": 0.54, "learning_rate": 4.613853229903206e-05, "loss": 2.4179, "step": 4500 }, { "epoch": 0.6, "learning_rate": 4.570948033225785e-05, "loss": 2.4141, "step": 5000 }, { "epoch": 0.66, "learning_rate": 4.528042836548363e-05, "loss": 2.4121, "step": 5500 }, { "epoch": 0.72, "learning_rate": 4.4851376398709416e-05, "loss": 2.407, "step": 6000 }, { "epoch": 0.78, "learning_rate": 4.44223244319352e-05, "loss": 2.399, "step": 6500 }, { "epoch": 0.84, "learning_rate": 4.3993272465160985e-05, "loss": 2.3935, "step": 7000 }, { "epoch": 0.9, "learning_rate": 4.3564220498386766e-05, "loss": 2.3822, "step": 7500 }, { "epoch": 0.96, "learning_rate": 4.3135168531612554e-05, "loss": 2.3822, "step": 8000 }, { "epoch": 1.02, "learning_rate": 4.2706116564838335e-05, "loss": 2.387, "step": 8500 }, { "epoch": 1.08, "learning_rate": 4.227706459806412e-05, "loss": 2.3601, "step": 9000 }, { "epoch": 1.14, "learning_rate": 4.1848012631289904e-05, "loss": 2.3582, "step": 9500 }, { "epoch": 1.2, "learning_rate": 4.1418960664515685e-05, "loss": 2.3706, "step": 10000 }, { "epoch": 1.26, "learning_rate": 4.098990869774147e-05, "loss": 2.3669, "step": 10500 }, { "epoch": 1.32, "learning_rate": 4.0560856730967254e-05, "loss": 2.3518, "step": 11000 }, { "epoch": 1.38, "learning_rate": 4.013180476419304e-05, "loss": 2.3392, "step": 11500 }, { "epoch": 1.44, "learning_rate": 3.970275279741882e-05, "loss": 2.3502, "step": 12000 }, { "epoch": 1.5, "learning_rate": 3.927370083064461e-05, "loss": 2.3437, "step": 12500 }, { "epoch": 1.56, "learning_rate": 3.884464886387039e-05, "loss": 2.3577, "step": 13000 }, { "epoch": 1.62, "learning_rate": 3.841559689709618e-05, "loss": 2.3435, "step": 13500 }, { "epoch": 1.68, "learning_rate": 3.798654493032196e-05, "loss": 2.3456, "step": 14000 }, { "epoch": 1.74, "learning_rate": 3.755749296354775e-05, "loss": 2.3461, "step": 14500 }, { "epoch": 1.8, "learning_rate": 3.712844099677354e-05, "loss": 2.3472, "step": 15000 }, { "epoch": 1.86, "learning_rate": 3.669938902999932e-05, "loss": 2.3362, "step": 15500 }, { "epoch": 1.92, "learning_rate": 3.62703370632251e-05, "loss": 2.345, "step": 16000 }, { "epoch": 1.98, "learning_rate": 3.584128509645088e-05, "loss": 2.336, "step": 16500 }, { "epoch": 2.04, "learning_rate": 3.541223312967667e-05, "loss": 2.3365, "step": 17000 }, { "epoch": 2.1, "learning_rate": 3.498318116290245e-05, "loss": 2.3245, "step": 17500 }, { "epoch": 2.16, "learning_rate": 3.455412919612824e-05, "loss": 2.3086, "step": 18000 }, { "epoch": 2.22, "learning_rate": 3.412507722935402e-05, "loss": 2.3251, "step": 18500 }, { "epoch": 2.28, "learning_rate": 3.3696025262579806e-05, "loss": 2.32, "step": 19000 }, { "epoch": 2.34, "learning_rate": 3.326697329580559e-05, "loss": 2.3084, "step": 19500 }, { "epoch": 2.4, "learning_rate": 3.2837921329031375e-05, "loss": 2.3164, "step": 20000 }, { "epoch": 2.46, "learning_rate": 3.240886936225716e-05, "loss": 2.3054, "step": 20500 }, { "epoch": 2.52, "learning_rate": 3.1979817395482944e-05, "loss": 2.3172, "step": 21000 }, { "epoch": 2.58, "learning_rate": 3.1550765428708725e-05, "loss": 2.31, "step": 21500 }, { "epoch": 2.64, "learning_rate": 3.112171346193451e-05, "loss": 2.3141, "step": 22000 }, { "epoch": 2.7, "learning_rate": 3.0692661495160294e-05, "loss": 2.3226, "step": 22500 }, { "epoch": 2.76, "learning_rate": 3.026360952838608e-05, "loss": 2.3194, "step": 23000 }, { "epoch": 2.82, "learning_rate": 2.9834557561611863e-05, "loss": 2.3203, "step": 23500 }, { "epoch": 2.88, "learning_rate": 2.9405505594837644e-05, "loss": 2.3065, "step": 24000 }, { "epoch": 2.94, "learning_rate": 2.8976453628063432e-05, "loss": 2.3112, "step": 24500 }, { "epoch": 3.0, "learning_rate": 2.854740166128922e-05, "loss": 2.2985, "step": 25000 }, { "epoch": 3.06, "learning_rate": 2.8118349694515e-05, "loss": 2.3152, "step": 25500 }, { "epoch": 3.12, "learning_rate": 2.768929772774079e-05, "loss": 2.2912, "step": 26000 }, { "epoch": 3.18, "learning_rate": 2.726024576096657e-05, "loss": 2.2759, "step": 26500 }, { "epoch": 3.24, "learning_rate": 2.6831193794192354e-05, "loss": 2.2952, "step": 27000 }, { "epoch": 3.3, "learning_rate": 2.6402141827418136e-05, "loss": 2.2971, "step": 27500 }, { "epoch": 3.36, "learning_rate": 2.5973089860643923e-05, "loss": 2.2987, "step": 28000 }, { "epoch": 3.42, "learning_rate": 2.5544037893869704e-05, "loss": 2.2859, "step": 28500 }, { "epoch": 3.48, "learning_rate": 2.5114985927095492e-05, "loss": 2.2779, "step": 29000 }, { "epoch": 3.54, "learning_rate": 2.4685933960321277e-05, "loss": 2.2878, "step": 29500 }, { "epoch": 3.6, "learning_rate": 2.4256881993547058e-05, "loss": 2.2762, "step": 30000 }, { "epoch": 3.66, "learning_rate": 2.3827830026772842e-05, "loss": 2.2728, "step": 30500 }, { "epoch": 3.72, "learning_rate": 2.3398778059998627e-05, "loss": 2.2898, "step": 31000 }, { "epoch": 3.78, "learning_rate": 2.296972609322441e-05, "loss": 2.2836, "step": 31500 }, { "epoch": 3.84, "learning_rate": 2.25406741264502e-05, "loss": 2.2882, "step": 32000 }, { "epoch": 3.9, "learning_rate": 2.211162215967598e-05, "loss": 2.2778, "step": 32500 }, { "epoch": 3.96, "learning_rate": 2.1682570192901765e-05, "loss": 2.2793, "step": 33000 }, { "epoch": 4.02, "learning_rate": 2.125351822612755e-05, "loss": 2.2699, "step": 33500 }, { "epoch": 4.08, "learning_rate": 2.0824466259353334e-05, "loss": 2.2778, "step": 34000 }, { "epoch": 4.14, "learning_rate": 2.0395414292579118e-05, "loss": 2.2668, "step": 34500 }, { "epoch": 4.2, "learning_rate": 1.9966362325804903e-05, "loss": 2.257, "step": 35000 }, { "epoch": 4.26, "learning_rate": 1.9537310359030687e-05, "loss": 2.2496, "step": 35500 }, { "epoch": 4.32, "learning_rate": 1.9108258392256472e-05, "loss": 2.2742, "step": 36000 }, { "epoch": 4.38, "learning_rate": 1.8679206425482253e-05, "loss": 2.2643, "step": 36500 }, { "epoch": 4.44, "learning_rate": 1.825015445870804e-05, "loss": 2.2517, "step": 37000 }, { "epoch": 4.51, "learning_rate": 1.7821102491933825e-05, "loss": 2.2677, "step": 37500 }, { "epoch": 4.57, "learning_rate": 1.739205052515961e-05, "loss": 2.2654, "step": 38000 }, { "epoch": 4.63, "learning_rate": 1.6962998558385394e-05, "loss": 2.2645, "step": 38500 }, { "epoch": 4.69, "learning_rate": 1.6533946591611175e-05, "loss": 2.2456, "step": 39000 }, { "epoch": 4.75, "learning_rate": 1.610489462483696e-05, "loss": 2.2686, "step": 39500 }, { "epoch": 4.81, "learning_rate": 1.5675842658062744e-05, "loss": 2.2478, "step": 40000 }, { "epoch": 4.87, "learning_rate": 1.5246790691288529e-05, "loss": 2.2558, "step": 40500 }, { "epoch": 4.93, "learning_rate": 1.4817738724514313e-05, "loss": 2.2541, "step": 41000 }, { "epoch": 4.99, "learning_rate": 1.4388686757740098e-05, "loss": 2.236, "step": 41500 }, { "epoch": 5.05, "learning_rate": 1.3959634790965884e-05, "loss": 2.2543, "step": 42000 }, { "epoch": 5.11, "learning_rate": 1.3530582824191668e-05, "loss": 2.2479, "step": 42500 }, { "epoch": 5.17, "learning_rate": 1.3101530857417451e-05, "loss": 2.2525, "step": 43000 }, { "epoch": 5.23, "learning_rate": 1.2672478890643236e-05, "loss": 2.2506, "step": 43500 }, { "epoch": 5.29, "learning_rate": 1.224342692386902e-05, "loss": 2.2282, "step": 44000 }, { "epoch": 5.35, "learning_rate": 1.1814374957094803e-05, "loss": 2.2406, "step": 44500 }, { "epoch": 5.41, "learning_rate": 1.1385322990320587e-05, "loss": 2.2357, "step": 45000 }, { "epoch": 5.47, "learning_rate": 1.0956271023546374e-05, "loss": 2.2457, "step": 45500 }, { "epoch": 5.53, "learning_rate": 1.0527219056772156e-05, "loss": 2.2262, "step": 46000 }, { "epoch": 5.59, "learning_rate": 1.0098167089997941e-05, "loss": 2.2423, "step": 46500 }, { "epoch": 5.65, "learning_rate": 9.669115123223725e-06, "loss": 2.2435, "step": 47000 }, { "epoch": 5.71, "learning_rate": 9.24006315644951e-06, "loss": 2.2277, "step": 47500 }, { "epoch": 5.77, "learning_rate": 8.811011189675294e-06, "loss": 2.2445, "step": 48000 }, { "epoch": 5.83, "learning_rate": 8.381959222901079e-06, "loss": 2.2395, "step": 48500 }, { "epoch": 5.89, "learning_rate": 7.952907256126863e-06, "loss": 2.2506, "step": 49000 }, { "epoch": 5.95, "learning_rate": 7.523855289352647e-06, "loss": 2.234, "step": 49500 }, { "epoch": 6.01, "learning_rate": 7.0948033225784306e-06, "loss": 2.2488, "step": 50000 }, { "epoch": 6.07, "learning_rate": 6.665751355804216e-06, "loss": 2.2242, "step": 50500 }, { "epoch": 6.13, "learning_rate": 6.2366993890299995e-06, "loss": 2.2297, "step": 51000 }, { "epoch": 6.19, "learning_rate": 5.807647422255784e-06, "loss": 2.2259, "step": 51500 }, { "epoch": 6.25, "learning_rate": 5.378595455481568e-06, "loss": 2.2263, "step": 52000 }, { "epoch": 6.31, "learning_rate": 4.949543488707353e-06, "loss": 2.2214, "step": 52500 }, { "epoch": 6.37, "learning_rate": 4.520491521933137e-06, "loss": 2.2287, "step": 53000 }, { "epoch": 6.43, "learning_rate": 4.091439555158921e-06, "loss": 2.2384, "step": 53500 }, { "epoch": 6.49, "learning_rate": 3.662387588384705e-06, "loss": 2.2319, "step": 54000 }, { "epoch": 6.55, "learning_rate": 3.2333356216104892e-06, "loss": 2.2328, "step": 54500 }, { "epoch": 6.61, "learning_rate": 2.804283654836274e-06, "loss": 2.2359, "step": 55000 }, { "epoch": 6.67, "learning_rate": 2.375231688062058e-06, "loss": 2.2372, "step": 55500 }, { "epoch": 6.73, "learning_rate": 1.9461797212878423e-06, "loss": 2.2402, "step": 56000 }, { "epoch": 6.79, "learning_rate": 1.5171277545136267e-06, "loss": 2.2243, "step": 56500 }, { "epoch": 6.85, "learning_rate": 1.088075787739411e-06, "loss": 2.2267, "step": 57000 }, { "epoch": 6.91, "learning_rate": 6.590238209651953e-07, "loss": 2.2254, "step": 57500 }, { "epoch": 6.97, "learning_rate": 2.2997185419097963e-07, "loss": 2.2278, "step": 58000 }, { "epoch": 7.0, "step": 58268, "total_flos": 3.6314933518540524e+18, "train_runtime": 64951.4124, "train_samples_per_second": 0.897 } ], "max_steps": 58268, "num_train_epochs": 7, "total_flos": 3.6314933518540524e+18, "trial_name": null, "trial_params": null }