| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "global_step": 102957, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9514360364035474e-05, | |
| "loss": 1.3581, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9028720728070946e-05, | |
| "loss": 0.6756, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.854308109210642e-05, | |
| "loss": 0.5975, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.805744145614189e-05, | |
| "loss": 0.5572, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.757180182017736e-05, | |
| "loss": 0.5289, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.7086162184212826e-05, | |
| "loss": 0.5127, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.66005225482483e-05, | |
| "loss": 0.4999, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.611488291228377e-05, | |
| "loss": 0.4843, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.562924327631924e-05, | |
| "loss": 0.4701, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.5143603640354713e-05, | |
| "loss": 0.4619, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.4657964004390185e-05, | |
| "loss": 0.4581, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.417232436842566e-05, | |
| "loss": 0.4538, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.368668473246112e-05, | |
| "loss": 0.4452, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.3201045096496594e-05, | |
| "loss": 0.4426, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.271540546053207e-05, | |
| "loss": 0.4381, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.2229765824567544e-05, | |
| "loss": 0.4302, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.174412618860301e-05, | |
| "loss": 0.4299, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.125848655263848e-05, | |
| "loss": 0.4218, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.077284691667395e-05, | |
| "loss": 0.4219, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.0287207280709424e-05, | |
| "loss": 0.4178, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.9801567644744896e-05, | |
| "loss": 0.4169, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.931592800878037e-05, | |
| "loss": 0.4161, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.883028837281584e-05, | |
| "loss": 0.409, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.8344648736851305e-05, | |
| "loss": 0.4057, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.7859009100886777e-05, | |
| "loss": 0.4045, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.7373369464922255e-05, | |
| "loss": 0.4049, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.688772982895772e-05, | |
| "loss": 0.399, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.640209019299319e-05, | |
| "loss": 0.3971, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.5916450557028664e-05, | |
| "loss": 0.3951, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.5430810921064135e-05, | |
| "loss": 0.3963, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.49451712850996e-05, | |
| "loss": 0.3933, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.445953164913508e-05, | |
| "loss": 0.3924, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.397389201317055e-05, | |
| "loss": 0.3889, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.348825237720602e-05, | |
| "loss": 0.3868, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.300261274124149e-05, | |
| "loss": 0.3844, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.251697310527696e-05, | |
| "loss": 0.3802, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.203133346931244e-05, | |
| "loss": 0.3825, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.15456938333479e-05, | |
| "loss": 0.3802, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.1060054197383375e-05, | |
| "loss": 0.378, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.0574414561418846e-05, | |
| "loss": 0.3768, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.0088774925454315e-05, | |
| "loss": 0.3749, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.9603135289489787e-05, | |
| "loss": 0.3758, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9117495653525262e-05, | |
| "loss": 0.3733, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8631856017560734e-05, | |
| "loss": 0.3694, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.8146216381596202e-05, | |
| "loss": 0.3689, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7660576745631674e-05, | |
| "loss": 0.3693, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.7174937109667142e-05, | |
| "loss": 0.3703, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6689297473702614e-05, | |
| "loss": 0.366, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.620365783773809e-05, | |
| "loss": 0.3664, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.5718018201773557e-05, | |
| "loss": 0.365, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.523237856580903e-05, | |
| "loss": 0.3636, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4746738929844498e-05, | |
| "loss": 0.3611, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.4261099293879973e-05, | |
| "loss": 0.3626, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.377545965791544e-05, | |
| "loss": 0.3588, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.3289820021950913e-05, | |
| "loss": 0.3618, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2804180385986385e-05, | |
| "loss": 0.3589, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.2318540750021853e-05, | |
| "loss": 0.356, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.1832901114057325e-05, | |
| "loss": 0.3545, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1347261478092797e-05, | |
| "loss": 0.3561, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.086162184212827e-05, | |
| "loss": 0.3534, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0375982206163737e-05, | |
| "loss": 0.3514, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9890342570199212e-05, | |
| "loss": 0.3526, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.940470293423468e-05, | |
| "loss": 0.3512, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.8919063298270152e-05, | |
| "loss": 0.3509, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8433423662305624e-05, | |
| "loss": 0.3484, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7947784026341092e-05, | |
| "loss": 0.3478, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7462144390376567e-05, | |
| "loss": 0.3449, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.6976504754412036e-05, | |
| "loss": 0.3483, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.6490865118447508e-05, | |
| "loss": 0.3455, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.600522548248298e-05, | |
| "loss": 0.3441, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.551958584651845e-05, | |
| "loss": 0.3446, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.503394621055392e-05, | |
| "loss": 0.343, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4548306574589393e-05, | |
| "loss": 0.3405, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.4062666938624863e-05, | |
| "loss": 0.3401, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3577027302660333e-05, | |
| "loss": 0.3385, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3091387666695807e-05, | |
| "loss": 0.3378, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2605748030731277e-05, | |
| "loss": 0.337, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2120108394766749e-05, | |
| "loss": 0.3361, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1634468758802219e-05, | |
| "loss": 0.338, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1148829122837689e-05, | |
| "loss": 0.3367, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.066318948687316e-05, | |
| "loss": 0.3351, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0177549850908632e-05, | |
| "loss": 0.3363, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.691910214944102e-06, | |
| "loss": 0.3327, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.206270578979574e-06, | |
| "loss": 0.3318, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.720630943015046e-06, | |
| "loss": 0.3307, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.234991307050518e-06, | |
| "loss": 0.3337, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.749351671085988e-06, | |
| "loss": 0.3299, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.263712035121459e-06, | |
| "loss": 0.3291, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.7780723991569305e-06, | |
| "loss": 0.3328, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.292432763192401e-06, | |
| "loss": 0.3279, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.8067931272278715e-06, | |
| "loss": 0.3331, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.321153491263343e-06, | |
| "loss": 0.3293, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.835513855298814e-06, | |
| "loss": 0.3274, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.349874219334285e-06, | |
| "loss": 0.3267, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.864234583369757e-06, | |
| "loss": 0.3261, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.3785949474052275e-06, | |
| "loss": 0.3238, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.8929553114406984e-06, | |
| "loss": 0.3237, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.4073156754761698e-06, | |
| "loss": 0.3225, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.9216760395116407e-06, | |
| "loss": 0.3242, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.436036403547112e-06, | |
| "loss": 0.3275, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 9.50396767582583e-07, | |
| "loss": 0.3219, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.6475713161805417e-07, | |
| "loss": 0.3232, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 102957, | |
| "total_flos": 7.554546670318524e+17, | |
| "train_loss": 0.3894348529865434, | |
| "train_runtime": 39614.9645, | |
| "train_samples_per_second": 83.164, | |
| "train_steps_per_second": 2.599 | |
| } | |
| ], | |
| "max_steps": 102957, | |
| "num_train_epochs": 3, | |
| "total_flos": 7.554546670318524e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |