{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 33774,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 2.9555871380351753e-05,
      "loss": 0.5739,
      "step": 500
    },
    {
      "epoch": 0.09,
      "learning_rate": 2.91117427607035e-05,
      "loss": 0.4701,
      "step": 1000
    },
    {
      "epoch": 0.13,
      "learning_rate": 2.8667614141055253e-05,
      "loss": 0.4401,
      "step": 1500
    },
    {
      "epoch": 0.18,
      "learning_rate": 2.8223485521406998e-05,
      "loss": 0.4007,
      "step": 2000
    },
    {
      "epoch": 0.22,
      "learning_rate": 2.777935690175875e-05,
      "loss": 0.3886,
      "step": 2500
    },
    {
      "epoch": 0.27,
      "learning_rate": 2.73352282821105e-05,
      "loss": 0.3668,
      "step": 3000
    },
    {
      "epoch": 0.31,
      "learning_rate": 2.689109966246225e-05,
      "loss": 0.3622,
      "step": 3500
    },
    {
      "epoch": 0.36,
      "learning_rate": 2.6446971042814e-05,
      "loss": 0.3513,
      "step": 4000
    },
    {
      "epoch": 0.4,
      "learning_rate": 2.600284242316575e-05,
      "loss": 0.3233,
      "step": 4500
    },
    {
      "epoch": 0.44,
      "learning_rate": 2.55587138035175e-05,
      "loss": 0.3341,
      "step": 5000
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.5114585183869247e-05,
      "loss": 0.325,
      "step": 5500
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.4670456564221e-05,
      "loss": 0.3164,
      "step": 6000
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.4226327944572748e-05,
      "loss": 0.2953,
      "step": 6500
    },
    {
      "epoch": 0.62,
      "learning_rate": 2.37821993249245e-05,
      "loss": 0.3033,
      "step": 7000
    },
    {
      "epoch": 0.67,
      "learning_rate": 2.3338070705276248e-05,
      "loss": 0.2875,
      "step": 7500
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.2893942085628e-05,
      "loss": 0.2848,
      "step": 8000
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.244981346597975e-05,
      "loss": 0.27,
      "step": 8500
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.20056848463315e-05,
      "loss": 0.2822,
      "step": 9000
    },
    {
      "epoch": 0.84,
      "learning_rate": 2.1561556226683245e-05,
      "loss": 0.2595,
      "step": 9500
    },
    {
      "epoch": 0.89,
      "learning_rate": 2.1117427607034997e-05,
      "loss": 0.2592,
      "step": 10000
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.0673298987386746e-05,
      "loss": 0.2493,
      "step": 10500
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.0229170367738498e-05,
      "loss": 0.2574,
      "step": 11000
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.978504174809025e-05,
      "loss": 0.2059,
      "step": 11500
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.9340913128441998e-05,
      "loss": 0.1537,
      "step": 12000
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.889678450879375e-05,
      "loss": 0.1588,
      "step": 12500
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.8452655889145495e-05,
      "loss": 0.1487,
      "step": 13000
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.8008527269497247e-05,
      "loss": 0.1608,
      "step": 13500
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.7564398649848995e-05,
      "loss": 0.1619,
      "step": 14000
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.7120270030200747e-05,
      "loss": 0.155,
      "step": 14500
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.6676141410552495e-05,
      "loss": 0.1418,
      "step": 15000
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.6232012790904247e-05,
      "loss": 0.1558,
      "step": 15500
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.5787884171255996e-05,
      "loss": 0.155,
      "step": 16000
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.5343755551607744e-05,
      "loss": 0.1522,
      "step": 16500
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.4899626931959496e-05,
      "loss": 0.1386,
      "step": 17000
    },
    {
      "epoch": 1.55,
      "learning_rate": 1.4455498312311245e-05,
      "loss": 0.1442,
      "step": 17500
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.4011369692662995e-05,
      "loss": 0.1382,
      "step": 18000
    },
    {
      "epoch": 1.64,
      "learning_rate": 1.3567241073014745e-05,
      "loss": 0.1437,
      "step": 18500
    },
    {
      "epoch": 1.69,
      "learning_rate": 1.3123112453366497e-05,
      "loss": 0.1405,
      "step": 19000
    },
    {
      "epoch": 1.73,
      "learning_rate": 1.2678983833718245e-05,
      "loss": 0.1357,
      "step": 19500
    },
    {
      "epoch": 1.78,
      "learning_rate": 1.2234855214069995e-05,
      "loss": 0.125,
      "step": 20000
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.1790726594421745e-05,
      "loss": 0.1376,
      "step": 20500
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.1346597974773496e-05,
      "loss": 0.136,
      "step": 21000
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.0902469355125244e-05,
      "loss": 0.1251,
      "step": 21500
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.0458340735476994e-05,
      "loss": 0.1258,
      "step": 22000
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.0014212115828744e-05,
      "loss": 0.1303,
      "step": 22500
    },
    {
      "epoch": 2.04,
      "learning_rate": 9.570083496180493e-06,
      "loss": 0.0792,
      "step": 23000
    },
    {
      "epoch": 2.09,
      "learning_rate": 9.125954876532243e-06,
      "loss": 0.0794,
      "step": 23500
    },
    {
      "epoch": 2.13,
      "learning_rate": 8.681826256883993e-06,
      "loss": 0.0781,
      "step": 24000
    },
    {
      "epoch": 2.18,
      "learning_rate": 8.237697637235745e-06,
      "loss": 0.0777,
      "step": 24500
    },
    {
      "epoch": 2.22,
      "learning_rate": 7.793569017587494e-06,
      "loss": 0.077,
      "step": 25000
    },
    {
      "epoch": 2.27,
      "learning_rate": 7.349440397939243e-06,
      "loss": 0.0792,
      "step": 25500
    },
    {
      "epoch": 2.31,
      "learning_rate": 6.905311778290994e-06,
      "loss": 0.0774,
      "step": 26000
    },
    {
      "epoch": 2.35,
      "learning_rate": 6.461183158642743e-06,
      "loss": 0.0762,
      "step": 26500
    },
    {
      "epoch": 2.4,
      "learning_rate": 6.017054538994493e-06,
      "loss": 0.0744,
      "step": 27000
    },
    {
      "epoch": 2.44,
      "learning_rate": 5.572925919346243e-06,
      "loss": 0.0725,
      "step": 27500
    },
    {
      "epoch": 2.49,
      "learning_rate": 5.128797299697992e-06,
      "loss": 0.076,
      "step": 28000
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.684668680049743e-06,
      "loss": 0.0682,
      "step": 28500
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.240540060401492e-06,
      "loss": 0.074,
      "step": 29000
    },
    {
      "epoch": 2.62,
      "learning_rate": 3.7964114407532424e-06,
      "loss": 0.0738,
      "step": 29500
    },
    {
      "epoch": 2.66,
      "learning_rate": 3.3522828211049918e-06,
      "loss": 0.0678,
      "step": 30000
    },
    {
      "epoch": 2.71,
      "learning_rate": 2.908154201456742e-06,
      "loss": 0.0668,
      "step": 30500
    },
    {
      "epoch": 2.75,
      "learning_rate": 2.4640255818084917e-06,
      "loss": 0.0685,
      "step": 31000
    },
    {
      "epoch": 2.8,
      "learning_rate": 2.0198969621602414e-06,
      "loss": 0.0668,
      "step": 31500
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.5757683425119916e-06,
      "loss": 0.068,
      "step": 32000
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.1316397228637413e-06,
      "loss": 0.0663,
      "step": 32500
    },
    {
      "epoch": 2.93,
      "learning_rate": 6.875111032154912e-07,
      "loss": 0.0629,
      "step": 33000
    },
    {
      "epoch": 2.98,
      "learning_rate": 2.4338248356724103e-07,
      "loss": 0.0676,
      "step": 33500
    },
    {
      "epoch": 3.0,
      "step": 33774,
      "train_runtime": 1945.6382,
      "train_samples_per_second": 17.359
    }
  ],
  "max_steps": 33774,
  "num_train_epochs": 3,
  "total_flos": 52036968346583040,
  "trial_name": null,
  "trial_params": null
}