{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 47, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02127659574468085, "grad_norm": 173.3797693640036, "learning_rate": 2.0000000000000003e-06, "loss": 2.0421, "step": 1 }, { "epoch": 0.0425531914893617, "grad_norm": 213.4863387447492, "learning_rate": 4.000000000000001e-06, "loss": 2.1926, "step": 2 }, { "epoch": 0.06382978723404255, "grad_norm": 139.12102571177553, "learning_rate": 6e-06, "loss": 1.6796, "step": 3 }, { "epoch": 0.0851063829787234, "grad_norm": 31.770887201034693, "learning_rate": 8.000000000000001e-06, "loss": 0.8817, "step": 4 }, { "epoch": 0.10638297872340426, "grad_norm": 11.974572830777873, "learning_rate": 1e-05, "loss": 0.6561, "step": 5 }, { "epoch": 0.1276595744680851, "grad_norm": 5.587929077588245, "learning_rate": 9.986018985905901e-06, "loss": 0.4698, "step": 6 }, { "epoch": 0.14893617021276595, "grad_norm": 7.58893303595288, "learning_rate": 9.944154131125643e-06, "loss": 0.3525, "step": 7 }, { "epoch": 0.1702127659574468, "grad_norm": 4.03951296758354, "learning_rate": 9.874639560909118e-06, "loss": 0.2916, "step": 8 }, { "epoch": 0.19148936170212766, "grad_norm": 2.36253683449773, "learning_rate": 9.777864028930705e-06, "loss": 0.2413, "step": 9 }, { "epoch": 0.2127659574468085, "grad_norm": 1.724376767595317, "learning_rate": 9.654368743221022e-06, "loss": 0.2407, "step": 10 }, { "epoch": 0.23404255319148937, "grad_norm": 7.656722082252226, "learning_rate": 9.504844339512096e-06, "loss": 0.2365, "step": 11 }, { "epoch": 0.2553191489361702, "grad_norm": 3.966824389729543, "learning_rate": 9.330127018922195e-06, "loss": 0.2593, "step": 12 }, { "epoch": 0.2765957446808511, "grad_norm": 2.4259726258566894, "learning_rate": 9.131193871579975e-06, "loss": 0.2063, "step": 13 }, { "epoch": 0.2978723404255319, "grad_norm": 2.0989965880083665, "learning_rate": 8.90915741234015e-06, "loss": 0.2137, "step": 14 }, { "epoch": 0.3191489361702128, "grad_norm": 0.7473125600483845, "learning_rate": 8.665259359149132e-06, "loss": 0.1752, "step": 15 }, { "epoch": 0.3404255319148936, "grad_norm": 2.362035437648623, "learning_rate": 8.400863688854598e-06, "loss": 0.2079, "step": 16 }, { "epoch": 0.3617021276595745, "grad_norm": 1.1278643764919287, "learning_rate": 8.117449009293668e-06, "loss": 0.2096, "step": 17 }, { "epoch": 0.3829787234042553, "grad_norm": 0.9888933504092371, "learning_rate": 7.81660029031811e-06, "loss": 0.1759, "step": 18 }, { "epoch": 0.40425531914893614, "grad_norm": 1.3935821371981716, "learning_rate": 7.500000000000001e-06, "loss": 0.2036, "step": 19 }, { "epoch": 0.425531914893617, "grad_norm": 0.7210016827193401, "learning_rate": 7.169418695587791e-06, "loss": 0.1757, "step": 20 }, { "epoch": 0.44680851063829785, "grad_norm": 1.3893738922334362, "learning_rate": 6.8267051218319766e-06, "loss": 0.1802, "step": 21 }, { "epoch": 0.46808510638297873, "grad_norm": 0.7826903349267914, "learning_rate": 6.473775872054522e-06, "loss": 0.1823, "step": 22 }, { "epoch": 0.48936170212765956, "grad_norm": 1.1055685574886962, "learning_rate": 6.112604669781572e-06, "loss": 0.1812, "step": 23 }, { "epoch": 0.5106382978723404, "grad_norm": 1.3398319352785983, "learning_rate": 5.745211330880872e-06, "loss": 0.1791, "step": 24 }, { "epoch": 0.5319148936170213, "grad_norm": 0.5530788932061305, "learning_rate": 5.373650467932122e-06, "loss": 0.1582, "step": 25 }, { "epoch": 0.5531914893617021, "grad_norm": 0.979470403637453, "learning_rate": 5e-06, "loss": 0.1687, "step": 26 }, { "epoch": 0.574468085106383, "grad_norm": 0.6524782994513246, "learning_rate": 4.626349532067879e-06, "loss": 0.1614, "step": 27 }, { "epoch": 0.5957446808510638, "grad_norm": 0.613082643775453, "learning_rate": 4.254788669119127e-06, "loss": 0.1406, "step": 28 }, { "epoch": 0.6170212765957447, "grad_norm": 0.6117099956858949, "learning_rate": 3.887395330218429e-06, "loss": 0.1637, "step": 29 }, { "epoch": 0.6382978723404256, "grad_norm": 0.4598315067554906, "learning_rate": 3.526224127945479e-06, "loss": 0.1488, "step": 30 }, { "epoch": 0.6595744680851063, "grad_norm": 0.5041452368752941, "learning_rate": 3.173294878168025e-06, "loss": 0.1385, "step": 31 }, { "epoch": 0.6808510638297872, "grad_norm": 0.6131569474074337, "learning_rate": 2.83058130441221e-06, "loss": 0.151, "step": 32 }, { "epoch": 0.7021276595744681, "grad_norm": 0.5119335095129718, "learning_rate": 2.5000000000000015e-06, "loss": 0.1492, "step": 33 }, { "epoch": 0.723404255319149, "grad_norm": 0.4443782774575828, "learning_rate": 2.1833997096818897e-06, "loss": 0.1174, "step": 34 }, { "epoch": 0.7446808510638298, "grad_norm": 0.45664829579358884, "learning_rate": 1.8825509907063328e-06, "loss": 0.1273, "step": 35 }, { "epoch": 0.7659574468085106, "grad_norm": 0.5213080660032051, "learning_rate": 1.5991363111454023e-06, "loss": 0.1422, "step": 36 }, { "epoch": 0.7872340425531915, "grad_norm": 0.5400795029678803, "learning_rate": 1.3347406408508695e-06, "loss": 0.148, "step": 37 }, { "epoch": 0.8085106382978723, "grad_norm": 0.47711498862269414, "learning_rate": 1.0908425876598512e-06, "loss": 0.1201, "step": 38 }, { "epoch": 0.8297872340425532, "grad_norm": 0.4674822262301143, "learning_rate": 8.688061284200266e-07, "loss": 0.1427, "step": 39 }, { "epoch": 0.851063829787234, "grad_norm": 0.4740692779208992, "learning_rate": 6.698729810778065e-07, "loss": 0.1259, "step": 40 }, { "epoch": 0.8723404255319149, "grad_norm": 0.5372156644144523, "learning_rate": 4.951556604879049e-07, "loss": 0.1342, "step": 41 }, { "epoch": 0.8936170212765957, "grad_norm": 0.5807354531086322, "learning_rate": 3.4563125677897936e-07, "loss": 0.1449, "step": 42 }, { "epoch": 0.9148936170212766, "grad_norm": 0.4917053466958668, "learning_rate": 2.2213597106929608e-07, "loss": 0.1344, "step": 43 }, { "epoch": 0.9361702127659575, "grad_norm": 0.45294061142670444, "learning_rate": 1.253604390908819e-07, "loss": 0.1187, "step": 44 }, { "epoch": 0.9574468085106383, "grad_norm": 0.42691924060512204, "learning_rate": 5.584586887435739e-08, "loss": 0.1114, "step": 45 }, { "epoch": 0.9787234042553191, "grad_norm": 0.4007354051634515, "learning_rate": 1.3981014094099354e-08, "loss": 0.1139, "step": 46 }, { "epoch": 1.0, "grad_norm": 0.4672720715521086, "learning_rate": 0.0, "loss": 0.1298, "step": 47 }, { "epoch": 1.0, "eval_loss": 0.14448508620262146, "eval_runtime": 6.6249, "eval_samples_per_second": 11.925, "eval_steps_per_second": 3.019, "step": 47 }, { "epoch": 1.0, "step": 47, "total_flos": 10394912686080.0, "train_loss": 0.3196908062759866, "train_runtime": 717.6944, "train_samples_per_second": 2.069, "train_steps_per_second": 0.065 } ], "logging_steps": 1, "max_steps": 47, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 10394912686080.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }