|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 47, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02127659574468085, |
|
"grad_norm": 173.3797693640036, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.0421, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0425531914893617, |
|
"grad_norm": 213.4863387447492, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.1926, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06382978723404255, |
|
"grad_norm": 139.12102571177553, |
|
"learning_rate": 6e-06, |
|
"loss": 1.6796, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0851063829787234, |
|
"grad_norm": 31.770887201034693, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.8817, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.10638297872340426, |
|
"grad_norm": 11.974572830777873, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6561, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1276595744680851, |
|
"grad_norm": 5.587929077588245, |
|
"learning_rate": 9.986018985905901e-06, |
|
"loss": 0.4698, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.14893617021276595, |
|
"grad_norm": 7.58893303595288, |
|
"learning_rate": 9.944154131125643e-06, |
|
"loss": 0.3525, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 4.03951296758354, |
|
"learning_rate": 9.874639560909118e-06, |
|
"loss": 0.2916, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.19148936170212766, |
|
"grad_norm": 2.36253683449773, |
|
"learning_rate": 9.777864028930705e-06, |
|
"loss": 0.2413, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"grad_norm": 1.724376767595317, |
|
"learning_rate": 9.654368743221022e-06, |
|
"loss": 0.2407, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.23404255319148937, |
|
"grad_norm": 7.656722082252226, |
|
"learning_rate": 9.504844339512096e-06, |
|
"loss": 0.2365, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"grad_norm": 3.966824389729543, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 0.2593, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2765957446808511, |
|
"grad_norm": 2.4259726258566894, |
|
"learning_rate": 9.131193871579975e-06, |
|
"loss": 0.2063, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.2978723404255319, |
|
"grad_norm": 2.0989965880083665, |
|
"learning_rate": 8.90915741234015e-06, |
|
"loss": 0.2137, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.3191489361702128, |
|
"grad_norm": 0.7473125600483845, |
|
"learning_rate": 8.665259359149132e-06, |
|
"loss": 0.1752, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3404255319148936, |
|
"grad_norm": 2.362035437648623, |
|
"learning_rate": 8.400863688854598e-06, |
|
"loss": 0.2079, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.3617021276595745, |
|
"grad_norm": 1.1278643764919287, |
|
"learning_rate": 8.117449009293668e-06, |
|
"loss": 0.2096, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.3829787234042553, |
|
"grad_norm": 0.9888933504092371, |
|
"learning_rate": 7.81660029031811e-06, |
|
"loss": 0.1759, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.40425531914893614, |
|
"grad_norm": 1.3935821371981716, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.2036, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"grad_norm": 0.7210016827193401, |
|
"learning_rate": 7.169418695587791e-06, |
|
"loss": 0.1757, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44680851063829785, |
|
"grad_norm": 1.3893738922334362, |
|
"learning_rate": 6.8267051218319766e-06, |
|
"loss": 0.1802, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.46808510638297873, |
|
"grad_norm": 0.7826903349267914, |
|
"learning_rate": 6.473775872054522e-06, |
|
"loss": 0.1823, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.48936170212765956, |
|
"grad_norm": 1.1055685574886962, |
|
"learning_rate": 6.112604669781572e-06, |
|
"loss": 0.1812, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.5106382978723404, |
|
"grad_norm": 1.3398319352785983, |
|
"learning_rate": 5.745211330880872e-06, |
|
"loss": 0.1791, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"grad_norm": 0.5530788932061305, |
|
"learning_rate": 5.373650467932122e-06, |
|
"loss": 0.1582, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5531914893617021, |
|
"grad_norm": 0.979470403637453, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1687, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.574468085106383, |
|
"grad_norm": 0.6524782994513246, |
|
"learning_rate": 4.626349532067879e-06, |
|
"loss": 0.1614, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.5957446808510638, |
|
"grad_norm": 0.613082643775453, |
|
"learning_rate": 4.254788669119127e-06, |
|
"loss": 0.1406, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.6170212765957447, |
|
"grad_norm": 0.6117099956858949, |
|
"learning_rate": 3.887395330218429e-06, |
|
"loss": 0.1637, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"grad_norm": 0.4598315067554906, |
|
"learning_rate": 3.526224127945479e-06, |
|
"loss": 0.1488, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6595744680851063, |
|
"grad_norm": 0.5041452368752941, |
|
"learning_rate": 3.173294878168025e-06, |
|
"loss": 0.1385, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.6808510638297872, |
|
"grad_norm": 0.6131569474074337, |
|
"learning_rate": 2.83058130441221e-06, |
|
"loss": 0.151, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.7021276595744681, |
|
"grad_norm": 0.5119335095129718, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.1492, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.723404255319149, |
|
"grad_norm": 0.4443782774575828, |
|
"learning_rate": 2.1833997096818897e-06, |
|
"loss": 0.1174, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.7446808510638298, |
|
"grad_norm": 0.45664829579358884, |
|
"learning_rate": 1.8825509907063328e-06, |
|
"loss": 0.1273, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.7659574468085106, |
|
"grad_norm": 0.5213080660032051, |
|
"learning_rate": 1.5991363111454023e-06, |
|
"loss": 0.1422, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.7872340425531915, |
|
"grad_norm": 0.5400795029678803, |
|
"learning_rate": 1.3347406408508695e-06, |
|
"loss": 0.148, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.8085106382978723, |
|
"grad_norm": 0.47711498862269414, |
|
"learning_rate": 1.0908425876598512e-06, |
|
"loss": 0.1201, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.8297872340425532, |
|
"grad_norm": 0.4674822262301143, |
|
"learning_rate": 8.688061284200266e-07, |
|
"loss": 0.1427, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 0.4740692779208992, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 0.1259, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8723404255319149, |
|
"grad_norm": 0.5372156644144523, |
|
"learning_rate": 4.951556604879049e-07, |
|
"loss": 0.1342, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.8936170212765957, |
|
"grad_norm": 0.5807354531086322, |
|
"learning_rate": 3.4563125677897936e-07, |
|
"loss": 0.1449, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9148936170212766, |
|
"grad_norm": 0.4917053466958668, |
|
"learning_rate": 2.2213597106929608e-07, |
|
"loss": 0.1344, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.9361702127659575, |
|
"grad_norm": 0.45294061142670444, |
|
"learning_rate": 1.253604390908819e-07, |
|
"loss": 0.1187, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.9574468085106383, |
|
"grad_norm": 0.42691924060512204, |
|
"learning_rate": 5.584586887435739e-08, |
|
"loss": 0.1114, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.9787234042553191, |
|
"grad_norm": 0.4007354051634515, |
|
"learning_rate": 1.3981014094099354e-08, |
|
"loss": 0.1139, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.4672720715521086, |
|
"learning_rate": 0.0, |
|
"loss": 0.1298, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.14448508620262146, |
|
"eval_runtime": 6.6249, |
|
"eval_samples_per_second": 11.925, |
|
"eval_steps_per_second": 3.019, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 47, |
|
"total_flos": 10394912686080.0, |
|
"train_loss": 0.3196908062759866, |
|
"train_runtime": 717.6944, |
|
"train_samples_per_second": 2.069, |
|
"train_steps_per_second": 0.065 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 47, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 10394912686080.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|