|
{ |
|
"best_metric": 1.1159125566482544, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.03530969545387671, |
|
"eval_steps": 50, |
|
"global_step": 75, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0004707959393850228, |
|
"grad_norm": 2.019765853881836, |
|
"learning_rate": 5e-06, |
|
"loss": 1.7321, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0004707959393850228, |
|
"eval_loss": 1.5949971675872803, |
|
"eval_runtime": 1054.1316, |
|
"eval_samples_per_second": 6.788, |
|
"eval_steps_per_second": 1.697, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0009415918787700456, |
|
"grad_norm": 1.8714739084243774, |
|
"learning_rate": 1e-05, |
|
"loss": 1.5917, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0014123878181550684, |
|
"grad_norm": 1.897204041481018, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.6, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0018831837575400913, |
|
"grad_norm": 2.012906074523926, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6586, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.002353979696925114, |
|
"grad_norm": 1.9945567846298218, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.5295, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.002824775636310137, |
|
"grad_norm": 1.5244598388671875, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4992, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0032955715756951597, |
|
"grad_norm": 1.214459776878357, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.3186, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0037663675150801826, |
|
"grad_norm": 1.1973023414611816, |
|
"learning_rate": 4e-05, |
|
"loss": 1.3787, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0042371634544652054, |
|
"grad_norm": 0.9985453486442566, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.3009, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.004707959393850228, |
|
"grad_norm": 0.8486191630363464, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1828, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005178755333235251, |
|
"grad_norm": 0.8273810744285583, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 1.2181, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.005649551272620274, |
|
"grad_norm": 0.7928112149238586, |
|
"learning_rate": 6e-05, |
|
"loss": 1.2206, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.006120347212005296, |
|
"grad_norm": 0.7599514126777649, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 1.2137, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.006591143151390319, |
|
"grad_norm": 0.8310830593109131, |
|
"learning_rate": 7e-05, |
|
"loss": 1.0807, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.007061939090775342, |
|
"grad_norm": 0.7516305446624756, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.2852, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.007532735030160365, |
|
"grad_norm": 0.8403298258781433, |
|
"learning_rate": 8e-05, |
|
"loss": 1.2441, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.008003530969545388, |
|
"grad_norm": 0.7195339202880859, |
|
"learning_rate": 8.5e-05, |
|
"loss": 1.2419, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.008474326908930411, |
|
"grad_norm": 0.6701211333274841, |
|
"learning_rate": 9e-05, |
|
"loss": 1.2008, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.008945122848315433, |
|
"grad_norm": 0.7064545154571533, |
|
"learning_rate": 9.5e-05, |
|
"loss": 1.1691, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.009415918787700456, |
|
"grad_norm": 0.7223305702209473, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1513, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.009886714727085478, |
|
"grad_norm": 0.6430530548095703, |
|
"learning_rate": 9.991845519630678e-05, |
|
"loss": 1.1434, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.010357510666470502, |
|
"grad_norm": 0.6965686082839966, |
|
"learning_rate": 9.967408676742751e-05, |
|
"loss": 1.1117, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.010828306605855525, |
|
"grad_norm": 0.6954874992370605, |
|
"learning_rate": 9.926769179238466e-05, |
|
"loss": 1.1805, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.011299102545240547, |
|
"grad_norm": 0.6635076999664307, |
|
"learning_rate": 9.870059584711668e-05, |
|
"loss": 1.1295, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01176989848462557, |
|
"grad_norm": 0.6618334650993347, |
|
"learning_rate": 9.797464868072488e-05, |
|
"loss": 1.0415, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.012240694424010592, |
|
"grad_norm": 0.6411958932876587, |
|
"learning_rate": 9.709221818197624e-05, |
|
"loss": 1.1411, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.012711490363395616, |
|
"grad_norm": 0.6791735887527466, |
|
"learning_rate": 9.60561826557425e-05, |
|
"loss": 1.2177, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.013182286302780639, |
|
"grad_norm": 0.7023264765739441, |
|
"learning_rate": 9.486992143456792e-05, |
|
"loss": 1.2012, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.013653082242165661, |
|
"grad_norm": 0.6761083602905273, |
|
"learning_rate": 9.353730385598887e-05, |
|
"loss": 1.1297, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.014123878181550684, |
|
"grad_norm": 0.720009982585907, |
|
"learning_rate": 9.206267664155907e-05, |
|
"loss": 1.1497, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.014594674120935706, |
|
"grad_norm": 0.6534513235092163, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.2473, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.01506547006032073, |
|
"grad_norm": 0.62485271692276, |
|
"learning_rate": 8.870708053195413e-05, |
|
"loss": 1.1087, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.015536265999705753, |
|
"grad_norm": 0.6664251089096069, |
|
"learning_rate": 8.683705689382024e-05, |
|
"loss": 1.1532, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.016007061939090777, |
|
"grad_norm": 0.6808733344078064, |
|
"learning_rate": 8.484687843276469e-05, |
|
"loss": 1.1473, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.016477857878475798, |
|
"grad_norm": 0.627700924873352, |
|
"learning_rate": 8.274303669726426e-05, |
|
"loss": 1.125, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.016948653817860822, |
|
"grad_norm": 0.6665758490562439, |
|
"learning_rate": 8.053239398177191e-05, |
|
"loss": 1.0809, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.017419449757245842, |
|
"grad_norm": 0.6675161719322205, |
|
"learning_rate": 7.822216094333847e-05, |
|
"loss": 1.1696, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.017890245696630867, |
|
"grad_norm": 0.6735967993736267, |
|
"learning_rate": 7.58198730819481e-05, |
|
"loss": 1.1322, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.01836104163601589, |
|
"grad_norm": 0.6226209402084351, |
|
"learning_rate": 7.333336616128369e-05, |
|
"loss": 1.1079, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.01883183757540091, |
|
"grad_norm": 0.648352324962616, |
|
"learning_rate": 7.077075065009433e-05, |
|
"loss": 1.1011, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.019302633514785936, |
|
"grad_norm": 0.6328292489051819, |
|
"learning_rate": 6.814038526753205e-05, |
|
"loss": 1.1568, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.019773429454170956, |
|
"grad_norm": 0.7246712446212769, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 1.112, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02024422539355598, |
|
"grad_norm": 0.689822256565094, |
|
"learning_rate": 6.271091670967436e-05, |
|
"loss": 1.1909, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.020715021332941005, |
|
"grad_norm": 0.6674534678459167, |
|
"learning_rate": 5.992952333228728e-05, |
|
"loss": 1.1454, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.021185817272326025, |
|
"grad_norm": 0.630682647228241, |
|
"learning_rate": 5.7115741913664264e-05, |
|
"loss": 1.1645, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02165661321171105, |
|
"grad_norm": 0.6754137277603149, |
|
"learning_rate": 5.427875042394199e-05, |
|
"loss": 1.1158, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02212740915109607, |
|
"grad_norm": 0.6505759358406067, |
|
"learning_rate": 5.142780253968481e-05, |
|
"loss": 1.0663, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.022598205090481095, |
|
"grad_norm": 0.6762368083000183, |
|
"learning_rate": 4.85721974603152e-05, |
|
"loss": 1.1291, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02306900102986612, |
|
"grad_norm": 0.7021005749702454, |
|
"learning_rate": 4.5721249576058027e-05, |
|
"loss": 1.1421, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.02353979696925114, |
|
"grad_norm": 0.6380864381790161, |
|
"learning_rate": 4.288425808633575e-05, |
|
"loss": 1.0817, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02353979696925114, |
|
"eval_loss": 1.1159125566482544, |
|
"eval_runtime": 1067.2054, |
|
"eval_samples_per_second": 6.704, |
|
"eval_steps_per_second": 1.676, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.024010592908636164, |
|
"grad_norm": 0.6711025834083557, |
|
"learning_rate": 4.007047666771274e-05, |
|
"loss": 1.0878, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.024481388848021184, |
|
"grad_norm": 0.6568624973297119, |
|
"learning_rate": 3.728908329032567e-05, |
|
"loss": 1.0447, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.02495218478740621, |
|
"grad_norm": 0.6191370487213135, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 1.1122, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.025422980726791233, |
|
"grad_norm": 0.6773213744163513, |
|
"learning_rate": 3.1859614732467954e-05, |
|
"loss": 1.1193, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.025893776666176253, |
|
"grad_norm": 0.6792715191841125, |
|
"learning_rate": 2.9229249349905684e-05, |
|
"loss": 1.1247, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.026364572605561278, |
|
"grad_norm": 0.6690425872802734, |
|
"learning_rate": 2.6666633838716314e-05, |
|
"loss": 1.1017, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.026835368544946298, |
|
"grad_norm": 0.6485929489135742, |
|
"learning_rate": 2.418012691805191e-05, |
|
"loss": 1.0747, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.027306164484331322, |
|
"grad_norm": 0.675126314163208, |
|
"learning_rate": 2.1777839056661554e-05, |
|
"loss": 1.07, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.027776960423716347, |
|
"grad_norm": 0.6962899565696716, |
|
"learning_rate": 1.946760601822809e-05, |
|
"loss": 1.0591, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.028247756363101367, |
|
"grad_norm": 0.662079393863678, |
|
"learning_rate": 1.725696330273575e-05, |
|
"loss": 1.1415, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02871855230248639, |
|
"grad_norm": 0.6812634468078613, |
|
"learning_rate": 1.5153121567235335e-05, |
|
"loss": 1.0937, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.029189348241871412, |
|
"grad_norm": 0.7200164198875427, |
|
"learning_rate": 1.3162943106179749e-05, |
|
"loss": 1.2216, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.029660144181256436, |
|
"grad_norm": 0.6734728813171387, |
|
"learning_rate": 1.1292919468045877e-05, |
|
"loss": 1.1649, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.03013094012064146, |
|
"grad_norm": 0.650775671005249, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 1.1018, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03060173606002648, |
|
"grad_norm": 0.6936342716217041, |
|
"learning_rate": 7.937323358440935e-06, |
|
"loss": 1.16, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.031072531999411505, |
|
"grad_norm": 0.6400872468948364, |
|
"learning_rate": 6.462696144011149e-06, |
|
"loss": 1.083, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.03154332793879653, |
|
"grad_norm": 0.6761430501937866, |
|
"learning_rate": 5.13007856543209e-06, |
|
"loss": 1.0366, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.032014123878181554, |
|
"grad_norm": 0.6780744194984436, |
|
"learning_rate": 3.9438173442575e-06, |
|
"loss": 1.1405, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03248491981756657, |
|
"grad_norm": 0.6598646640777588, |
|
"learning_rate": 2.9077818180237693e-06, |
|
"loss": 1.084, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.032955715756951595, |
|
"grad_norm": 0.6628397107124329, |
|
"learning_rate": 2.0253513192751373e-06, |
|
"loss": 1.061, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03342651169633662, |
|
"grad_norm": 0.6676144599914551, |
|
"learning_rate": 1.2994041528833266e-06, |
|
"loss": 1.1337, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.033897307635721644, |
|
"grad_norm": 0.6578212976455688, |
|
"learning_rate": 7.323082076153509e-07, |
|
"loss": 1.1128, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.03436810357510667, |
|
"grad_norm": 0.6542023420333862, |
|
"learning_rate": 3.2591323257248893e-07, |
|
"loss": 1.0441, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.034838899514491685, |
|
"grad_norm": 0.6668130159378052, |
|
"learning_rate": 8.15448036932176e-08, |
|
"loss": 1.0559, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.03530969545387671, |
|
"grad_norm": 0.6653211712837219, |
|
"learning_rate": 0.0, |
|
"loss": 1.1329, |
|
"step": 75 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.26566605602816e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|