|
{ |
|
"best_metric": 0.7915147542953491, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-450", |
|
"epoch": 0.1088929219600726, |
|
"eval_steps": 50, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00024198427102238354, |
|
"eval_loss": 1.3080251216888428, |
|
"eval_runtime": 53.1456, |
|
"eval_samples_per_second": 32.74, |
|
"eval_steps_per_second": 8.185, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0024198427102238356, |
|
"grad_norm": 1.103469967842102, |
|
"learning_rate": 4.2600000000000005e-05, |
|
"loss": 0.9286, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004839685420447671, |
|
"grad_norm": 1.7151211500167847, |
|
"learning_rate": 8.520000000000001e-05, |
|
"loss": 0.9736, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007259528130671506, |
|
"grad_norm": 1.8954715728759766, |
|
"learning_rate": 0.0001278, |
|
"loss": 1.108, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009679370840895343, |
|
"grad_norm": 2.3523519039154053, |
|
"learning_rate": 0.00017040000000000002, |
|
"loss": 1.1477, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012099213551119177, |
|
"grad_norm": 2.1743710041046143, |
|
"learning_rate": 0.000213, |
|
"loss": 1.0015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.012099213551119177, |
|
"eval_loss": 1.0154005289077759, |
|
"eval_runtime": 52.97, |
|
"eval_samples_per_second": 32.849, |
|
"eval_steps_per_second": 8.212, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014519056261343012, |
|
"grad_norm": 1.202251672744751, |
|
"learning_rate": 0.00021274057135267128, |
|
"loss": 0.8938, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01693889897156685, |
|
"grad_norm": 1.0863494873046875, |
|
"learning_rate": 0.00021196354932097723, |
|
"loss": 0.9626, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019358741681790685, |
|
"grad_norm": 1.537191390991211, |
|
"learning_rate": 0.0002106727194781503, |
|
"loss": 1.0011, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021778584392014518, |
|
"grad_norm": 2.1484382152557373, |
|
"learning_rate": 0.00020887437061743096, |
|
"loss": 1.1008, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.024198427102238355, |
|
"grad_norm": 2.325395107269287, |
|
"learning_rate": 0.00020657726411369925, |
|
"loss": 0.9323, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.024198427102238355, |
|
"eval_loss": 1.0085864067077637, |
|
"eval_runtime": 53.2168, |
|
"eval_samples_per_second": 32.696, |
|
"eval_steps_per_second": 8.174, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02661826981246219, |
|
"grad_norm": 0.9922317266464233, |
|
"learning_rate": 0.000203792591238937, |
|
"loss": 0.8588, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.029038112522686024, |
|
"grad_norm": 1.1746575832366943, |
|
"learning_rate": 0.0002005339186394757, |
|
"loss": 0.8886, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03145795523290986, |
|
"grad_norm": 1.4047287702560425, |
|
"learning_rate": 0.00019681712224065936, |
|
"loss": 0.9517, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0338777979431337, |
|
"grad_norm": 2.049302816390991, |
|
"learning_rate": 0.0001926603099009319, |
|
"loss": 1.0665, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.036297640653357534, |
|
"grad_norm": 1.6594562530517578, |
|
"learning_rate": 0.00018808373319217114, |
|
"loss": 0.8871, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.036297640653357534, |
|
"eval_loss": 0.9642200469970703, |
|
"eval_runtime": 53.2251, |
|
"eval_samples_per_second": 32.691, |
|
"eval_steps_per_second": 8.173, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03871748336358137, |
|
"grad_norm": 0.8076631426811218, |
|
"learning_rate": 0.00018310968873606635, |
|
"loss": 0.7604, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0411373260738052, |
|
"grad_norm": 1.2448451519012451, |
|
"learning_rate": 0.0001777624095772184, |
|
"loss": 0.8971, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.043557168784029036, |
|
"grad_norm": 1.4778797626495361, |
|
"learning_rate": 0.0001720679471221826, |
|
"loss": 0.9395, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04597701149425287, |
|
"grad_norm": 2.3277740478515625, |
|
"learning_rate": 0.00016605404421963453, |
|
"loss": 1.0046, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04839685420447671, |
|
"grad_norm": 2.7840893268585205, |
|
"learning_rate": 0.00015975, |
|
"loss": 0.9651, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04839685420447671, |
|
"eval_loss": 0.9314111471176147, |
|
"eval_runtime": 53.3426, |
|
"eval_samples_per_second": 32.619, |
|
"eval_steps_per_second": 8.155, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.050816696914700546, |
|
"grad_norm": 0.8467873930931091, |
|
"learning_rate": 0.00015318652713303674, |
|
"loss": 0.7771, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05323653962492438, |
|
"grad_norm": 1.810876488685608, |
|
"learning_rate": 0.00014639560219879464, |
|
"loss": 0.8956, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05565638233514821, |
|
"grad_norm": 1.5156196355819702, |
|
"learning_rate": 0.0001394103099009319, |
|
"loss": 0.953, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05807622504537205, |
|
"grad_norm": 1.6678131818771362, |
|
"learning_rate": 0.0001322646818813646, |
|
"loss": 0.971, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.060496067755595885, |
|
"grad_norm": 2.130657196044922, |
|
"learning_rate": 0.0001249935309215281, |
|
"loss": 0.8652, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.060496067755595885, |
|
"eval_loss": 0.8826441168785095, |
|
"eval_runtime": 53.194, |
|
"eval_samples_per_second": 32.71, |
|
"eval_steps_per_second": 8.178, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06291591046581972, |
|
"grad_norm": 0.86822110414505, |
|
"learning_rate": 0.0001176322813380051, |
|
"loss": 0.7765, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06533575317604355, |
|
"grad_norm": 1.0380805730819702, |
|
"learning_rate": 0.00011021679639881638, |
|
"loss": 0.8417, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0677555958862674, |
|
"grad_norm": 1.1815681457519531, |
|
"learning_rate": 0.00010278320360118368, |
|
"loss": 0.8916, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07017543859649122, |
|
"grad_norm": 2.9040372371673584, |
|
"learning_rate": 9.536771866199493e-05, |
|
"loss": 0.8656, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07259528130671507, |
|
"grad_norm": 2.076068878173828, |
|
"learning_rate": 8.800646907847192e-05, |
|
"loss": 0.8418, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07259528130671507, |
|
"eval_loss": 0.8545441031455994, |
|
"eval_runtime": 53.1112, |
|
"eval_samples_per_second": 32.761, |
|
"eval_steps_per_second": 8.19, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0750151240169389, |
|
"grad_norm": 0.856940507888794, |
|
"learning_rate": 8.07353181186354e-05, |
|
"loss": 0.7956, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07743496672716274, |
|
"grad_norm": 0.9584810137748718, |
|
"learning_rate": 7.35896900990681e-05, |
|
"loss": 0.7721, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07985480943738657, |
|
"grad_norm": 1.5156158208847046, |
|
"learning_rate": 6.660439780120536e-05, |
|
"loss": 0.9232, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.0822746521476104, |
|
"grad_norm": 2.322288990020752, |
|
"learning_rate": 5.981347286696324e-05, |
|
"loss": 0.8629, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08469449485783424, |
|
"grad_norm": 1.789493441581726, |
|
"learning_rate": 5.325000000000002e-05, |
|
"loss": 0.8331, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08469449485783424, |
|
"eval_loss": 0.8212170600891113, |
|
"eval_runtime": 53.0695, |
|
"eval_samples_per_second": 32.787, |
|
"eval_steps_per_second": 8.197, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08711433756805807, |
|
"grad_norm": 0.8615260720252991, |
|
"learning_rate": 4.6945955780365475e-05, |
|
"loss": 0.7184, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08953418027828192, |
|
"grad_norm": 1.1007622480392456, |
|
"learning_rate": 4.0932052877817393e-05, |
|
"loss": 0.8244, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09195402298850575, |
|
"grad_norm": 1.265352725982666, |
|
"learning_rate": 3.523759042278163e-05, |
|
"loss": 0.8621, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09437386569872959, |
|
"grad_norm": 1.663914680480957, |
|
"learning_rate": 2.989031126393367e-05, |
|
"loss": 0.8621, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09679370840895342, |
|
"grad_norm": 1.8117319345474243, |
|
"learning_rate": 2.4916266807828855e-05, |
|
"loss": 0.7855, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09679370840895342, |
|
"eval_loss": 0.8026256561279297, |
|
"eval_runtime": 53.2521, |
|
"eval_samples_per_second": 32.675, |
|
"eval_steps_per_second": 8.169, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09921355111917725, |
|
"grad_norm": 0.8329099416732788, |
|
"learning_rate": 2.033969009906811e-05, |
|
"loss": 0.708, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10163339382940109, |
|
"grad_norm": 1.3031426668167114, |
|
"learning_rate": 1.6182877759340637e-05, |
|
"loss": 0.7208, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10405323653962492, |
|
"grad_norm": 1.197944164276123, |
|
"learning_rate": 1.2466081360524275e-05, |
|
"loss": 0.7973, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10647307924984876, |
|
"grad_norm": 2.2623517513275146, |
|
"learning_rate": 9.207408761062996e-06, |
|
"loss": 0.8831, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1088929219600726, |
|
"grad_norm": 1.5466116666793823, |
|
"learning_rate": 6.422735886300764e-06, |
|
"loss": 0.669, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1088929219600726, |
|
"eval_loss": 0.7915147542953491, |
|
"eval_runtime": 53.2722, |
|
"eval_samples_per_second": 32.662, |
|
"eval_steps_per_second": 8.166, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1270519349248e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|