|
{ |
|
"best_metric": 2.5573015213012695, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-400", |
|
"epoch": 0.09102286949596086, |
|
"eval_steps": 50, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00022755717373990216, |
|
"eval_loss": 2.9036054611206055, |
|
"eval_runtime": 184.2385, |
|
"eval_samples_per_second": 10.047, |
|
"eval_steps_per_second": 2.513, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0022755717373990213, |
|
"grad_norm": 0.3486361503601074, |
|
"learning_rate": 4.16e-05, |
|
"loss": 2.918, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004551143474798043, |
|
"grad_norm": 0.4807929992675781, |
|
"learning_rate": 8.32e-05, |
|
"loss": 2.7357, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006826715212197064, |
|
"grad_norm": 0.5630984306335449, |
|
"learning_rate": 0.0001248, |
|
"loss": 2.8322, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009102286949596085, |
|
"grad_norm": 0.544504702091217, |
|
"learning_rate": 0.0001664, |
|
"loss": 2.6563, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.011377858686995108, |
|
"grad_norm": 1.6601486206054688, |
|
"learning_rate": 0.000208, |
|
"loss": 2.8123, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.011377858686995108, |
|
"eval_loss": 2.7227728366851807, |
|
"eval_runtime": 184.1917, |
|
"eval_samples_per_second": 10.049, |
|
"eval_steps_per_second": 2.514, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.013653430424394129, |
|
"grad_norm": 0.5499740242958069, |
|
"learning_rate": 0.0002077466612270217, |
|
"loss": 2.6736, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01592900216179315, |
|
"grad_norm": 0.5605274438858032, |
|
"learning_rate": 0.0002069878791491233, |
|
"loss": 2.7277, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01820457389919217, |
|
"grad_norm": 0.5069101452827454, |
|
"learning_rate": 0.00020572735047631578, |
|
"loss": 2.6469, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.020480145636591195, |
|
"grad_norm": 0.5251340866088867, |
|
"learning_rate": 0.00020397121637758515, |
|
"loss": 2.5055, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.022755717373990215, |
|
"grad_norm": 2.59476375579834, |
|
"learning_rate": 0.00020172803256173445, |
|
"loss": 2.7778, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.022755717373990215, |
|
"eval_loss": 2.6594133377075195, |
|
"eval_runtime": 184.2257, |
|
"eval_samples_per_second": 10.047, |
|
"eval_steps_per_second": 2.513, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.025031289111389236, |
|
"grad_norm": 0.5118083953857422, |
|
"learning_rate": 0.00019900872759483047, |
|
"loss": 2.7046, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.027306860848788257, |
|
"grad_norm": 0.6953938603401184, |
|
"learning_rate": 0.0001958265496573284, |
|
"loss": 2.7211, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.029582432586187278, |
|
"grad_norm": 0.4843135178089142, |
|
"learning_rate": 0.00019219700200026827, |
|
"loss": 2.6335, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0318580043235863, |
|
"grad_norm": 0.48902738094329834, |
|
"learning_rate": 0.0001881377674149945, |
|
"loss": 2.5555, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03413357606098532, |
|
"grad_norm": 0.9428099989891052, |
|
"learning_rate": 0.00018366862208437368, |
|
"loss": 2.6027, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03413357606098532, |
|
"eval_loss": 2.6243715286254883, |
|
"eval_runtime": 184.6541, |
|
"eval_samples_per_second": 10.024, |
|
"eval_steps_per_second": 2.507, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03640914779838434, |
|
"grad_norm": 0.5289075970649719, |
|
"learning_rate": 0.00017881133923521971, |
|
"loss": 2.6422, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03868471953578337, |
|
"grad_norm": 0.5151132345199585, |
|
"learning_rate": 0.00017358958306132124, |
|
"loss": 2.61, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04096029127318239, |
|
"grad_norm": 0.5393058061599731, |
|
"learning_rate": 0.00016802879343386844, |
|
"loss": 2.5273, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04323586301058141, |
|
"grad_norm": 0.599901556968689, |
|
"learning_rate": 0.00016215606196095766, |
|
"loss": 2.4814, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04551143474798043, |
|
"grad_norm": 1.3163615465164185, |
|
"learning_rate": 0.000156, |
|
"loss": 2.6113, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04551143474798043, |
|
"eval_loss": 2.606013059616089, |
|
"eval_runtime": 184.8057, |
|
"eval_samples_per_second": 10.016, |
|
"eval_steps_per_second": 2.505, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04778700648537945, |
|
"grad_norm": 0.5513772964477539, |
|
"learning_rate": 0.00014959059926606403, |
|
"loss": 2.6334, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05006257822277847, |
|
"grad_norm": 0.4891476035118103, |
|
"learning_rate": 0.00014295908571525487, |
|
"loss": 2.5474, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.052338149960177494, |
|
"grad_norm": 0.5638535022735596, |
|
"learning_rate": 0.00013613776741499452, |
|
"loss": 2.6109, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.054613721697576514, |
|
"grad_norm": 0.5022374987602234, |
|
"learning_rate": 0.00012915987714236542, |
|
"loss": 2.5818, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.056889293434975535, |
|
"grad_norm": 1.3279516696929932, |
|
"learning_rate": 0.00012205941047736077, |
|
"loss": 2.4663, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.056889293434975535, |
|
"eval_loss": 2.584219217300415, |
|
"eval_runtime": 184.6096, |
|
"eval_samples_per_second": 10.027, |
|
"eval_steps_per_second": 2.508, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.059164865172374556, |
|
"grad_norm": 0.539516270160675, |
|
"learning_rate": 0.00011487096017983597, |
|
"loss": 2.6549, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.061440436909773584, |
|
"grad_norm": 0.4973564147949219, |
|
"learning_rate": 0.00010762954765706012, |
|
"loss": 2.5091, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0637160086471726, |
|
"grad_norm": 0.5017938613891602, |
|
"learning_rate": 0.00010037045234293992, |
|
"loss": 2.4611, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06599158038457162, |
|
"grad_norm": 0.519973635673523, |
|
"learning_rate": 9.312903982016405e-05, |
|
"loss": 2.4266, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06826715212197064, |
|
"grad_norm": 2.7459232807159424, |
|
"learning_rate": 8.594058952263925e-05, |
|
"loss": 2.6504, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06826715212197064, |
|
"eval_loss": 2.5742878913879395, |
|
"eval_runtime": 184.451, |
|
"eval_samples_per_second": 10.035, |
|
"eval_steps_per_second": 2.51, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07054272385936966, |
|
"grad_norm": 0.5434854030609131, |
|
"learning_rate": 7.884012285763457e-05, |
|
"loss": 2.6699, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07281829559676868, |
|
"grad_norm": 0.48433616757392883, |
|
"learning_rate": 7.186223258500548e-05, |
|
"loss": 2.6301, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07509386733416772, |
|
"grad_norm": 0.5066972374916077, |
|
"learning_rate": 6.504091428474514e-05, |
|
"loss": 2.4588, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.07736943907156674, |
|
"grad_norm": 0.5236348509788513, |
|
"learning_rate": 5.840940073393593e-05, |
|
"loss": 2.4142, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07964501080896576, |
|
"grad_norm": 2.0875391960144043, |
|
"learning_rate": 5.200000000000002e-05, |
|
"loss": 2.4484, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07964501080896576, |
|
"eval_loss": 2.5629172325134277, |
|
"eval_runtime": 184.6435, |
|
"eval_samples_per_second": 10.025, |
|
"eval_steps_per_second": 2.508, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08192058254636478, |
|
"grad_norm": 0.5457383394241333, |
|
"learning_rate": 4.5843938039042344e-05, |
|
"loss": 2.6142, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.0841961542837638, |
|
"grad_norm": 0.5010927319526672, |
|
"learning_rate": 3.997120656613154e-05, |
|
"loss": 2.5545, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.08647172602116282, |
|
"grad_norm": 0.5765501856803894, |
|
"learning_rate": 3.441041693867878e-05, |
|
"loss": 2.4515, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.08874729775856184, |
|
"grad_norm": 0.5465764999389648, |
|
"learning_rate": 2.9188660764780296e-05, |
|
"loss": 2.4339, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09102286949596086, |
|
"grad_norm": 0.9333412647247314, |
|
"learning_rate": 2.4331377915626298e-05, |
|
"loss": 2.4501, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09102286949596086, |
|
"eval_loss": 2.5573015213012695, |
|
"eval_runtime": 184.621, |
|
"eval_samples_per_second": 10.026, |
|
"eval_steps_per_second": 2.508, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3159403401877914e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|