{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 1000.0,
  "global_step": 5495,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09099181073703366,
      "grad_norm": 1.2852333784103394,
      "learning_rate": 3.080335788799027e-05,
      "loss": 0.1592,
      "step": 100
    },
    {
      "epoch": 0.18198362147406733,
      "grad_norm": 1.0739330053329468,
      "learning_rate": 4.1080243101273595e-05,
      "loss": 0.1603,
      "step": 200
    },
    {
      "epoch": 0.272975432211101,
      "grad_norm": 0.9484991431236267,
      "learning_rate": 4.030441225894548e-05,
      "loss": 0.1797,
      "step": 300
    },
    {
      "epoch": 0.36396724294813465,
      "grad_norm": 1.8888756036758423,
      "learning_rate": 3.9528581416617364e-05,
      "loss": 0.1754,
      "step": 400
    },
    {
      "epoch": 0.4549590536851683,
      "grad_norm": 1.1603392362594604,
      "learning_rate": 3.8752750574289256e-05,
      "loss": 0.1818,
      "step": 500
    },
    {
      "epoch": 0.545950864422202,
      "grad_norm": 1.3092777729034424,
      "learning_rate": 3.797691973196114e-05,
      "loss": 0.1805,
      "step": 600
    },
    {
      "epoch": 0.6369426751592356,
      "grad_norm": 1.2778502702713013,
      "learning_rate": 3.7201088889633025e-05,
      "loss": 0.1642,
      "step": 700
    },
    {
      "epoch": 0.7279344858962693,
      "grad_norm": 3.04357647895813,
      "learning_rate": 3.642525804730491e-05,
      "loss": 0.1626,
      "step": 800
    },
    {
      "epoch": 0.818926296633303,
      "grad_norm": 0.8114387392997742,
      "learning_rate": 3.56494272049768e-05,
      "loss": 0.1625,
      "step": 900
    },
    {
      "epoch": 0.9099181073703366,
      "grad_norm": 1.6360591650009155,
      "learning_rate": 3.4873596362648686e-05,
      "loss": 0.1644,
      "step": 1000
    },
    {
      "epoch": 0.9099181073703366,
      "eval_cer": 0.17117944011014227,
      "eval_loss": 0.24234530329704285,
      "eval_runtime": 16.3456,
      "eval_samples_per_second": 30.589,
      "eval_steps_per_second": 0.979,
      "eval_wer": 0.36909142371921877,
      "step": 1000
    },
    {
      "epoch": 1.0009099181073704,
      "grad_norm": 1.0669325590133667,
      "learning_rate": 3.409776552032057e-05,
      "loss": 0.1556,
      "step": 1100
    },
    {
      "epoch": 1.091901728844404,
      "grad_norm": 0.9532307386398315,
      "learning_rate": 3.332193467799246e-05,
      "loss": 0.1368,
      "step": 1200
    },
    {
      "epoch": 1.1828935395814377,
      "grad_norm": 2.0215086936950684,
      "learning_rate": 3.254610383566435e-05,
      "loss": 0.1458,
      "step": 1300
    },
    {
      "epoch": 1.2738853503184713,
      "grad_norm": 1.452370285987854,
      "learning_rate": 3.177027299333623e-05,
      "loss": 0.1433,
      "step": 1400
    },
    {
      "epoch": 1.364877161055505,
      "grad_norm": 0.7807812690734863,
      "learning_rate": 3.099444215100812e-05,
      "loss": 0.1433,
      "step": 1500
    },
    {
      "epoch": 1.4558689717925386,
      "grad_norm": 0.5693336129188538,
      "learning_rate": 3.0218611308680008e-05,
      "loss": 0.1372,
      "step": 1600
    },
    {
      "epoch": 1.5468607825295724,
      "grad_norm": 1.0194613933563232,
      "learning_rate": 2.9442780466351896e-05,
      "loss": 0.1429,
      "step": 1700
    },
    {
      "epoch": 1.6378525932666061,
      "grad_norm": 1.2318494319915771,
      "learning_rate": 2.866694962402378e-05,
      "loss": 0.1403,
      "step": 1800
    },
    {
      "epoch": 1.7288444040036397,
      "grad_norm": 0.6520742774009705,
      "learning_rate": 2.789111878169567e-05,
      "loss": 0.1442,
      "step": 1900
    },
    {
      "epoch": 1.8198362147406733,
      "grad_norm": 0.8180395364761353,
      "learning_rate": 2.7115287939367557e-05,
      "loss": 0.1373,
      "step": 2000
    },
    {
      "epoch": 1.8198362147406733,
      "eval_cer": 0.1699278234386082,
      "eval_loss": 0.23588787019252777,
      "eval_runtime": 16.2786,
      "eval_samples_per_second": 30.715,
      "eval_steps_per_second": 0.983,
      "eval_wer": 0.3586187376167563,
      "step": 2000
    },
    {
      "epoch": 1.910828025477707,
      "grad_norm": 0.929227352142334,
      "learning_rate": 2.6339457097039442e-05,
      "loss": 0.1407,
      "step": 2100
    },
    {
      "epoch": 2.001819836214741,
      "grad_norm": 1.137143850326538,
      "learning_rate": 2.556362625471133e-05,
      "loss": 0.1363,
      "step": 2200
    },
    {
      "epoch": 2.092811646951774,
      "grad_norm": 1.959381341934204,
      "learning_rate": 2.4787795412383218e-05,
      "loss": 0.1274,
      "step": 2300
    },
    {
      "epoch": 2.183803457688808,
      "grad_norm": 0.8095592856407166,
      "learning_rate": 2.4011964570055103e-05,
      "loss": 0.1288,
      "step": 2400
    },
    {
      "epoch": 2.2747952684258417,
      "grad_norm": 0.717786431312561,
      "learning_rate": 2.323613372772699e-05,
      "loss": 0.1314,
      "step": 2500
    },
    {
      "epoch": 2.3657870791628755,
      "grad_norm": 1.3463408946990967,
      "learning_rate": 2.246030288539888e-05,
      "loss": 0.1243,
      "step": 2600
    },
    {
      "epoch": 2.4567788898999092,
      "grad_norm": 1.703278660774231,
      "learning_rate": 2.1684472043070764e-05,
      "loss": 0.1226,
      "step": 2700
    },
    {
      "epoch": 2.5477707006369426,
      "grad_norm": 1.0250530242919922,
      "learning_rate": 2.0908641200742652e-05,
      "loss": 0.1217,
      "step": 2800
    },
    {
      "epoch": 2.6387625113739763,
      "grad_norm": 2.0595955848693848,
      "learning_rate": 2.0132810358414537e-05,
      "loss": 0.1162,
      "step": 2900
    },
    {
      "epoch": 2.72975432211101,
      "grad_norm": 0.5484445691108704,
      "learning_rate": 1.9356979516086425e-05,
      "loss": 0.1219,
      "step": 3000
    },
    {
      "epoch": 2.72975432211101,
      "eval_cer": 0.16654845842546623,
      "eval_loss": 0.2350655198097229,
      "eval_runtime": 16.364,
      "eval_samples_per_second": 30.555,
      "eval_steps_per_second": 0.978,
      "eval_wer": 0.34871214265496747,
      "step": 3000
    },
    {
      "epoch": 2.8207461328480434,
      "grad_norm": 0.8254183530807495,
      "learning_rate": 1.858114867375831e-05,
      "loss": 0.1302,
      "step": 3100
    },
    {
      "epoch": 2.911737943585077,
      "grad_norm": 1.559329867362976,
      "learning_rate": 1.7805317831430197e-05,
      "loss": 0.1224,
      "step": 3200
    },
    {
      "epoch": 3.002729754322111,
      "grad_norm": 0.9372493624687195,
      "learning_rate": 1.7029486989102082e-05,
      "loss": 0.1186,
      "step": 3300
    },
    {
      "epoch": 3.0937215650591448,
      "grad_norm": 0.5558441877365112,
      "learning_rate": 1.625365614677397e-05,
      "loss": 0.1084,
      "step": 3400
    },
    {
      "epoch": 3.1847133757961785,
      "grad_norm": 0.9594938158988953,
      "learning_rate": 1.547782530444586e-05,
      "loss": 0.114,
      "step": 3500
    },
    {
      "epoch": 3.275705186533212,
      "grad_norm": 0.835418164730072,
      "learning_rate": 1.4701994462117745e-05,
      "loss": 0.1099,
      "step": 3600
    },
    {
      "epoch": 3.3666969972702456,
      "grad_norm": 1.4973269701004028,
      "learning_rate": 1.3926163619789631e-05,
      "loss": 0.1111,
      "step": 3700
    },
    {
      "epoch": 3.4576888080072794,
      "grad_norm": 0.5722721219062805,
      "learning_rate": 1.3150332777461518e-05,
      "loss": 0.1117,
      "step": 3800
    },
    {
      "epoch": 3.548680618744313,
      "grad_norm": 1.0630409717559814,
      "learning_rate": 1.2374501935133406e-05,
      "loss": 0.1171,
      "step": 3900
    },
    {
      "epoch": 3.6396724294813465,
      "grad_norm": 4.837808609008789,
      "learning_rate": 1.1598671092805292e-05,
      "loss": 0.1106,
      "step": 4000
    },
    {
      "epoch": 3.6396724294813465,
      "eval_cer": 0.16671534064833743,
      "eval_loss": 0.23994748294353485,
      "eval_runtime": 16.3865,
      "eval_samples_per_second": 30.513,
      "eval_steps_per_second": 0.976,
      "eval_wer": 0.3447495046702519,
      "step": 4000
    },
    {
      "epoch": 3.7306642402183803,
      "grad_norm": 1.5471532344818115,
      "learning_rate": 1.0822840250477179e-05,
      "loss": 0.1061,
      "step": 4100
    },
    {
      "epoch": 3.821656050955414,
      "grad_norm": 0.7752039432525635,
      "learning_rate": 1.0047009408149065e-05,
      "loss": 0.1032,
      "step": 4200
    },
    {
      "epoch": 3.912647861692448,
      "grad_norm": 1.4473958015441895,
      "learning_rate": 9.271178565820951e-06,
      "loss": 0.1117,
      "step": 4300
    },
    {
      "epoch": 4.003639672429482,
      "grad_norm": 1.3828502893447876,
      "learning_rate": 8.495347723492838e-06,
      "loss": 0.1089,
      "step": 4400
    },
    {
      "epoch": 4.094631483166515,
      "grad_norm": 0.6974540948867798,
      "learning_rate": 7.719516881164726e-06,
      "loss": 0.1072,
      "step": 4500
    },
    {
      "epoch": 4.185623293903548,
      "grad_norm": 0.8870043158531189,
      "learning_rate": 6.943686038836612e-06,
      "loss": 0.1028,
      "step": 4600
    },
    {
      "epoch": 4.276615104640582,
      "grad_norm": 0.685329020023346,
      "learning_rate": 6.1678551965085e-06,
      "loss": 0.1063,
      "step": 4700
    },
    {
      "epoch": 4.367606915377616,
      "grad_norm": 0.7879564762115479,
      "learning_rate": 5.392024354180386e-06,
      "loss": 0.0988,
      "step": 4800
    },
    {
      "epoch": 4.45859872611465,
      "grad_norm": 0.7849826216697693,
      "learning_rate": 4.616193511852273e-06,
      "loss": 0.1055,
      "step": 4900
    },
    {
      "epoch": 4.549590536851683,
      "grad_norm": 0.9511623382568359,
      "learning_rate": 3.840362669524161e-06,
      "loss": 0.0986,
      "step": 5000
    },
    {
      "epoch": 4.549590536851683,
      "eval_cer": 0.16387834285952688,
      "eval_loss": 0.24175503849983215,
      "eval_runtime": 16.3966,
      "eval_samples_per_second": 30.494,
      "eval_steps_per_second": 0.976,
      "eval_wer": 0.3388055476931786,
      "step": 5000
    },
    {
      "epoch": 4.640582347588717,
      "grad_norm": 1.7066700458526611,
      "learning_rate": 3.064531827196047e-06,
      "loss": 0.1091,
      "step": 5100
    },
    {
      "epoch": 4.731574158325751,
      "grad_norm": 2.1451282501220703,
      "learning_rate": 2.288700984867934e-06,
      "loss": 0.1034,
      "step": 5200
    },
    {
      "epoch": 4.822565969062785,
      "grad_norm": 1.3042198419570923,
      "learning_rate": 1.5128701425398207e-06,
      "loss": 0.1025,
      "step": 5300
    },
    {
      "epoch": 4.9135577797998184,
      "grad_norm": 0.7029935717582703,
      "learning_rate": 7.370393002117075e-07,
      "loss": 0.1028,
      "step": 5400
    },
    {
      "epoch": 5.0,
      "step": 5495,
      "total_flos": 4.052184710714386e+19,
      "train_loss": 0.1293755126064533,
      "train_runtime": 8966.103,
      "train_samples_per_second": 19.598,
      "train_steps_per_second": 0.613
    }
  ],
  "logging_steps": 100,
  "max_steps": 5495,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.052184710714386e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}