|
{ |
|
"best_metric": 0.30884110927581787, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-male-model/checkpoint-1800", |
|
"epoch": 16.03053435114504, |
|
"eval_steps": 100, |
|
"global_step": 2100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7633587786259542, |
|
"grad_norm": 2.278857707977295, |
|
"learning_rate": 0.000285, |
|
"loss": 6.7665, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7633587786259542, |
|
"eval_loss": 3.1609649658203125, |
|
"eval_runtime": 20.525, |
|
"eval_samples_per_second": 14.811, |
|
"eval_steps_per_second": 3.703, |
|
"eval_wer": 0.9990169574834111, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5267175572519083, |
|
"grad_norm": 2.781928062438965, |
|
"learning_rate": 0.00029255874673629243, |
|
"loss": 2.6476, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.5267175572519083, |
|
"eval_loss": 2.2410073280334473, |
|
"eval_runtime": 20.3251, |
|
"eval_samples_per_second": 14.957, |
|
"eval_steps_per_second": 3.739, |
|
"eval_wer": 0.8953059719832883, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.2900763358778624, |
|
"grad_norm": 2.653702974319458, |
|
"learning_rate": 0.00028472584856396865, |
|
"loss": 1.5694, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.2900763358778624, |
|
"eval_loss": 0.5584537386894226, |
|
"eval_runtime": 20.2001, |
|
"eval_samples_per_second": 15.049, |
|
"eval_steps_per_second": 3.762, |
|
"eval_wer": 0.7598918653231752, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.053435114503817, |
|
"grad_norm": 1.1257972717285156, |
|
"learning_rate": 0.0002768929503916449, |
|
"loss": 0.6761, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.053435114503817, |
|
"eval_loss": 0.4565717875957489, |
|
"eval_runtime": 20.2697, |
|
"eval_samples_per_second": 14.998, |
|
"eval_steps_per_second": 3.749, |
|
"eval_wer": 0.6318505775374785, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.816793893129771, |
|
"grad_norm": 0.982060968875885, |
|
"learning_rate": 0.0002690600522193211, |
|
"loss": 0.5793, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.816793893129771, |
|
"eval_loss": 0.4013397693634033, |
|
"eval_runtime": 20.4119, |
|
"eval_samples_per_second": 14.893, |
|
"eval_steps_per_second": 3.723, |
|
"eval_wer": 0.4821823543868272, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.580152671755725, |
|
"grad_norm": 0.9821244478225708, |
|
"learning_rate": 0.0002612271540469974, |
|
"loss": 0.5392, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.580152671755725, |
|
"eval_loss": 0.3795173168182373, |
|
"eval_runtime": 20.3646, |
|
"eval_samples_per_second": 14.928, |
|
"eval_steps_per_second": 3.732, |
|
"eval_wer": 0.4553944458097813, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.34351145038168, |
|
"grad_norm": 1.2693768739700317, |
|
"learning_rate": 0.0002533942558746736, |
|
"loss": 0.4809, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.34351145038168, |
|
"eval_loss": 0.37301722168922424, |
|
"eval_runtime": 20.1691, |
|
"eval_samples_per_second": 15.073, |
|
"eval_steps_per_second": 3.768, |
|
"eval_wer": 0.4332759891865323, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.106870229007634, |
|
"grad_norm": 1.0504028797149658, |
|
"learning_rate": 0.0002455613577023499, |
|
"loss": 0.4813, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.106870229007634, |
|
"eval_loss": 0.35973772406578064, |
|
"eval_runtime": 20.1824, |
|
"eval_samples_per_second": 15.063, |
|
"eval_steps_per_second": 3.766, |
|
"eval_wer": 0.4229540427623495, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.870229007633588, |
|
"grad_norm": 1.0751953125, |
|
"learning_rate": 0.0002377284595300261, |
|
"loss": 0.4484, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.870229007633588, |
|
"eval_loss": 0.3431786596775055, |
|
"eval_runtime": 20.3009, |
|
"eval_samples_per_second": 14.975, |
|
"eval_steps_per_second": 3.744, |
|
"eval_wer": 0.39247972474809534, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.633587786259542, |
|
"grad_norm": 3.291668176651001, |
|
"learning_rate": 0.00022989556135770233, |
|
"loss": 0.4418, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.633587786259542, |
|
"eval_loss": 0.3390863239765167, |
|
"eval_runtime": 20.3166, |
|
"eval_samples_per_second": 14.963, |
|
"eval_steps_per_second": 3.741, |
|
"eval_wer": 0.39469157041042024, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.396946564885496, |
|
"grad_norm": 1.688865303993225, |
|
"learning_rate": 0.00022206266318537858, |
|
"loss": 0.4322, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.396946564885496, |
|
"eval_loss": 0.33392783999443054, |
|
"eval_runtime": 20.3067, |
|
"eval_samples_per_second": 14.97, |
|
"eval_steps_per_second": 3.743, |
|
"eval_wer": 0.3841238633570902, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.16030534351145, |
|
"grad_norm": 1.2780994176864624, |
|
"learning_rate": 0.0002142297650130548, |
|
"loss": 0.3963, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.16030534351145, |
|
"eval_loss": 0.3294471502304077, |
|
"eval_runtime": 20.282, |
|
"eval_samples_per_second": 14.989, |
|
"eval_steps_per_second": 3.747, |
|
"eval_wer": 0.36692061931678543, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.923664122137405, |
|
"grad_norm": 1.1817948818206787, |
|
"learning_rate": 0.00020639686684073108, |
|
"loss": 0.4104, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 9.923664122137405, |
|
"eval_loss": 0.321709543466568, |
|
"eval_runtime": 20.3324, |
|
"eval_samples_per_second": 14.952, |
|
"eval_steps_per_second": 3.738, |
|
"eval_wer": 0.3634799705087245, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.68702290076336, |
|
"grad_norm": 2.474210262298584, |
|
"learning_rate": 0.0001985639686684073, |
|
"loss": 0.3777, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.68702290076336, |
|
"eval_loss": 0.3176502287387848, |
|
"eval_runtime": 20.3222, |
|
"eval_samples_per_second": 14.959, |
|
"eval_steps_per_second": 3.74, |
|
"eval_wer": 0.3610223642172524, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 11.450381679389313, |
|
"grad_norm": 1.456008791923523, |
|
"learning_rate": 0.00019073107049608353, |
|
"loss": 0.3785, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.450381679389313, |
|
"eval_loss": 0.32359373569488525, |
|
"eval_runtime": 20.3296, |
|
"eval_samples_per_second": 14.954, |
|
"eval_steps_per_second": 3.738, |
|
"eval_wer": 0.35389530597198327, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.213740458015268, |
|
"grad_norm": 1.4315876960754395, |
|
"learning_rate": 0.00018289817232375978, |
|
"loss": 0.3682, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.213740458015268, |
|
"eval_loss": 0.31440281867980957, |
|
"eval_runtime": 20.1701, |
|
"eval_samples_per_second": 15.072, |
|
"eval_steps_per_second": 3.768, |
|
"eval_wer": 0.34676824772671416, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.977099236641221, |
|
"grad_norm": 2.2636640071868896, |
|
"learning_rate": 0.000175065274151436, |
|
"loss": 0.3654, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 12.977099236641221, |
|
"eval_loss": 0.31223368644714355, |
|
"eval_runtime": 20.3687, |
|
"eval_samples_per_second": 14.925, |
|
"eval_steps_per_second": 3.731, |
|
"eval_wer": 0.35291226345539445, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 13.740458015267176, |
|
"grad_norm": 0.9211711883544922, |
|
"learning_rate": 0.00016723237597911225, |
|
"loss": 0.3509, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 13.740458015267176, |
|
"eval_loss": 0.30884110927581787, |
|
"eval_runtime": 20.3386, |
|
"eval_samples_per_second": 14.947, |
|
"eval_steps_per_second": 3.737, |
|
"eval_wer": 0.3462767264684198, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 14.50381679389313, |
|
"grad_norm": 2.6637320518493652, |
|
"learning_rate": 0.0001593994778067885, |
|
"loss": 0.3412, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 14.50381679389313, |
|
"eval_loss": 0.314583420753479, |
|
"eval_runtime": 20.1788, |
|
"eval_samples_per_second": 15.065, |
|
"eval_steps_per_second": 3.766, |
|
"eval_wer": 0.33472597689850087, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 15.267175572519085, |
|
"grad_norm": 0.9389367699623108, |
|
"learning_rate": 0.00015156657963446475, |
|
"loss": 0.3344, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.267175572519085, |
|
"eval_loss": 0.31076034903526306, |
|
"eval_runtime": 20.2164, |
|
"eval_samples_per_second": 15.037, |
|
"eval_steps_per_second": 3.759, |
|
"eval_wer": 0.34160727451462275, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.03053435114504, |
|
"grad_norm": 0.9784950017929077, |
|
"learning_rate": 0.00014373368146214098, |
|
"loss": 0.3351, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 16.03053435114504, |
|
"eval_loss": 0.3107351064682007, |
|
"eval_runtime": 20.3863, |
|
"eval_samples_per_second": 14.912, |
|
"eval_steps_per_second": 3.728, |
|
"eval_wer": 0.3263701155074957, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 16.03053435114504, |
|
"step": 2100, |
|
"total_flos": 5.844277956095999e+18, |
|
"train_loss": 0.8914755775814964, |
|
"train_runtime": 1802.4751, |
|
"train_samples_per_second": 8.721, |
|
"train_steps_per_second": 2.18 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3930, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 2 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.844277956095999e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|