{ "best_metric": 0.30884110927581787, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-male-model/checkpoint-1800", "epoch": 16.03053435114504, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7633587786259542, "grad_norm": 2.278857707977295, "learning_rate": 0.000285, "loss": 6.7665, "step": 100 }, { "epoch": 0.7633587786259542, "eval_loss": 3.1609649658203125, "eval_runtime": 20.525, "eval_samples_per_second": 14.811, "eval_steps_per_second": 3.703, "eval_wer": 0.9990169574834111, "step": 100 }, { "epoch": 1.5267175572519083, "grad_norm": 2.781928062438965, "learning_rate": 0.00029255874673629243, "loss": 2.6476, "step": 200 }, { "epoch": 1.5267175572519083, "eval_loss": 2.2410073280334473, "eval_runtime": 20.3251, "eval_samples_per_second": 14.957, "eval_steps_per_second": 3.739, "eval_wer": 0.8953059719832883, "step": 200 }, { "epoch": 2.2900763358778624, "grad_norm": 2.653702974319458, "learning_rate": 0.00028472584856396865, "loss": 1.5694, "step": 300 }, { "epoch": 2.2900763358778624, "eval_loss": 0.5584537386894226, "eval_runtime": 20.2001, "eval_samples_per_second": 15.049, "eval_steps_per_second": 3.762, "eval_wer": 0.7598918653231752, "step": 300 }, { "epoch": 3.053435114503817, "grad_norm": 1.1257972717285156, "learning_rate": 0.0002768929503916449, "loss": 0.6761, "step": 400 }, { "epoch": 3.053435114503817, "eval_loss": 0.4565717875957489, "eval_runtime": 20.2697, "eval_samples_per_second": 14.998, "eval_steps_per_second": 3.749, "eval_wer": 0.6318505775374785, "step": 400 }, { "epoch": 3.816793893129771, "grad_norm": 0.982060968875885, "learning_rate": 0.0002690600522193211, "loss": 0.5793, "step": 500 }, { "epoch": 3.816793893129771, "eval_loss": 0.4013397693634033, "eval_runtime": 20.4119, "eval_samples_per_second": 14.893, "eval_steps_per_second": 3.723, "eval_wer": 0.4821823543868272, "step": 500 }, { "epoch": 4.580152671755725, "grad_norm": 0.9821244478225708, "learning_rate": 0.0002612271540469974, "loss": 0.5392, "step": 600 }, { "epoch": 4.580152671755725, "eval_loss": 0.3795173168182373, "eval_runtime": 20.3646, "eval_samples_per_second": 14.928, "eval_steps_per_second": 3.732, "eval_wer": 0.4553944458097813, "step": 600 }, { "epoch": 5.34351145038168, "grad_norm": 1.2693768739700317, "learning_rate": 0.0002533942558746736, "loss": 0.4809, "step": 700 }, { "epoch": 5.34351145038168, "eval_loss": 0.37301722168922424, "eval_runtime": 20.1691, "eval_samples_per_second": 15.073, "eval_steps_per_second": 3.768, "eval_wer": 0.4332759891865323, "step": 700 }, { "epoch": 6.106870229007634, "grad_norm": 1.0504028797149658, "learning_rate": 0.0002455613577023499, "loss": 0.4813, "step": 800 }, { "epoch": 6.106870229007634, "eval_loss": 0.35973772406578064, "eval_runtime": 20.1824, "eval_samples_per_second": 15.063, "eval_steps_per_second": 3.766, "eval_wer": 0.4229540427623495, "step": 800 }, { "epoch": 6.870229007633588, "grad_norm": 1.0751953125, "learning_rate": 0.0002377284595300261, "loss": 0.4484, "step": 900 }, { "epoch": 6.870229007633588, "eval_loss": 0.3431786596775055, "eval_runtime": 20.3009, "eval_samples_per_second": 14.975, "eval_steps_per_second": 3.744, "eval_wer": 0.39247972474809534, "step": 900 }, { "epoch": 7.633587786259542, "grad_norm": 3.291668176651001, "learning_rate": 0.00022989556135770233, "loss": 0.4418, "step": 1000 }, { "epoch": 7.633587786259542, "eval_loss": 0.3390863239765167, "eval_runtime": 20.3166, "eval_samples_per_second": 14.963, "eval_steps_per_second": 3.741, "eval_wer": 0.39469157041042024, "step": 1000 }, { "epoch": 8.396946564885496, "grad_norm": 1.688865303993225, "learning_rate": 0.00022206266318537858, "loss": 0.4322, "step": 1100 }, { "epoch": 8.396946564885496, "eval_loss": 0.33392783999443054, "eval_runtime": 20.3067, "eval_samples_per_second": 14.97, "eval_steps_per_second": 3.743, "eval_wer": 0.3841238633570902, "step": 1100 }, { "epoch": 9.16030534351145, "grad_norm": 1.2780994176864624, "learning_rate": 0.0002142297650130548, "loss": 0.3963, "step": 1200 }, { "epoch": 9.16030534351145, "eval_loss": 0.3294471502304077, "eval_runtime": 20.282, "eval_samples_per_second": 14.989, "eval_steps_per_second": 3.747, "eval_wer": 0.36692061931678543, "step": 1200 }, { "epoch": 9.923664122137405, "grad_norm": 1.1817948818206787, "learning_rate": 0.00020639686684073108, "loss": 0.4104, "step": 1300 }, { "epoch": 9.923664122137405, "eval_loss": 0.321709543466568, "eval_runtime": 20.3324, "eval_samples_per_second": 14.952, "eval_steps_per_second": 3.738, "eval_wer": 0.3634799705087245, "step": 1300 }, { "epoch": 10.68702290076336, "grad_norm": 2.474210262298584, "learning_rate": 0.0001985639686684073, "loss": 0.3777, "step": 1400 }, { "epoch": 10.68702290076336, "eval_loss": 0.3176502287387848, "eval_runtime": 20.3222, "eval_samples_per_second": 14.959, "eval_steps_per_second": 3.74, "eval_wer": 0.3610223642172524, "step": 1400 }, { "epoch": 11.450381679389313, "grad_norm": 1.456008791923523, "learning_rate": 0.00019073107049608353, "loss": 0.3785, "step": 1500 }, { "epoch": 11.450381679389313, "eval_loss": 0.32359373569488525, "eval_runtime": 20.3296, "eval_samples_per_second": 14.954, "eval_steps_per_second": 3.738, "eval_wer": 0.35389530597198327, "step": 1500 }, { "epoch": 12.213740458015268, "grad_norm": 1.4315876960754395, "learning_rate": 0.00018289817232375978, "loss": 0.3682, "step": 1600 }, { "epoch": 12.213740458015268, "eval_loss": 0.31440281867980957, "eval_runtime": 20.1701, "eval_samples_per_second": 15.072, "eval_steps_per_second": 3.768, "eval_wer": 0.34676824772671416, "step": 1600 }, { "epoch": 12.977099236641221, "grad_norm": 2.2636640071868896, "learning_rate": 0.000175065274151436, "loss": 0.3654, "step": 1700 }, { "epoch": 12.977099236641221, "eval_loss": 0.31223368644714355, "eval_runtime": 20.3687, "eval_samples_per_second": 14.925, "eval_steps_per_second": 3.731, "eval_wer": 0.35291226345539445, "step": 1700 }, { "epoch": 13.740458015267176, "grad_norm": 0.9211711883544922, "learning_rate": 0.00016723237597911225, "loss": 0.3509, "step": 1800 }, { "epoch": 13.740458015267176, "eval_loss": 0.30884110927581787, "eval_runtime": 20.3386, "eval_samples_per_second": 14.947, "eval_steps_per_second": 3.737, "eval_wer": 0.3462767264684198, "step": 1800 }, { "epoch": 14.50381679389313, "grad_norm": 2.6637320518493652, "learning_rate": 0.0001593994778067885, "loss": 0.3412, "step": 1900 }, { "epoch": 14.50381679389313, "eval_loss": 0.314583420753479, "eval_runtime": 20.1788, "eval_samples_per_second": 15.065, "eval_steps_per_second": 3.766, "eval_wer": 0.33472597689850087, "step": 1900 }, { "epoch": 15.267175572519085, "grad_norm": 0.9389367699623108, "learning_rate": 0.00015156657963446475, "loss": 0.3344, "step": 2000 }, { "epoch": 15.267175572519085, "eval_loss": 0.31076034903526306, "eval_runtime": 20.2164, "eval_samples_per_second": 15.037, "eval_steps_per_second": 3.759, "eval_wer": 0.34160727451462275, "step": 2000 }, { "epoch": 16.03053435114504, "grad_norm": 0.9784950017929077, "learning_rate": 0.00014373368146214098, "loss": 0.3351, "step": 2100 }, { "epoch": 16.03053435114504, "eval_loss": 0.3107351064682007, "eval_runtime": 20.3863, "eval_samples_per_second": 14.912, "eval_steps_per_second": 3.728, "eval_wer": 0.3263701155074957, "step": 2100 }, { "epoch": 16.03053435114504, "step": 2100, "total_flos": 5.844277956095999e+18, "train_loss": 0.8914755775814964, "train_runtime": 1802.4751, "train_samples_per_second": 8.721, "train_steps_per_second": 2.18 } ], "logging_steps": 100, "max_steps": 3930, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.844277956095999e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }