{ "best_metric": 3.1372523307800293, "best_model_checkpoint": "xtreme_s_xlsr_mls_upd/checkpoint-100", "epoch": 3.0, "global_step": 102, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.0, "loss": 10.1653, "step": 1 }, { "epoch": 0.06, "learning_rate": 0.0003, "loss": 9.9916, "step": 2 }, { "epoch": 0.09, "learning_rate": 0.000297029702970297, "loss": 9.9785, "step": 3 }, { "epoch": 0.12, "learning_rate": 0.000294059405940594, "loss": 9.1255, "step": 4 }, { "epoch": 0.15, "learning_rate": 0.000294059405940594, "loss": 7.897, "step": 5 }, { "epoch": 0.18, "learning_rate": 0.00029108910891089107, "loss": 7.7678, "step": 6 }, { "epoch": 0.21, "learning_rate": 0.00028811881188118806, "loss": 6.6699, "step": 7 }, { "epoch": 0.24, "learning_rate": 0.0002851485148514851, "loss": 5.6942, "step": 8 }, { "epoch": 0.26, "learning_rate": 0.00028217821782178216, "loss": 5.0037, "step": 9 }, { "epoch": 0.29, "learning_rate": 0.0002792079207920792, "loss": 4.5539, "step": 10 }, { "epoch": 0.32, "learning_rate": 0.0002762376237623762, "loss": 4.3802, "step": 11 }, { "epoch": 0.35, "learning_rate": 0.00027326732673267325, "loss": 4.0771, "step": 12 }, { "epoch": 0.38, "learning_rate": 0.0002702970297029703, "loss": 3.9155, "step": 13 }, { "epoch": 0.41, "learning_rate": 0.0002673267326732673, "loss": 3.8023, "step": 14 }, { "epoch": 0.44, "learning_rate": 0.00026435643564356434, "loss": 3.7282, "step": 15 }, { "epoch": 0.47, "learning_rate": 0.00026138613861386134, "loss": 3.7088, "step": 16 }, { "epoch": 0.5, "learning_rate": 0.0002584158415841584, "loss": 3.5787, "step": 17 }, { "epoch": 0.53, "learning_rate": 0.00025544554455445543, "loss": 3.5437, "step": 18 }, { "epoch": 0.56, "learning_rate": 0.00025247524752475243, "loss": 3.495, "step": 19 }, { "epoch": 0.59, "learning_rate": 0.0002495049504950495, "loss": 3.4678, "step": 20 }, { "epoch": 0.59, "eval_cer": 1.0, "eval_loss": 3.458103895187378, "eval_runtime": 36.7504, "eval_samples_per_second": 13.932, "eval_steps_per_second": 0.871, "eval_wer": 1.0, "step": 20 }, { "epoch": 0.62, "learning_rate": 0.0002465346534653465, "loss": 3.4143, "step": 21 }, { "epoch": 0.65, "learning_rate": 0.00024356435643564355, "loss": 3.4058, "step": 22 }, { "epoch": 0.68, "learning_rate": 0.0002405940594059406, "loss": 3.3713, "step": 23 }, { "epoch": 0.71, "learning_rate": 0.00023762376237623762, "loss": 3.3317, "step": 24 }, { "epoch": 0.74, "learning_rate": 0.00023465346534653464, "loss": 3.2929, "step": 25 }, { "epoch": 0.76, "learning_rate": 0.00023168316831683166, "loss": 3.2983, "step": 26 }, { "epoch": 0.79, "learning_rate": 0.00022871287128712868, "loss": 3.2625, "step": 27 }, { "epoch": 0.82, "learning_rate": 0.00022574257425742573, "loss": 3.3042, "step": 28 }, { "epoch": 0.85, "learning_rate": 0.00022277227722772275, "loss": 3.2494, "step": 29 }, { "epoch": 0.88, "learning_rate": 0.00021980198019801977, "loss": 3.231, "step": 30 }, { "epoch": 0.91, "learning_rate": 0.0002168316831683168, "loss": 3.2249, "step": 31 }, { "epoch": 0.94, "learning_rate": 0.00021386138613861385, "loss": 3.2355, "step": 32 }, { "epoch": 0.97, "learning_rate": 0.00021089108910891087, "loss": 3.1952, "step": 33 }, { "epoch": 1.0, "learning_rate": 0.0002079207920792079, "loss": 3.1892, "step": 34 }, { "epoch": 1.03, "learning_rate": 0.00020495049504950494, "loss": 3.1875, "step": 35 }, { "epoch": 1.06, "learning_rate": 0.00020198019801980199, "loss": 3.1733, "step": 36 }, { "epoch": 1.09, "learning_rate": 0.000199009900990099, "loss": 3.2136, "step": 37 }, { "epoch": 1.12, "learning_rate": 0.00019603960396039603, "loss": 3.1819, "step": 38 }, { "epoch": 1.15, "learning_rate": 0.00019306930693069305, "loss": 3.1764, "step": 39 }, { "epoch": 1.18, "learning_rate": 0.00019009900990099007, "loss": 3.1713, "step": 40 }, { "epoch": 1.18, "eval_cer": 1.0, "eval_loss": 3.1816344261169434, "eval_runtime": 36.1118, "eval_samples_per_second": 14.178, "eval_steps_per_second": 0.886, "eval_wer": 1.0, "step": 40 }, { "epoch": 1.21, "learning_rate": 0.00018712871287128712, "loss": 3.176, "step": 41 }, { "epoch": 1.24, "learning_rate": 0.00018415841584158414, "loss": 3.1669, "step": 42 }, { "epoch": 1.26, "learning_rate": 0.00018118811881188116, "loss": 3.1446, "step": 43 }, { "epoch": 1.29, "learning_rate": 0.00017821782178217819, "loss": 3.1556, "step": 44 }, { "epoch": 1.32, "learning_rate": 0.0001752475247524752, "loss": 3.1447, "step": 45 }, { "epoch": 1.35, "learning_rate": 0.00017227722772277226, "loss": 3.1412, "step": 46 }, { "epoch": 1.38, "learning_rate": 0.00016930693069306928, "loss": 3.1521, "step": 47 }, { "epoch": 1.41, "learning_rate": 0.00016633663366336633, "loss": 3.1493, "step": 48 }, { "epoch": 1.44, "learning_rate": 0.00016336633663366338, "loss": 3.1494, "step": 49 }, { "epoch": 1.47, "learning_rate": 0.0001603960396039604, "loss": 3.1419, "step": 50 }, { "epoch": 1.5, "learning_rate": 0.00015742574257425742, "loss": 3.1407, "step": 51 }, { "epoch": 1.53, "learning_rate": 0.00015445544554455444, "loss": 3.1456, "step": 52 }, { "epoch": 1.56, "learning_rate": 0.00015148514851485146, "loss": 3.1317, "step": 53 }, { "epoch": 1.59, "learning_rate": 0.0001485148514851485, "loss": 3.1369, "step": 54 }, { "epoch": 1.62, "learning_rate": 0.00014554455445544553, "loss": 3.1547, "step": 55 }, { "epoch": 1.65, "learning_rate": 0.00014257425742574255, "loss": 3.1356, "step": 56 }, { "epoch": 1.68, "learning_rate": 0.0001396039603960396, "loss": 3.1538, "step": 57 }, { "epoch": 1.71, "learning_rate": 0.00013663366336633662, "loss": 3.1549, "step": 58 }, { "epoch": 1.74, "learning_rate": 0.00013366336633663365, "loss": 3.1275, "step": 59 }, { "epoch": 1.76, "learning_rate": 0.00013069306930693067, "loss": 3.134, "step": 60 }, { "epoch": 1.76, "eval_cer": 1.0, "eval_loss": 3.153820276260376, "eval_runtime": 36.2824, "eval_samples_per_second": 14.112, "eval_steps_per_second": 0.882, "eval_wer": 1.0, "step": 60 }, { "epoch": 1.79, "learning_rate": 0.00012772277227722772, "loss": 3.1667, "step": 61 }, { "epoch": 1.82, "learning_rate": 0.00012475247524752474, "loss": 3.1346, "step": 62 }, { "epoch": 1.85, "learning_rate": 0.00012178217821782177, "loss": 3.1247, "step": 63 }, { "epoch": 1.88, "learning_rate": 0.00011881188118811881, "loss": 3.1358, "step": 64 }, { "epoch": 1.91, "learning_rate": 0.00011584158415841583, "loss": 3.1382, "step": 65 }, { "epoch": 1.94, "learning_rate": 0.00011287128712871287, "loss": 3.1455, "step": 66 }, { "epoch": 1.97, "learning_rate": 0.00010990099009900989, "loss": 3.1397, "step": 67 }, { "epoch": 2.0, "learning_rate": 0.00010693069306930692, "loss": 3.1402, "step": 68 }, { "epoch": 2.03, "learning_rate": 0.00010396039603960394, "loss": 3.1163, "step": 69 }, { "epoch": 2.06, "learning_rate": 0.00010099009900990099, "loss": 3.1175, "step": 70 }, { "epoch": 2.09, "learning_rate": 9.801980198019801e-05, "loss": 3.1169, "step": 71 }, { "epoch": 2.12, "learning_rate": 9.504950495049504e-05, "loss": 3.1242, "step": 72 }, { "epoch": 2.15, "learning_rate": 9.207920792079207e-05, "loss": 3.1326, "step": 73 }, { "epoch": 2.18, "learning_rate": 8.910891089108909e-05, "loss": 3.1394, "step": 74 }, { "epoch": 2.21, "learning_rate": 8.613861386138613e-05, "loss": 3.1369, "step": 75 }, { "epoch": 2.24, "learning_rate": 8.316831683168316e-05, "loss": 3.1284, "step": 76 }, { "epoch": 2.26, "learning_rate": 8.01980198019802e-05, "loss": 3.1203, "step": 77 }, { "epoch": 2.29, "learning_rate": 7.722772277227722e-05, "loss": 3.1245, "step": 78 }, { "epoch": 2.32, "learning_rate": 7.425742574257426e-05, "loss": 3.1243, "step": 79 }, { "epoch": 2.35, "learning_rate": 7.128712871287128e-05, "loss": 3.132, "step": 80 }, { "epoch": 2.35, "eval_cer": 1.0, "eval_loss": 3.1410903930664062, "eval_runtime": 36.0623, "eval_samples_per_second": 14.198, "eval_steps_per_second": 0.887, "eval_wer": 1.0, "step": 80 }, { "epoch": 2.38, "learning_rate": 6.831683168316831e-05, "loss": 3.1249, "step": 81 }, { "epoch": 2.41, "learning_rate": 6.534653465346533e-05, "loss": 3.1271, "step": 82 }, { "epoch": 2.44, "learning_rate": 6.237623762376237e-05, "loss": 3.1245, "step": 83 }, { "epoch": 2.47, "learning_rate": 5.9405940594059404e-05, "loss": 3.1242, "step": 84 }, { "epoch": 2.5, "learning_rate": 5.643564356435643e-05, "loss": 3.126, "step": 85 }, { "epoch": 2.53, "learning_rate": 5.346534653465346e-05, "loss": 3.1327, "step": 86 }, { "epoch": 2.56, "learning_rate": 5.0495049504950497e-05, "loss": 3.1214, "step": 87 }, { "epoch": 2.59, "learning_rate": 4.752475247524752e-05, "loss": 3.1513, "step": 88 }, { "epoch": 2.62, "learning_rate": 4.4554455445544547e-05, "loss": 3.128, "step": 89 }, { "epoch": 2.65, "learning_rate": 4.158415841584158e-05, "loss": 3.1243, "step": 90 }, { "epoch": 2.68, "learning_rate": 3.861386138613861e-05, "loss": 3.1342, "step": 91 }, { "epoch": 2.71, "learning_rate": 3.564356435643564e-05, "loss": 3.1474, "step": 92 }, { "epoch": 2.74, "learning_rate": 3.267326732673267e-05, "loss": 3.1259, "step": 93 }, { "epoch": 2.76, "learning_rate": 2.9702970297029702e-05, "loss": 3.1205, "step": 94 }, { "epoch": 2.79, "learning_rate": 2.673267326732673e-05, "loss": 3.1273, "step": 95 }, { "epoch": 2.82, "learning_rate": 2.376237623762376e-05, "loss": 3.1177, "step": 96 }, { "epoch": 2.85, "learning_rate": 2.079207920792079e-05, "loss": 3.1179, "step": 97 }, { "epoch": 2.88, "learning_rate": 1.782178217821782e-05, "loss": 3.1272, "step": 98 }, { "epoch": 2.91, "learning_rate": 1.4851485148514851e-05, "loss": 3.1404, "step": 99 }, { "epoch": 2.94, "learning_rate": 1.188118811881188e-05, "loss": 3.1295, "step": 100 }, { "epoch": 2.94, "eval_cer": 1.0, "eval_loss": 3.1372523307800293, "eval_runtime": 36.088, "eval_samples_per_second": 14.188, "eval_steps_per_second": 0.887, "eval_wer": 1.0, "step": 100 }, { "epoch": 2.97, "learning_rate": 8.91089108910891e-06, "loss": 3.1441, "step": 101 }, { "epoch": 3.0, "learning_rate": 5.94059405940594e-06, "loss": 3.1193, "step": 102 }, { "epoch": 3.0, "step": 102, "total_flos": 3.102653790168285e+18, "train_loss": 3.6673195455588545, "train_runtime": 1011.7717, "train_samples_per_second": 6.443, "train_steps_per_second": 0.101 } ], "max_steps": 102, "num_train_epochs": 3, "total_flos": 3.102653790168285e+18, "trial_name": null, "trial_params": null }