{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99819168173599, "global_step": 6900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.72, "learning_rate": 1.485e-05, "loss": 10.7673, "step": 100 }, { "epoch": 1.45, "learning_rate": 2.985e-05, "loss": 4.4331, "step": 200 }, { "epoch": 2.17, "learning_rate": 4.484999999999999e-05, "loss": 3.4707, "step": 300 }, { "epoch": 2.9, "learning_rate": 5.985e-05, "loss": 3.0807, "step": 400 }, { "epoch": 3.62, "learning_rate": 7.484999999999999e-05, "loss": 2.9736, "step": 500 }, { "epoch": 3.62, "eval_cer": 1.0, "eval_loss": 2.9508235454559326, "eval_runtime": 114.1579, "eval_samples_per_second": 33.664, "eval_steps_per_second": 4.213, "eval_wer": 1.0, "step": 500 }, { "epoch": 4.35, "learning_rate": 8.984999999999999e-05, "loss": 2.9229, "step": 600 }, { "epoch": 5.07, "learning_rate": 0.00010484999999999999, "loss": 2.8971, "step": 700 }, { "epoch": 5.8, "learning_rate": 0.00011985, "loss": 2.7156, "step": 800 }, { "epoch": 6.52, "learning_rate": 0.00013485, "loss": 1.8057, "step": 900 }, { "epoch": 7.25, "learning_rate": 0.00014984999999999998, "loss": 1.3293, "step": 1000 }, { "epoch": 7.25, "eval_cer": 0.08615393306828052, "eval_loss": 0.33302000164985657, "eval_runtime": 114.4981, "eval_samples_per_second": 33.564, "eval_steps_per_second": 4.201, "eval_wer": 0.8407494145199064, "step": 1000 }, { "epoch": 7.97, "learning_rate": 0.00016485, "loss": 1.1212, "step": 1100 }, { "epoch": 8.69, "learning_rate": 0.00017984999999999998, "loss": 1.0403, "step": 1200 }, { "epoch": 9.42, "learning_rate": 0.00019484999999999997, "loss": 0.9903, "step": 1300 }, { "epoch": 10.14, "learning_rate": 0.00020984999999999998, "loss": 0.9766, "step": 1400 }, { "epoch": 10.87, "learning_rate": 0.00022485, "loss": 0.956, "step": 1500 }, { "epoch": 10.87, "eval_cer": 0.06022301157428658, "eval_loss": 0.2042141854763031, "eval_runtime": 114.0353, "eval_samples_per_second": 33.7, "eval_steps_per_second": 4.218, "eval_wer": 0.6872235232890971, "step": 1500 }, { "epoch": 11.59, "learning_rate": 0.00023984999999999998, "loss": 0.9616, "step": 1600 }, { "epoch": 12.32, "learning_rate": 0.00025485, "loss": 0.9653, "step": 1700 }, { "epoch": 13.04, "learning_rate": 0.00026984999999999997, "loss": 0.9653, "step": 1800 }, { "epoch": 13.77, "learning_rate": 0.00028484999999999996, "loss": 0.9486, "step": 1900 }, { "epoch": 14.49, "learning_rate": 0.00029985, "loss": 0.9509, "step": 2000 }, { "epoch": 14.49, "eval_cer": 0.06515214643343903, "eval_loss": 0.21843121945858002, "eval_runtime": 115.7242, "eval_samples_per_second": 33.208, "eval_steps_per_second": 4.156, "eval_wer": 0.7088212334113974, "step": 2000 }, { "epoch": 15.22, "learning_rate": 0.00029393877551020406, "loss": 0.9402, "step": 2100 }, { "epoch": 15.94, "learning_rate": 0.0002878163265306122, "loss": 0.9357, "step": 2200 }, { "epoch": 16.67, "learning_rate": 0.0002816938775510204, "loss": 0.9301, "step": 2300 }, { "epoch": 17.39, "learning_rate": 0.00027563265306122445, "loss": 0.9268, "step": 2400 }, { "epoch": 18.12, "learning_rate": 0.00026951020408163266, "loss": 0.9272, "step": 2500 }, { "epoch": 18.12, "eval_cer": 0.07027901163078096, "eval_loss": 0.2312462031841278, "eval_runtime": 107.6714, "eval_samples_per_second": 35.692, "eval_steps_per_second": 4.467, "eval_wer": 0.7210512620348686, "step": 2500 }, { "epoch": 18.84, "learning_rate": 0.0002633877551020408, "loss": 0.9191, "step": 2600 }, { "epoch": 19.56, "learning_rate": 0.00025726530612244896, "loss": 0.9016, "step": 2700 }, { "epoch": 20.29, "learning_rate": 0.0002511428571428571, "loss": 0.9105, "step": 2800 }, { "epoch": 21.01, "learning_rate": 0.00024502040816326527, "loss": 0.8923, "step": 2900 }, { "epoch": 21.74, "learning_rate": 0.00023889795918367345, "loss": 0.8561, "step": 3000 }, { "epoch": 21.74, "eval_cer": 0.06309716327582676, "eval_loss": 0.21578675508499146, "eval_runtime": 108.0601, "eval_samples_per_second": 35.564, "eval_steps_per_second": 4.451, "eval_wer": 0.6838407494145199, "step": 3000 }, { "epoch": 22.46, "learning_rate": 0.0002327755102040816, "loss": 0.8562, "step": 3100 }, { "epoch": 23.19, "learning_rate": 0.00022665306122448975, "loss": 0.855, "step": 3200 }, { "epoch": 23.91, "learning_rate": 0.00022053061224489796, "loss": 0.8398, "step": 3300 }, { "epoch": 24.64, "learning_rate": 0.0002144081632653061, "loss": 0.827, "step": 3400 }, { "epoch": 25.36, "learning_rate": 0.00020828571428571426, "loss": 0.8258, "step": 3500 }, { "epoch": 25.36, "eval_cer": 0.060060590225059496, "eval_loss": 0.19697847962379456, "eval_runtime": 107.078, "eval_samples_per_second": 35.89, "eval_steps_per_second": 4.492, "eval_wer": 0.6843611761644548, "step": 3500 }, { "epoch": 26.09, "learning_rate": 0.00020216326530612242, "loss": 0.8283, "step": 3600 }, { "epoch": 26.81, "learning_rate": 0.00019604081632653057, "loss": 0.8212, "step": 3700 }, { "epoch": 27.54, "learning_rate": 0.00018991836734693878, "loss": 0.8332, "step": 3800 }, { "epoch": 28.26, "learning_rate": 0.00018379591836734693, "loss": 0.8324, "step": 3900 }, { "epoch": 28.98, "learning_rate": 0.00017767346938775508, "loss": 0.7993, "step": 4000 }, { "epoch": 28.98, "eval_cer": 0.05770901156016298, "eval_loss": 0.18949392437934875, "eval_runtime": 106.6069, "eval_samples_per_second": 36.048, "eval_steps_per_second": 4.512, "eval_wer": 0.6697892271662763, "step": 4000 }, { "epoch": 29.71, "learning_rate": 0.00017155102040816324, "loss": 0.7777, "step": 4100 }, { "epoch": 30.43, "learning_rate": 0.0001654285714285714, "loss": 0.7764, "step": 4200 }, { "epoch": 31.16, "learning_rate": 0.0001593061224489796, "loss": 0.7748, "step": 4300 }, { "epoch": 31.88, "learning_rate": 0.00015318367346938775, "loss": 0.7587, "step": 4400 }, { "epoch": 32.61, "learning_rate": 0.00014712244897959183, "loss": 0.7525, "step": 4500 }, { "epoch": 32.61, "eval_cer": 0.05496197221888748, "eval_loss": 0.18448850512504578, "eval_runtime": 106.7665, "eval_samples_per_second": 35.994, "eval_steps_per_second": 4.505, "eval_wer": 0.6453291699193339, "step": 4500 }, { "epoch": 33.33, "learning_rate": 0.00014099999999999998, "loss": 0.7558, "step": 4600 }, { "epoch": 34.06, "learning_rate": 0.00013487755102040816, "loss": 0.7444, "step": 4700 }, { "epoch": 34.78, "learning_rate": 0.00012875510204081632, "loss": 0.7302, "step": 4800 }, { "epoch": 35.51, "learning_rate": 0.0001226326530612245, "loss": 0.7391, "step": 4900 }, { "epoch": 36.23, "learning_rate": 0.00011657142857142856, "loss": 0.7211, "step": 5000 }, { "epoch": 36.23, "eval_cer": 0.05309765760167223, "eval_loss": 0.17814987897872925, "eval_runtime": 106.0316, "eval_samples_per_second": 36.244, "eval_steps_per_second": 4.536, "eval_wer": 0.6273744470465782, "step": 5000 }, { "epoch": 36.95, "learning_rate": 0.00011044897959183672, "loss": 0.7074, "step": 5100 }, { "epoch": 37.68, "learning_rate": 0.00010432653061224489, "loss": 0.6976, "step": 5200 }, { "epoch": 38.41, "learning_rate": 9.820408163265305e-05, "loss": 0.6933, "step": 5300 }, { "epoch": 39.13, "learning_rate": 9.214285714285714e-05, "loss": 0.6845, "step": 5400 }, { "epoch": 39.85, "learning_rate": 8.602040816326529e-05, "loss": 0.677, "step": 5500 }, { "epoch": 39.85, "eval_cer": 0.05139576433368407, "eval_loss": 0.17315863072872162, "eval_runtime": 106.2239, "eval_samples_per_second": 36.178, "eval_steps_per_second": 4.528, "eval_wer": 0.6187874056726516, "step": 5500 }, { "epoch": 40.58, "learning_rate": 7.989795918367346e-05, "loss": 0.6951, "step": 5600 }, { "epoch": 41.3, "learning_rate": 7.377551020408162e-05, "loss": 0.6814, "step": 5700 }, { "epoch": 42.03, "learning_rate": 6.765306122448979e-05, "loss": 0.6822, "step": 5800 }, { "epoch": 42.75, "learning_rate": 6.153061224489796e-05, "loss": 0.6658, "step": 5900 }, { "epoch": 43.48, "learning_rate": 5.5408163265306116e-05, "loss": 0.6517, "step": 6000 }, { "epoch": 43.48, "eval_cer": 0.05030824747364184, "eval_loss": 0.1690707802772522, "eval_runtime": 105.9054, "eval_samples_per_second": 36.287, "eval_steps_per_second": 4.542, "eval_wer": 0.6177465521727816, "step": 6000 }, { "epoch": 44.2, "learning_rate": 4.928571428571428e-05, "loss": 0.6327, "step": 6100 }, { "epoch": 44.93, "learning_rate": 4.316326530612245e-05, "loss": 0.6355, "step": 6200 }, { "epoch": 45.65, "learning_rate": 3.704081632653061e-05, "loss": 0.6328, "step": 6300 }, { "epoch": 46.38, "learning_rate": 3.0918367346938774e-05, "loss": 0.6388, "step": 6400 }, { "epoch": 47.1, "learning_rate": 2.4795918367346937e-05, "loss": 0.6326, "step": 6500 }, { "epoch": 47.1, "eval_cer": 0.0478507418418581, "eval_loss": 0.1618689000606537, "eval_runtime": 108.5769, "eval_samples_per_second": 35.394, "eval_steps_per_second": 4.43, "eval_wer": 0.6044756700494406, "step": 6500 }, { "epoch": 47.82, "learning_rate": 1.86734693877551e-05, "loss": 0.6231, "step": 6600 }, { "epoch": 48.55, "learning_rate": 1.2551020408163265e-05, "loss": 0.6309, "step": 6700 }, { "epoch": 49.27, "learning_rate": 6.428571428571428e-06, "loss": 0.6341, "step": 6800 }, { "epoch": 50.0, "learning_rate": 3.0612244897959183e-07, "loss": 0.6141, "step": 6900 }, { "epoch": 50.0, "step": 6900, "total_flos": 4.523197640315366e+19, "train_loss": 1.2140741569408473, "train_runtime": 17169.0647, "train_samples_per_second": 25.747, "train_steps_per_second": 0.402 } ], "max_steps": 6900, "num_train_epochs": 50, "total_flos": 4.523197640315366e+19, "trial_name": null, "trial_params": null }