diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 100.0, - "global_step": 6900, + "epoch": 200.0, + "global_step": 13800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -3717,566 +3717,4832 @@ }, { "epoch": 87.1, - "learning_rate": 1.5135593220338984e-05, - "loss": 0.5942, + "learning_rate": 6.088281250000001e-05, + "loss": 0.595, "step": 6010 }, { "epoch": 87.25, - "learning_rate": 1.4966101694915256e-05, - "loss": 0.6446, + "learning_rate": 6.08046875e-05, + "loss": 0.6517, "step": 6020 }, { "epoch": 87.39, - "learning_rate": 1.4796610169491525e-05, - "loss": 0.5991, + "learning_rate": 6.07265625e-05, + "loss": 0.6079, "step": 6030 }, { "epoch": 87.54, - "learning_rate": 1.4627118644067797e-05, - "loss": 0.6077, + "learning_rate": 6.06484375e-05, + "loss": 0.6203, "step": 6040 }, { "epoch": 87.68, - "learning_rate": 1.4457627118644068e-05, - "loss": 0.5856, + "learning_rate": 6.05703125e-05, + "loss": 0.6005, "step": 6050 }, { "epoch": 87.83, - "learning_rate": 1.428813559322034e-05, - "loss": 0.6133, + "learning_rate": 6.0492187500000006e-05, + "loss": 0.6321, "step": 6060 }, { "epoch": 87.97, - "learning_rate": 1.411864406779661e-05, - "loss": 0.5992, + "learning_rate": 6.04140625e-05, + "loss": 0.6156, "step": 6070 }, { "epoch": 88.12, - "learning_rate": 1.3949152542372881e-05, - "loss": 0.6122, + "learning_rate": 6.0335937500000005e-05, + "loss": 0.6329, "step": 6080 }, { "epoch": 88.26, - "learning_rate": 1.3779661016949153e-05, - "loss": 0.6178, + "learning_rate": 6.02578125e-05, + "loss": 0.6311, "step": 6090 }, { "epoch": 88.41, - "learning_rate": 1.3610169491525424e-05, - "loss": 0.6505, + "learning_rate": 6.0179687500000005e-05, + "loss": 0.6689, "step": 6100 }, { "epoch": 88.55, - "learning_rate": 1.3440677966101695e-05, - "loss": 0.5641, + "learning_rate": 6.010156250000001e-05, + "loss": 0.5824, "step": 6110 }, { "epoch": 88.7, - "learning_rate": 1.3271186440677965e-05, - "loss": 0.6329, + "learning_rate": 6.0023437500000005e-05, + "loss": 0.6491, "step": 6120 }, { "epoch": 88.84, - "learning_rate": 1.3101694915254237e-05, - "loss": 0.6119, + "learning_rate": 5.994531250000001e-05, + "loss": 0.6326, "step": 6130 }, { "epoch": 88.99, - "learning_rate": 1.2932203389830508e-05, - "loss": 0.6043, + "learning_rate": 5.98671875e-05, + "loss": 0.6272, "step": 6140 }, { "epoch": 89.13, - "learning_rate": 1.276271186440678e-05, - "loss": 0.6661, + "learning_rate": 5.97890625e-05, + "loss": 0.6865, "step": 6150 }, { "epoch": 89.28, - "learning_rate": 1.2593220338983053e-05, - "loss": 0.5938, + "learning_rate": 5.971093750000001e-05, + "loss": 0.6149, "step": 6160 }, { "epoch": 89.42, - "learning_rate": 1.2423728813559323e-05, - "loss": 0.6135, + "learning_rate": 5.96328125e-05, + "loss": 0.6385, "step": 6170 }, { "epoch": 89.57, - "learning_rate": 1.2254237288135594e-05, - "loss": 0.6354, + "learning_rate": 5.9554687500000003e-05, + "loss": 0.6621, "step": 6180 }, { "epoch": 89.71, - "learning_rate": 1.2084745762711865e-05, - "loss": 0.5597, + "learning_rate": 5.94765625e-05, + "loss": 0.5817, "step": 6190 }, { "epoch": 89.86, - "learning_rate": 1.1915254237288135e-05, - "loss": 0.6065, + "learning_rate": 5.93984375e-05, + "loss": 0.6333, "step": 6200 }, { "epoch": 90.0, - "learning_rate": 1.1745762711864407e-05, - "loss": 0.5579, + "learning_rate": 5.9320312500000006e-05, + "loss": 0.5794, "step": 6210 }, { "epoch": 90.14, - "learning_rate": 1.157627118644068e-05, - "loss": 0.5914, + "learning_rate": 5.92421875e-05, + "loss": 0.6153, "step": 6220 }, { "epoch": 90.29, - "learning_rate": 1.140677966101695e-05, - "loss": 0.6552, + "learning_rate": 5.9164062500000006e-05, + "loss": 0.6794, "step": 6230 }, { "epoch": 90.43, - "learning_rate": 1.1237288135593221e-05, - "loss": 0.5762, + "learning_rate": 5.90859375e-05, + "loss": 0.601, "step": 6240 }, { "epoch": 90.58, - "learning_rate": 1.1067796610169492e-05, - "loss": 0.5902, + "learning_rate": 5.9007812500000005e-05, + "loss": 0.6176, "step": 6250 }, { "epoch": 90.72, - "learning_rate": 1.0898305084745764e-05, - "loss": 0.5821, + "learning_rate": 5.892968750000001e-05, + "loss": 0.6055, "step": 6260 }, { "epoch": 90.87, - "learning_rate": 1.0728813559322035e-05, - "loss": 0.6163, + "learning_rate": 5.88515625e-05, + "loss": 0.6389, "step": 6270 }, { "epoch": 91.01, - "learning_rate": 1.0559322033898305e-05, - "loss": 0.6451, + "learning_rate": 5.877343750000001e-05, + "loss": 0.6727, "step": 6280 }, { "epoch": 91.16, - "learning_rate": 1.0389830508474577e-05, - "loss": 0.6399, + "learning_rate": 5.86953125e-05, + "loss": 0.6592, "step": 6290 }, { "epoch": 91.3, - "learning_rate": 1.0220338983050848e-05, - "loss": 0.6132, + "learning_rate": 5.86171875e-05, + "loss": 0.6367, "step": 6300 }, { "epoch": 91.45, - "learning_rate": 1.005084745762712e-05, - "loss": 0.612, + "learning_rate": 5.853906250000001e-05, + "loss": 0.6366, "step": 6310 }, { "epoch": 91.59, - "learning_rate": 9.88135593220339e-06, - "loss": 0.5939, + "learning_rate": 5.84609375e-05, + "loss": 0.6171, "step": 6320 }, { "epoch": 91.74, - "learning_rate": 9.71186440677966e-06, - "loss": 0.6379, + "learning_rate": 5.8382812500000004e-05, + "loss": 0.6642, "step": 6330 }, { "epoch": 91.88, - "learning_rate": 9.542372881355932e-06, - "loss": 0.6063, + "learning_rate": 5.83046875e-05, + "loss": 0.6345, "step": 6340 }, { "epoch": 92.03, - "learning_rate": 9.372881355932204e-06, - "loss": 0.6005, + "learning_rate": 5.82265625e-05, + "loss": 0.6265, "step": 6350 }, { "epoch": 92.17, - "learning_rate": 9.203389830508475e-06, - "loss": 0.6076, + "learning_rate": 5.8148437500000006e-05, + "loss": 0.6302, "step": 6360 }, { "epoch": 92.32, - "learning_rate": 9.033898305084747e-06, - "loss": 0.6082, + "learning_rate": 5.80703125e-05, + "loss": 0.6347, "step": 6370 }, { "epoch": 92.46, - "learning_rate": 8.864406779661018e-06, - "loss": 0.6571, + "learning_rate": 5.7992187500000006e-05, + "loss": 0.6858, "step": 6380 }, { "epoch": 92.61, - "learning_rate": 8.69491525423729e-06, - "loss": 0.5666, + "learning_rate": 5.79140625e-05, + "loss": 0.5876, "step": 6390 }, { "epoch": 92.75, - "learning_rate": 8.52542372881356e-06, - "loss": 0.5932, + "learning_rate": 5.7835937500000006e-05, + "loss": 0.6195, "step": 6400 }, { "epoch": 92.9, - "learning_rate": 8.35593220338983e-06, - "loss": 0.6317, + "learning_rate": 5.775781250000001e-05, + "loss": 0.6559, "step": 6410 }, { "epoch": 93.04, - "learning_rate": 8.186440677966102e-06, - "loss": 0.6665, + "learning_rate": 5.76796875e-05, + "loss": 0.6908, "step": 6420 }, { "epoch": 93.19, - "learning_rate": 8.016949152542374e-06, - "loss": 0.5687, + "learning_rate": 5.760156250000001e-05, + "loss": 0.5887, "step": 6430 }, { "epoch": 93.33, - "learning_rate": 7.847457627118643e-06, - "loss": 0.5904, + "learning_rate": 5.75234375e-05, + "loss": 0.6118, "step": 6440 }, { "epoch": 93.48, - "learning_rate": 7.677966101694915e-06, - "loss": 0.7073, + "learning_rate": 5.74453125e-05, + "loss": 0.7352, "step": 6450 }, { "epoch": 93.62, - "learning_rate": 7.508474576271186e-06, - "loss": 0.619, + "learning_rate": 5.736718750000001e-05, + "loss": 0.6462, "step": 6460 }, { "epoch": 93.77, - "learning_rate": 7.338983050847458e-06, - "loss": 0.6203, + "learning_rate": 5.72890625e-05, + "loss": 0.6448, "step": 6470 }, { "epoch": 93.91, - "learning_rate": 7.1694915254237284e-06, - "loss": 0.6576, + "learning_rate": 5.7210937500000004e-05, + "loss": 0.6806, "step": 6480 }, { "epoch": 94.06, - "learning_rate": 7.000000000000001e-06, - "loss": 0.6249, + "learning_rate": 5.71328125e-05, + "loss": 0.6465, "step": 6490 }, { "epoch": 94.2, - "learning_rate": 6.830508474576272e-06, - "loss": 0.588, + "learning_rate": 5.7062500000000005e-05, + "loss": 0.608, "step": 6500 }, { "epoch": 94.2, - "eval_loss": 0.2143363356590271, - "eval_runtime": 573.5555, - "eval_samples_per_second": 5.917, - "eval_steps_per_second": 0.741, - "eval_wer": 0.15602067565679725, + "eval_loss": 0.22552849352359772, + "eval_runtime": 596.3798, + "eval_samples_per_second": 5.691, + "eval_steps_per_second": 0.713, + "eval_wer": 0.1638849854958131, "step": 6500 }, { "epoch": 94.35, - "learning_rate": 6.661016949152543e-06, - "loss": 0.669, + "learning_rate": 5.6984375e-05, + "loss": 0.6931, "step": 6510 }, { "epoch": 94.49, - "learning_rate": 6.491525423728814e-06, - "loss": 0.5974, + "learning_rate": 5.6906250000000004e-05, + "loss": 0.6172, "step": 6520 }, { "epoch": 94.64, - "learning_rate": 6.322033898305085e-06, - "loss": 0.6671, + "learning_rate": 5.6828125e-05, + "loss": 0.686, "step": 6530 }, { "epoch": 94.78, - "learning_rate": 6.152542372881356e-06, - "loss": 0.6735, + "learning_rate": 5.6750000000000004e-05, + "loss": 0.6993, "step": 6540 }, { "epoch": 94.93, - "learning_rate": 5.983050847457628e-06, - "loss": 0.6356, + "learning_rate": 5.667187500000001e-05, + "loss": 0.6582, "step": 6550 }, { "epoch": 95.07, - "learning_rate": 5.813559322033898e-06, - "loss": 0.6135, + "learning_rate": 5.6593750000000003e-05, + "loss": 0.6366, "step": 6560 }, { "epoch": 95.22, - "learning_rate": 5.64406779661017e-06, - "loss": 0.6403, + "learning_rate": 5.6515625000000007e-05, + "loss": 0.6609, "step": 6570 }, { "epoch": 95.36, - "learning_rate": 5.4745762711864405e-06, - "loss": 0.5574, + "learning_rate": 5.6437499999999996e-05, + "loss": 0.5802, "step": 6580 }, { "epoch": 95.51, - "learning_rate": 5.305084745762713e-06, - "loss": 0.552, + "learning_rate": 5.6359375000000006e-05, + "loss": 0.5748, "step": 6590 }, { "epoch": 95.65, - "learning_rate": 5.135593220338983e-06, - "loss": 0.5979, + "learning_rate": 5.628125000000001e-05, + "loss": 0.6195, "step": 6600 }, { "epoch": 95.8, - "learning_rate": 4.966101694915255e-06, - "loss": 0.6342, + "learning_rate": 5.6203125e-05, + "loss": 0.6524, "step": 6610 }, { "epoch": 95.94, - "learning_rate": 4.7966101694915255e-06, - "loss": 0.6397, + "learning_rate": 5.6125e-05, + "loss": 0.6621, "step": 6620 }, { "epoch": 96.09, - "learning_rate": 4.627118644067797e-06, - "loss": 0.5847, + "learning_rate": 5.6046875e-05, + "loss": 0.6074, "step": 6630 }, { "epoch": 96.23, - "learning_rate": 4.4576271186440676e-06, - "loss": 0.5736, + "learning_rate": 5.596875e-05, + "loss": 0.598, "step": 6640 }, { "epoch": 96.38, - "learning_rate": 4.288135593220339e-06, - "loss": 0.6148, + "learning_rate": 5.5890625000000005e-05, + "loss": 0.6296, "step": 6650 }, { "epoch": 96.52, - "learning_rate": 4.1186440677966105e-06, - "loss": 0.6084, + "learning_rate": 5.58125e-05, + "loss": 0.6353, "step": 6660 }, { "epoch": 96.67, - "learning_rate": 3.949152542372882e-06, - "loss": 0.5835, + "learning_rate": 5.5734375000000005e-05, + "loss": 0.6013, "step": 6670 }, { "epoch": 96.81, - "learning_rate": 3.779661016949153e-06, - "loss": 0.5791, + "learning_rate": 5.565625e-05, + "loss": 0.606, "step": 6680 }, { "epoch": 96.96, - "learning_rate": 3.610169491525424e-06, - "loss": 0.7132, + "learning_rate": 5.5578125000000004e-05, + "loss": 0.742, "step": 6690 }, { "epoch": 97.1, - "learning_rate": 3.440677966101695e-06, - "loss": 0.5572, + "learning_rate": 5.550000000000001e-05, + "loss": 0.5763, "step": 6700 }, { "epoch": 97.25, - "learning_rate": 3.271186440677966e-06, - "loss": 0.6607, + "learning_rate": 5.5421875000000004e-05, + "loss": 0.6839, "step": 6710 }, { "epoch": 97.39, - "learning_rate": 3.1016949152542375e-06, - "loss": 0.6343, + "learning_rate": 5.534375000000001e-05, + "loss": 0.6565, "step": 6720 }, { "epoch": 97.54, - "learning_rate": 2.9322033898305086e-06, - "loss": 0.6126, + "learning_rate": 5.5265624999999997e-05, + "loss": 0.6341, "step": 6730 }, { "epoch": 97.68, - "learning_rate": 2.76271186440678e-06, - "loss": 0.6536, + "learning_rate": 5.51875e-05, + "loss": 0.6721, "step": 6740 }, { "epoch": 97.83, - "learning_rate": 2.593220338983051e-06, - "loss": 0.5843, + "learning_rate": 5.510937500000001e-05, + "loss": 0.6134, "step": 6750 }, { "epoch": 97.97, - "learning_rate": 2.423728813559322e-06, - "loss": 0.5874, + "learning_rate": 5.503125e-05, + "loss": 0.6161, "step": 6760 }, { "epoch": 98.12, - "learning_rate": 2.254237288135593e-06, - "loss": 0.6067, + "learning_rate": 5.4953125e-05, + "loss": 0.6273, "step": 6770 }, { "epoch": 98.26, - "learning_rate": 2.0847457627118646e-06, - "loss": 0.5626, + "learning_rate": 5.4875e-05, + "loss": 0.5778, "step": 6780 }, { "epoch": 98.41, - "learning_rate": 1.9152542372881356e-06, - "loss": 0.6659, + "learning_rate": 5.4796875e-05, + "loss": 0.6879, "step": 6790 }, { "epoch": 98.55, - "learning_rate": 1.7457627118644067e-06, - "loss": 0.6462, + "learning_rate": 5.4718750000000005e-05, + "loss": 0.6652, "step": 6800 }, { "epoch": 98.7, - "learning_rate": 1.5762711864406781e-06, - "loss": 0.67, + "learning_rate": 5.4640625e-05, + "loss": 0.6894, "step": 6810 }, { "epoch": 98.84, - "learning_rate": 1.4067796610169492e-06, - "loss": 0.5734, + "learning_rate": 5.4562500000000005e-05, + "loss": 0.5951, "step": 6820 }, { "epoch": 98.99, - "learning_rate": 1.2372881355932204e-06, - "loss": 0.6133, + "learning_rate": 5.4484375e-05, + "loss": 0.6397, "step": 6830 }, { "epoch": 99.13, - "learning_rate": 1.0677966101694917e-06, - "loss": 0.596, + "learning_rate": 5.4406250000000004e-05, + "loss": 0.6221, "step": 6840 }, { "epoch": 99.28, - "learning_rate": 9.152542372881356e-07, - "loss": 0.6763, + "learning_rate": 5.432812500000001e-05, + "loss": 0.7, "step": 6850 }, { "epoch": 99.42, - "learning_rate": 7.457627118644068e-07, - "loss": 0.6368, + "learning_rate": 5.4250000000000004e-05, + "loss": 0.6638, "step": 6860 }, { "epoch": 99.57, - "learning_rate": 5.76271186440678e-07, - "loss": 0.6463, + "learning_rate": 5.417187500000001e-05, + "loss": 0.6702, "step": 6870 }, { "epoch": 99.71, - "learning_rate": 4.0677966101694916e-07, - "loss": 0.62, + "learning_rate": 5.409375e-05, + "loss": 0.637, "step": 6880 }, { "epoch": 99.86, - "learning_rate": 2.3728813559322033e-07, - "loss": 0.6346, + "learning_rate": 5.4015625e-05, + "loss": 0.6518, "step": 6890 }, { "epoch": 100.0, - "learning_rate": 6.779661016949153e-08, - "loss": 0.5672, + "learning_rate": 5.393750000000001e-05, + "loss": 0.5912, "step": 6900 }, { - "epoch": 100.0, - "step": 6900, - "total_flos": 1.0025325448199992e+20, - "train_loss": 1.2894020353538402, - "train_runtime": 52505.7639, - "train_samples_per_second": 4.179, - "train_steps_per_second": 0.131 + "epoch": 100.14, + "learning_rate": 5.3859375e-05, + "loss": 0.6999, + "step": 6910 + }, + { + "epoch": 100.29, + "learning_rate": 5.378125e-05, + "loss": 0.5682, + "step": 6920 + }, + { + "epoch": 100.43, + "learning_rate": 5.3703125e-05, + "loss": 0.6023, + "step": 6930 + }, + { + "epoch": 100.58, + "learning_rate": 5.3625e-05, + "loss": 0.6437, + "step": 6940 + }, + { + "epoch": 100.72, + "learning_rate": 5.3546875000000006e-05, + "loss": 0.7168, + "step": 6950 + }, + { + "epoch": 100.87, + "learning_rate": 5.346875e-05, + "loss": 0.5913, + "step": 6960 + }, + { + "epoch": 101.01, + "learning_rate": 5.3390625000000005e-05, + "loss": 0.6047, + "step": 6970 + }, + { + "epoch": 101.16, + "learning_rate": 5.33125e-05, + "loss": 0.6355, + "step": 6980 + }, + { + "epoch": 101.3, + "learning_rate": 5.3234375000000005e-05, + "loss": 0.7046, + "step": 6990 + }, + { + "epoch": 101.45, + "learning_rate": 5.315625000000001e-05, + "loss": 0.6099, + "step": 7000 + }, + { + "epoch": 101.45, + "eval_loss": 0.22652995586395264, + "eval_runtime": 583.6707, + "eval_samples_per_second": 5.815, + "eval_steps_per_second": 0.728, + "eval_wer": 0.1621839278817313, + "step": 7000 + }, + { + "epoch": 101.59, + "learning_rate": 5.3078125e-05, + "loss": 0.5706, + "step": 7010 + }, + { + "epoch": 101.74, + "learning_rate": 5.300000000000001e-05, + "loss": 0.5656, + "step": 7020 + }, + { + "epoch": 101.88, + "learning_rate": 5.2921875e-05, + "loss": 0.6755, + "step": 7030 + }, + { + "epoch": 102.03, + "learning_rate": 5.284375e-05, + "loss": 0.6343, + "step": 7040 + }, + { + "epoch": 102.17, + "learning_rate": 5.276562500000001e-05, + "loss": 0.6129, + "step": 7050 + }, + { + "epoch": 102.32, + "learning_rate": 5.26875e-05, + "loss": 0.6321, + "step": 7060 + }, + { + "epoch": 102.46, + "learning_rate": 5.2609375e-05, + "loss": 0.6189, + "step": 7070 + }, + { + "epoch": 102.61, + "learning_rate": 5.253125e-05, + "loss": 0.6267, + "step": 7080 + }, + { + "epoch": 102.75, + "learning_rate": 5.2453125e-05, + "loss": 0.601, + "step": 7090 + }, + { + "epoch": 102.9, + "learning_rate": 5.2375000000000006e-05, + "loss": 0.6051, + "step": 7100 + }, + { + "epoch": 103.04, + "learning_rate": 5.2296875e-05, + "loss": 0.538, + "step": 7110 + }, + { + "epoch": 103.19, + "learning_rate": 5.2218750000000006e-05, + "loss": 0.6136, + "step": 7120 + }, + { + "epoch": 103.33, + "learning_rate": 5.2140624999999995e-05, + "loss": 0.625, + "step": 7130 + }, + { + "epoch": 103.48, + "learning_rate": 5.2062500000000005e-05, + "loss": 0.5999, + "step": 7140 + }, + { + "epoch": 103.62, + "learning_rate": 5.198437500000001e-05, + "loss": 0.694, + "step": 7150 + }, + { + "epoch": 103.77, + "learning_rate": 5.190625e-05, + "loss": 0.6446, + "step": 7160 + }, + { + "epoch": 103.91, + "learning_rate": 5.182812500000001e-05, + "loss": 0.5879, + "step": 7170 + }, + { + "epoch": 104.06, + "learning_rate": 5.175e-05, + "loss": 0.6115, + "step": 7180 + }, + { + "epoch": 104.2, + "learning_rate": 5.1671875e-05, + "loss": 0.5663, + "step": 7190 + }, + { + "epoch": 104.35, + "learning_rate": 5.159375000000001e-05, + "loss": 0.5825, + "step": 7200 + }, + { + "epoch": 104.49, + "learning_rate": 5.1515625e-05, + "loss": 0.5692, + "step": 7210 + }, + { + "epoch": 104.64, + "learning_rate": 5.1437500000000003e-05, + "loss": 0.614, + "step": 7220 + }, + { + "epoch": 104.78, + "learning_rate": 5.1359375e-05, + "loss": 0.6005, + "step": 7230 + }, + { + "epoch": 104.93, + "learning_rate": 5.128125e-05, + "loss": 0.6169, + "step": 7240 + }, + { + "epoch": 105.07, + "learning_rate": 5.1203125000000006e-05, + "loss": 0.6383, + "step": 7250 + }, + { + "epoch": 105.22, + "learning_rate": 5.1125e-05, + "loss": 0.5705, + "step": 7260 + }, + { + "epoch": 105.36, + "learning_rate": 5.1046875000000006e-05, + "loss": 0.6212, + "step": 7270 + }, + { + "epoch": 105.51, + "learning_rate": 5.0968749999999995e-05, + "loss": 0.5747, + "step": 7280 + }, + { + "epoch": 105.65, + "learning_rate": 5.0890625000000005e-05, + "loss": 0.6407, + "step": 7290 + }, + { + "epoch": 105.8, + "learning_rate": 5.081250000000001e-05, + "loss": 0.5993, + "step": 7300 + }, + { + "epoch": 105.94, + "learning_rate": 5.0734375e-05, + "loss": 0.6226, + "step": 7310 + }, + { + "epoch": 106.09, + "learning_rate": 5.065625000000001e-05, + "loss": 0.6316, + "step": 7320 + }, + { + "epoch": 106.23, + "learning_rate": 5.0578125e-05, + "loss": 0.6922, + "step": 7330 + }, + { + "epoch": 106.38, + "learning_rate": 5.05e-05, + "loss": 0.6252, + "step": 7340 + }, + { + "epoch": 106.52, + "learning_rate": 5.0421875000000004e-05, + "loss": 0.5841, + "step": 7350 + }, + { + "epoch": 106.67, + "learning_rate": 5.034375e-05, + "loss": 0.6174, + "step": 7360 + }, + { + "epoch": 106.81, + "learning_rate": 5.0265625000000004e-05, + "loss": 0.6482, + "step": 7370 + }, + { + "epoch": 106.96, + "learning_rate": 5.01875e-05, + "loss": 0.5965, + "step": 7380 + }, + { + "epoch": 107.1, + "learning_rate": 5.0109375e-05, + "loss": 0.6417, + "step": 7390 + }, + { + "epoch": 107.25, + "learning_rate": 5.0031250000000007e-05, + "loss": 0.6373, + "step": 7400 + }, + { + "epoch": 107.39, + "learning_rate": 4.9953125e-05, + "loss": 0.6205, + "step": 7410 + }, + { + "epoch": 107.54, + "learning_rate": 4.9875000000000006e-05, + "loss": 0.5891, + "step": 7420 + }, + { + "epoch": 107.68, + "learning_rate": 4.9796875e-05, + "loss": 0.6333, + "step": 7430 + }, + { + "epoch": 107.83, + "learning_rate": 4.9718750000000006e-05, + "loss": 0.6479, + "step": 7440 + }, + { + "epoch": 107.97, + "learning_rate": 4.9640625e-05, + "loss": 0.5854, + "step": 7450 + }, + { + "epoch": 108.12, + "learning_rate": 4.95625e-05, + "loss": 0.602, + "step": 7460 + }, + { + "epoch": 108.26, + "learning_rate": 4.9484375e-05, + "loss": 0.6362, + "step": 7470 + }, + { + "epoch": 108.41, + "learning_rate": 4.9406250000000005e-05, + "loss": 0.7472, + "step": 7480 + }, + { + "epoch": 108.55, + "learning_rate": 4.9328125e-05, + "loss": 0.6158, + "step": 7490 + }, + { + "epoch": 108.7, + "learning_rate": 4.9250000000000004e-05, + "loss": 0.6069, + "step": 7500 + }, + { + "epoch": 108.7, + "eval_loss": 0.22459882497787476, + "eval_runtime": 574.1645, + "eval_samples_per_second": 5.911, + "eval_steps_per_second": 0.74, + "eval_wer": 0.1592502198226627, + "step": 7500 + }, + { + "epoch": 108.84, + "learning_rate": 4.9171875e-05, + "loss": 0.7501, + "step": 7510 + }, + { + "epoch": 108.99, + "learning_rate": 4.9093750000000004e-05, + "loss": 0.585, + "step": 7520 + }, + { + "epoch": 109.13, + "learning_rate": 4.901562500000001e-05, + "loss": 0.6811, + "step": 7530 + }, + { + "epoch": 109.28, + "learning_rate": 4.8937500000000004e-05, + "loss": 0.7343, + "step": 7540 + }, + { + "epoch": 109.42, + "learning_rate": 4.8859375e-05, + "loss": 0.6275, + "step": 7550 + }, + { + "epoch": 109.57, + "learning_rate": 4.878125e-05, + "loss": 0.6326, + "step": 7560 + }, + { + "epoch": 109.71, + "learning_rate": 4.8703125000000006e-05, + "loss": 0.6374, + "step": 7570 + }, + { + "epoch": 109.86, + "learning_rate": 4.8625e-05, + "loss": 0.6079, + "step": 7580 + }, + { + "epoch": 110.0, + "learning_rate": 4.8546875000000006e-05, + "loss": 0.5572, + "step": 7590 + }, + { + "epoch": 110.14, + "learning_rate": 4.846875e-05, + "loss": 0.6443, + "step": 7600 + }, + { + "epoch": 110.29, + "learning_rate": 4.8390625e-05, + "loss": 0.5898, + "step": 7610 + }, + { + "epoch": 110.43, + "learning_rate": 4.83125e-05, + "loss": 0.6258, + "step": 7620 + }, + { + "epoch": 110.58, + "learning_rate": 4.8234375000000005e-05, + "loss": 0.6048, + "step": 7630 + }, + { + "epoch": 110.72, + "learning_rate": 4.815625e-05, + "loss": 0.6092, + "step": 7640 + }, + { + "epoch": 110.87, + "learning_rate": 4.8078125000000005e-05, + "loss": 0.5939, + "step": 7650 + }, + { + "epoch": 111.01, + "learning_rate": 4.8e-05, + "loss": 0.6584, + "step": 7660 + }, + { + "epoch": 111.16, + "learning_rate": 4.7921875000000004e-05, + "loss": 0.6788, + "step": 7670 + }, + { + "epoch": 111.3, + "learning_rate": 4.784375e-05, + "loss": 0.6859, + "step": 7680 + }, + { + "epoch": 111.45, + "learning_rate": 4.7765625000000004e-05, + "loss": 0.6178, + "step": 7690 + }, + { + "epoch": 111.59, + "learning_rate": 4.76875e-05, + "loss": 0.5804, + "step": 7700 + }, + { + "epoch": 111.74, + "learning_rate": 4.7609375000000004e-05, + "loss": 0.693, + "step": 7710 + }, + { + "epoch": 111.88, + "learning_rate": 4.753125000000001e-05, + "loss": 0.6894, + "step": 7720 + }, + { + "epoch": 112.03, + "learning_rate": 4.7453125e-05, + "loss": 0.5552, + "step": 7730 + }, + { + "epoch": 112.17, + "learning_rate": 4.7375e-05, + "loss": 0.5774, + "step": 7740 + }, + { + "epoch": 112.32, + "learning_rate": 4.7296875e-05, + "loss": 0.5482, + "step": 7750 + }, + { + "epoch": 112.46, + "learning_rate": 4.721875e-05, + "loss": 0.6571, + "step": 7760 + }, + { + "epoch": 112.61, + "learning_rate": 4.7140625e-05, + "loss": 0.6455, + "step": 7770 + }, + { + "epoch": 112.75, + "learning_rate": 4.7062500000000006e-05, + "loss": 0.6115, + "step": 7780 + }, + { + "epoch": 112.9, + "learning_rate": 4.6984375e-05, + "loss": 0.6006, + "step": 7790 + }, + { + "epoch": 113.04, + "learning_rate": 4.690625e-05, + "loss": 0.5906, + "step": 7800 + }, + { + "epoch": 113.19, + "learning_rate": 4.6828125e-05, + "loss": 0.5477, + "step": 7810 + }, + { + "epoch": 113.33, + "learning_rate": 4.6750000000000005e-05, + "loss": 0.6492, + "step": 7820 + }, + { + "epoch": 113.48, + "learning_rate": 4.6671875e-05, + "loss": 0.5919, + "step": 7830 + }, + { + "epoch": 113.62, + "learning_rate": 4.6593750000000004e-05, + "loss": 0.5931, + "step": 7840 + }, + { + "epoch": 113.77, + "learning_rate": 4.6515625e-05, + "loss": 0.6467, + "step": 7850 + }, + { + "epoch": 113.91, + "learning_rate": 4.64375e-05, + "loss": 0.5646, + "step": 7860 + }, + { + "epoch": 114.06, + "learning_rate": 4.635937500000001e-05, + "loss": 0.6137, + "step": 7870 + }, + { + "epoch": 114.2, + "learning_rate": 4.6281250000000003e-05, + "loss": 0.5523, + "step": 7880 + }, + { + "epoch": 114.35, + "learning_rate": 4.6203125e-05, + "loss": 0.6965, + "step": 7890 + }, + { + "epoch": 114.49, + "learning_rate": 4.6125e-05, + "loss": 0.542, + "step": 7900 + }, + { + "epoch": 114.64, + "learning_rate": 4.6046875e-05, + "loss": 0.5662, + "step": 7910 + }, + { + "epoch": 114.78, + "learning_rate": 4.596875e-05, + "loss": 0.5677, + "step": 7920 + }, + { + "epoch": 114.93, + "learning_rate": 4.5890625000000006e-05, + "loss": 0.5547, + "step": 7930 + }, + { + "epoch": 115.07, + "learning_rate": 4.58125e-05, + "loss": 0.6085, + "step": 7940 + }, + { + "epoch": 115.22, + "learning_rate": 4.5734375e-05, + "loss": 0.5735, + "step": 7950 + }, + { + "epoch": 115.36, + "learning_rate": 4.565625e-05, + "loss": 0.5977, + "step": 7960 + }, + { + "epoch": 115.51, + "learning_rate": 4.5578125000000005e-05, + "loss": 0.6654, + "step": 7970 + }, + { + "epoch": 115.65, + "learning_rate": 4.55e-05, + "loss": 0.6036, + "step": 7980 + }, + { + "epoch": 115.8, + "learning_rate": 4.5421875000000005e-05, + "loss": 0.6112, + "step": 7990 + }, + { + "epoch": 115.94, + "learning_rate": 4.534375e-05, + "loss": 0.5929, + "step": 8000 + }, + { + "epoch": 115.94, + "eval_loss": 0.2322680950164795, + "eval_runtime": 572.4419, + "eval_samples_per_second": 5.929, + "eval_steps_per_second": 0.742, + "eval_wer": 0.16172373838226956, + "step": 8000 + }, + { + "epoch": 116.09, + "learning_rate": 4.5265625e-05, + "loss": 0.5724, + "step": 8010 + }, + { + "epoch": 116.23, + "learning_rate": 4.518750000000001e-05, + "loss": 0.589, + "step": 8020 + }, + { + "epoch": 116.38, + "learning_rate": 4.5109375000000004e-05, + "loss": 0.607, + "step": 8030 + }, + { + "epoch": 116.52, + "learning_rate": 4.503125e-05, + "loss": 0.6171, + "step": 8040 + }, + { + "epoch": 116.67, + "learning_rate": 4.4953125000000003e-05, + "loss": 0.6011, + "step": 8050 + }, + { + "epoch": 116.81, + "learning_rate": 4.4875e-05, + "loss": 0.6247, + "step": 8060 + }, + { + "epoch": 116.96, + "learning_rate": 4.4796875e-05, + "loss": 0.566, + "step": 8070 + }, + { + "epoch": 117.1, + "learning_rate": 4.4718750000000006e-05, + "loss": 0.6967, + "step": 8080 + }, + { + "epoch": 117.25, + "learning_rate": 4.4640625e-05, + "loss": 0.6296, + "step": 8090 + }, + { + "epoch": 117.39, + "learning_rate": 4.45625e-05, + "loss": 0.6186, + "step": 8100 + }, + { + "epoch": 117.54, + "learning_rate": 4.4484375e-05, + "loss": 0.5811, + "step": 8110 + }, + { + "epoch": 117.68, + "learning_rate": 4.4406250000000005e-05, + "loss": 0.5486, + "step": 8120 + }, + { + "epoch": 117.83, + "learning_rate": 4.4328125e-05, + "loss": 0.5805, + "step": 8130 + }, + { + "epoch": 117.97, + "learning_rate": 4.4250000000000005e-05, + "loss": 0.572, + "step": 8140 + }, + { + "epoch": 118.12, + "learning_rate": 4.4171875e-05, + "loss": 0.6108, + "step": 8150 + }, + { + "epoch": 118.26, + "learning_rate": 4.409375e-05, + "loss": 0.666, + "step": 8160 + }, + { + "epoch": 118.41, + "learning_rate": 4.401562500000001e-05, + "loss": 0.5758, + "step": 8170 + }, + { + "epoch": 118.55, + "learning_rate": 4.3937500000000004e-05, + "loss": 0.6212, + "step": 8180 + }, + { + "epoch": 118.7, + "learning_rate": 4.3859375e-05, + "loss": 0.6679, + "step": 8190 + }, + { + "epoch": 118.84, + "learning_rate": 4.3781250000000004e-05, + "loss": 0.6261, + "step": 8200 + }, + { + "epoch": 118.99, + "learning_rate": 4.3703125e-05, + "loss": 0.6025, + "step": 8210 + }, + { + "epoch": 119.13, + "learning_rate": 4.3625e-05, + "loss": 0.6722, + "step": 8220 + }, + { + "epoch": 119.28, + "learning_rate": 4.3546875000000006e-05, + "loss": 0.5637, + "step": 8230 + }, + { + "epoch": 119.42, + "learning_rate": 4.346875e-05, + "loss": 0.6138, + "step": 8240 + }, + { + "epoch": 119.57, + "learning_rate": 4.3390625e-05, + "loss": 0.6397, + "step": 8250 + }, + { + "epoch": 119.71, + "learning_rate": 4.33125e-05, + "loss": 0.5852, + "step": 8260 + }, + { + "epoch": 119.86, + "learning_rate": 4.3234375000000006e-05, + "loss": 0.616, + "step": 8270 + }, + { + "epoch": 120.0, + "learning_rate": 4.315625e-05, + "loss": 0.5824, + "step": 8280 + }, + { + "epoch": 120.14, + "learning_rate": 4.3078125000000005e-05, + "loss": 0.5909, + "step": 8290 + }, + { + "epoch": 120.29, + "learning_rate": 4.3e-05, + "loss": 0.6523, + "step": 8300 + }, + { + "epoch": 120.43, + "learning_rate": 4.2921875e-05, + "loss": 0.6134, + "step": 8310 + }, + { + "epoch": 120.58, + "learning_rate": 4.284375000000001e-05, + "loss": 0.5903, + "step": 8320 + }, + { + "epoch": 120.72, + "learning_rate": 4.2765625000000004e-05, + "loss": 0.5471, + "step": 8330 + }, + { + "epoch": 120.87, + "learning_rate": 4.26875e-05, + "loss": 0.62, + "step": 8340 + }, + { + "epoch": 121.01, + "learning_rate": 4.2609375000000004e-05, + "loss": 0.6299, + "step": 8350 + }, + { + "epoch": 121.16, + "learning_rate": 4.253125e-05, + "loss": 0.5991, + "step": 8360 + }, + { + "epoch": 121.3, + "learning_rate": 4.2453125000000004e-05, + "loss": 0.597, + "step": 8370 + }, + { + "epoch": 121.45, + "learning_rate": 4.237500000000001e-05, + "loss": 0.5925, + "step": 8380 + }, + { + "epoch": 121.59, + "learning_rate": 4.2296875e-05, + "loss": 0.6336, + "step": 8390 + }, + { + "epoch": 121.74, + "learning_rate": 4.221875e-05, + "loss": 0.5826, + "step": 8400 + }, + { + "epoch": 121.88, + "learning_rate": 4.2140625e-05, + "loss": 0.5964, + "step": 8410 + }, + { + "epoch": 122.03, + "learning_rate": 4.2062500000000006e-05, + "loss": 0.5978, + "step": 8420 + }, + { + "epoch": 122.17, + "learning_rate": 4.1984375e-05, + "loss": 0.6456, + "step": 8430 + }, + { + "epoch": 122.32, + "learning_rate": 4.1906250000000006e-05, + "loss": 0.6088, + "step": 8440 + }, + { + "epoch": 122.46, + "learning_rate": 4.1828125e-05, + "loss": 0.6204, + "step": 8450 + }, + { + "epoch": 122.61, + "learning_rate": 4.175e-05, + "loss": 0.5545, + "step": 8460 + }, + { + "epoch": 122.75, + "learning_rate": 4.1671875e-05, + "loss": 0.5823, + "step": 8470 + }, + { + "epoch": 122.9, + "learning_rate": 4.1593750000000005e-05, + "loss": 0.5386, + "step": 8480 + }, + { + "epoch": 123.04, + "learning_rate": 4.1515625e-05, + "loss": 0.5619, + "step": 8490 + }, + { + "epoch": 123.19, + "learning_rate": 4.1437500000000004e-05, + "loss": 0.6218, + "step": 8500 + }, + { + "epoch": 123.19, + "eval_loss": 0.22871814668178558, + "eval_runtime": 573.0196, + "eval_samples_per_second": 5.923, + "eval_steps_per_second": 0.742, + "eval_wer": 0.1565712595222247, + "step": 8500 + }, + { + "epoch": 123.33, + "learning_rate": 4.1359375e-05, + "loss": 0.6351, + "step": 8510 + }, + { + "epoch": 123.48, + "learning_rate": 4.1281250000000004e-05, + "loss": 0.5678, + "step": 8520 + }, + { + "epoch": 123.62, + "learning_rate": 4.1203125e-05, + "loss": 0.7051, + "step": 8530 + }, + { + "epoch": 123.77, + "learning_rate": 4.1125000000000004e-05, + "loss": 0.5785, + "step": 8540 + }, + { + "epoch": 123.91, + "learning_rate": 4.1046875e-05, + "loss": 0.6527, + "step": 8550 + }, + { + "epoch": 124.06, + "learning_rate": 4.096875e-05, + "loss": 0.5509, + "step": 8560 + }, + { + "epoch": 124.2, + "learning_rate": 4.0890625000000006e-05, + "loss": 0.6065, + "step": 8570 + }, + { + "epoch": 124.35, + "learning_rate": 4.08125e-05, + "loss": 0.6249, + "step": 8580 + }, + { + "epoch": 124.49, + "learning_rate": 4.0734375e-05, + "loss": 0.5477, + "step": 8590 + }, + { + "epoch": 124.64, + "learning_rate": 4.065625e-05, + "loss": 0.6066, + "step": 8600 + }, + { + "epoch": 124.78, + "learning_rate": 4.0585937500000007e-05, + "loss": 0.5868, + "step": 8610 + }, + { + "epoch": 124.93, + "learning_rate": 4.05078125e-05, + "loss": 0.5902, + "step": 8620 + }, + { + "epoch": 125.07, + "learning_rate": 4.04296875e-05, + "loss": 0.6138, + "step": 8630 + }, + { + "epoch": 125.22, + "learning_rate": 4.03515625e-05, + "loss": 0.6382, + "step": 8640 + }, + { + "epoch": 125.36, + "learning_rate": 4.02734375e-05, + "loss": 0.6279, + "step": 8650 + }, + { + "epoch": 125.51, + "learning_rate": 4.01953125e-05, + "loss": 0.6699, + "step": 8660 + }, + { + "epoch": 125.65, + "learning_rate": 4.0117187500000005e-05, + "loss": 0.6396, + "step": 8670 + }, + { + "epoch": 125.8, + "learning_rate": 4.00390625e-05, + "loss": 0.564, + "step": 8680 + }, + { + "epoch": 125.94, + "learning_rate": 3.99609375e-05, + "loss": 0.556, + "step": 8690 + }, + { + "epoch": 126.09, + "learning_rate": 3.98828125e-05, + "loss": 0.6425, + "step": 8700 + }, + { + "epoch": 126.23, + "learning_rate": 3.9804687500000004e-05, + "loss": 0.5753, + "step": 8710 + }, + { + "epoch": 126.38, + "learning_rate": 3.97265625e-05, + "loss": 0.5892, + "step": 8720 + }, + { + "epoch": 126.52, + "learning_rate": 3.9648437500000004e-05, + "loss": 0.5827, + "step": 8730 + }, + { + "epoch": 126.67, + "learning_rate": 3.95703125e-05, + "loss": 0.6517, + "step": 8740 + }, + { + "epoch": 126.81, + "learning_rate": 3.94921875e-05, + "loss": 0.5346, + "step": 8750 + }, + { + "epoch": 126.96, + "learning_rate": 3.941406250000001e-05, + "loss": 0.5868, + "step": 8760 + }, + { + "epoch": 127.1, + "learning_rate": 3.93359375e-05, + "loss": 0.5673, + "step": 8770 + }, + { + "epoch": 127.25, + "learning_rate": 3.92578125e-05, + "loss": 0.59, + "step": 8780 + }, + { + "epoch": 127.39, + "learning_rate": 3.91796875e-05, + "loss": 0.5705, + "step": 8790 + }, + { + "epoch": 127.54, + "learning_rate": 3.91015625e-05, + "loss": 0.5958, + "step": 8800 + }, + { + "epoch": 127.68, + "learning_rate": 3.90234375e-05, + "loss": 0.5522, + "step": 8810 + }, + { + "epoch": 127.83, + "learning_rate": 3.8945312500000006e-05, + "loss": 0.581, + "step": 8820 + }, + { + "epoch": 127.97, + "learning_rate": 3.88671875e-05, + "loss": 0.5469, + "step": 8830 + }, + { + "epoch": 128.12, + "learning_rate": 3.87890625e-05, + "loss": 0.6312, + "step": 8840 + }, + { + "epoch": 128.26, + "learning_rate": 3.87109375e-05, + "loss": 0.6486, + "step": 8850 + }, + { + "epoch": 128.41, + "learning_rate": 3.8632812500000005e-05, + "loss": 0.6362, + "step": 8860 + }, + { + "epoch": 128.55, + "learning_rate": 3.85546875e-05, + "loss": 0.5602, + "step": 8870 + }, + { + "epoch": 128.7, + "learning_rate": 3.8476562500000004e-05, + "loss": 0.5554, + "step": 8880 + }, + { + "epoch": 128.84, + "learning_rate": 3.83984375e-05, + "loss": 0.5491, + "step": 8890 + }, + { + "epoch": 128.99, + "learning_rate": 3.83203125e-05, + "loss": 0.5681, + "step": 8900 + }, + { + "epoch": 129.13, + "learning_rate": 3.824218750000001e-05, + "loss": 0.6239, + "step": 8910 + }, + { + "epoch": 129.28, + "learning_rate": 3.8164062500000004e-05, + "loss": 0.5759, + "step": 8920 + }, + { + "epoch": 129.42, + "learning_rate": 3.80859375e-05, + "loss": 0.5868, + "step": 8930 + }, + { + "epoch": 129.57, + "learning_rate": 3.8015625e-05, + "loss": 0.5796, + "step": 8940 + }, + { + "epoch": 129.71, + "learning_rate": 3.79375e-05, + "loss": 0.5501, + "step": 8950 + }, + { + "epoch": 129.86, + "learning_rate": 3.7859375000000004e-05, + "loss": 0.5976, + "step": 8960 + }, + { + "epoch": 130.0, + "learning_rate": 3.778125e-05, + "loss": 0.5488, + "step": 8970 + }, + { + "epoch": 130.14, + "learning_rate": 3.7703125e-05, + "loss": 0.5688, + "step": 8980 + }, + { + "epoch": 130.29, + "learning_rate": 3.7625e-05, + "loss": 0.5448, + "step": 8990 + }, + { + "epoch": 130.43, + "learning_rate": 3.7546875e-05, + "loss": 0.5751, + "step": 9000 + }, + { + "epoch": 130.43, + "eval_loss": 0.22747375071048737, + "eval_runtime": 574.7485, + "eval_samples_per_second": 5.905, + "eval_steps_per_second": 0.739, + "eval_wer": 0.1562672057457946, + "step": 9000 + }, + { + "epoch": 130.58, + "learning_rate": 3.746875e-05, + "loss": 0.6, + "step": 9010 + }, + { + "epoch": 130.72, + "learning_rate": 3.7390625e-05, + "loss": 0.5781, + "step": 9020 + }, + { + "epoch": 130.87, + "learning_rate": 3.73125e-05, + "loss": 0.5545, + "step": 9030 + }, + { + "epoch": 131.01, + "learning_rate": 3.7234375e-05, + "loss": 0.6116, + "step": 9040 + }, + { + "epoch": 131.16, + "learning_rate": 3.7156250000000005e-05, + "loss": 0.5634, + "step": 9050 + }, + { + "epoch": 131.3, + "learning_rate": 3.7078125e-05, + "loss": 0.6436, + "step": 9060 + }, + { + "epoch": 131.45, + "learning_rate": 3.7e-05, + "loss": 0.5882, + "step": 9070 + }, + { + "epoch": 131.59, + "learning_rate": 3.6921875e-05, + "loss": 0.5391, + "step": 9080 + }, + { + "epoch": 131.74, + "learning_rate": 3.684375e-05, + "loss": 0.5211, + "step": 9090 + }, + { + "epoch": 131.88, + "learning_rate": 3.6765625e-05, + "loss": 0.5971, + "step": 9100 + }, + { + "epoch": 132.03, + "learning_rate": 3.6687500000000004e-05, + "loss": 0.5314, + "step": 9110 + }, + { + "epoch": 132.17, + "learning_rate": 3.6609375e-05, + "loss": 0.573, + "step": 9120 + }, + { + "epoch": 132.32, + "learning_rate": 3.653125e-05, + "loss": 0.5678, + "step": 9130 + }, + { + "epoch": 132.46, + "learning_rate": 3.6453125e-05, + "loss": 0.5369, + "step": 9140 + }, + { + "epoch": 132.61, + "learning_rate": 3.6375e-05, + "loss": 0.5976, + "step": 9150 + }, + { + "epoch": 132.75, + "learning_rate": 3.6296875e-05, + "loss": 0.609, + "step": 9160 + }, + { + "epoch": 132.9, + "learning_rate": 3.621875e-05, + "loss": 0.5859, + "step": 9170 + }, + { + "epoch": 133.04, + "learning_rate": 3.6140625e-05, + "loss": 0.6005, + "step": 9180 + }, + { + "epoch": 133.19, + "learning_rate": 3.60625e-05, + "loss": 0.5796, + "step": 9190 + }, + { + "epoch": 133.33, + "learning_rate": 3.5984375000000006e-05, + "loss": 0.5125, + "step": 9200 + }, + { + "epoch": 133.48, + "learning_rate": 3.590625e-05, + "loss": 0.5465, + "step": 9210 + }, + { + "epoch": 133.62, + "learning_rate": 3.5828125e-05, + "loss": 0.5985, + "step": 9220 + }, + { + "epoch": 133.77, + "learning_rate": 3.575e-05, + "loss": 0.5687, + "step": 9230 + }, + { + "epoch": 133.91, + "learning_rate": 3.5671875e-05, + "loss": 0.6586, + "step": 9240 + }, + { + "epoch": 134.06, + "learning_rate": 3.559375e-05, + "loss": 0.5656, + "step": 9250 + }, + { + "epoch": 134.2, + "learning_rate": 3.5515625000000004e-05, + "loss": 0.6136, + "step": 9260 + }, + { + "epoch": 134.35, + "learning_rate": 3.54375e-05, + "loss": 0.6063, + "step": 9270 + }, + { + "epoch": 134.49, + "learning_rate": 3.5359375e-05, + "loss": 0.5242, + "step": 9280 + }, + { + "epoch": 134.64, + "learning_rate": 3.528125e-05, + "loss": 0.5605, + "step": 9290 + }, + { + "epoch": 134.78, + "learning_rate": 3.5203125000000004e-05, + "loss": 0.5774, + "step": 9300 + }, + { + "epoch": 134.93, + "learning_rate": 3.5125e-05, + "loss": 0.5937, + "step": 9310 + }, + { + "epoch": 135.07, + "learning_rate": 3.5046875e-05, + "loss": 0.5875, + "step": 9320 + }, + { + "epoch": 135.22, + "learning_rate": 3.496875e-05, + "loss": 0.5483, + "step": 9330 + }, + { + "epoch": 135.36, + "learning_rate": 3.4890624999999996e-05, + "loss": 0.5705, + "step": 9340 + }, + { + "epoch": 135.51, + "learning_rate": 3.4812500000000006e-05, + "loss": 0.6026, + "step": 9350 + }, + { + "epoch": 135.65, + "learning_rate": 3.4734375e-05, + "loss": 0.5888, + "step": 9360 + }, + { + "epoch": 135.8, + "learning_rate": 3.465625e-05, + "loss": 0.5913, + "step": 9370 + }, + { + "epoch": 135.94, + "learning_rate": 3.4578125e-05, + "loss": 0.5792, + "step": 9380 + }, + { + "epoch": 136.09, + "learning_rate": 3.45e-05, + "loss": 0.615, + "step": 9390 + }, + { + "epoch": 136.23, + "learning_rate": 3.4421875e-05, + "loss": 0.5827, + "step": 9400 + }, + { + "epoch": 136.38, + "learning_rate": 3.4343750000000005e-05, + "loss": 0.5956, + "step": 9410 + }, + { + "epoch": 136.52, + "learning_rate": 3.4265625e-05, + "loss": 0.4951, + "step": 9420 + }, + { + "epoch": 136.67, + "learning_rate": 3.41875e-05, + "loss": 0.5786, + "step": 9430 + }, + { + "epoch": 136.81, + "learning_rate": 3.4109375e-05, + "loss": 0.5307, + "step": 9440 + }, + { + "epoch": 136.96, + "learning_rate": 3.4031250000000004e-05, + "loss": 0.6652, + "step": 9450 + }, + { + "epoch": 137.1, + "learning_rate": 3.3953125e-05, + "loss": 0.5879, + "step": 9460 + }, + { + "epoch": 137.25, + "learning_rate": 3.3875000000000003e-05, + "loss": 0.6949, + "step": 9470 + }, + { + "epoch": 137.39, + "learning_rate": 3.3796875e-05, + "loss": 0.5537, + "step": 9480 + }, + { + "epoch": 137.54, + "learning_rate": 3.3718749999999996e-05, + "loss": 0.5923, + "step": 9490 + }, + { + "epoch": 137.68, + "learning_rate": 3.3640625000000006e-05, + "loss": 0.5181, + "step": 9500 + }, + { + "epoch": 137.68, + "eval_loss": 0.23162005841732025, + "eval_runtime": 574.0095, + "eval_samples_per_second": 5.913, + "eval_steps_per_second": 0.74, + "eval_wer": 0.15791895734207692, + "step": 9500 + }, + { + "epoch": 137.83, + "learning_rate": 3.35625e-05, + "loss": 0.5543, + "step": 9510 + }, + { + "epoch": 137.97, + "learning_rate": 3.3484375e-05, + "loss": 0.5875, + "step": 9520 + }, + { + "epoch": 138.12, + "learning_rate": 3.340625e-05, + "loss": 0.5757, + "step": 9530 + }, + { + "epoch": 138.26, + "learning_rate": 3.3328125e-05, + "loss": 0.5366, + "step": 9540 + }, + { + "epoch": 138.41, + "learning_rate": 3.325e-05, + "loss": 0.5328, + "step": 9550 + }, + { + "epoch": 138.55, + "learning_rate": 3.3171875000000005e-05, + "loss": 0.5193, + "step": 9560 + }, + { + "epoch": 138.7, + "learning_rate": 3.309375e-05, + "loss": 0.5377, + "step": 9570 + }, + { + "epoch": 138.84, + "learning_rate": 3.3015625e-05, + "loss": 0.5687, + "step": 9580 + }, + { + "epoch": 138.99, + "learning_rate": 3.29375e-05, + "loss": 0.5416, + "step": 9590 + }, + { + "epoch": 139.13, + "learning_rate": 3.2859375000000004e-05, + "loss": 0.5427, + "step": 9600 + }, + { + "epoch": 139.28, + "learning_rate": 3.278125e-05, + "loss": 0.577, + "step": 9610 + }, + { + "epoch": 139.42, + "learning_rate": 3.2703125000000004e-05, + "loss": 0.5943, + "step": 9620 + }, + { + "epoch": 139.57, + "learning_rate": 3.2625e-05, + "loss": 0.5481, + "step": 9630 + }, + { + "epoch": 139.71, + "learning_rate": 3.2546874999999997e-05, + "loss": 0.5904, + "step": 9640 + }, + { + "epoch": 139.86, + "learning_rate": 3.2468750000000007e-05, + "loss": 0.6591, + "step": 9650 + }, + { + "epoch": 140.0, + "learning_rate": 3.2390625e-05, + "loss": 0.6146, + "step": 9660 + }, + { + "epoch": 140.14, + "learning_rate": 3.23125e-05, + "loss": 0.5974, + "step": 9670 + }, + { + "epoch": 140.29, + "learning_rate": 3.2234375e-05, + "loss": 0.546, + "step": 9680 + }, + { + "epoch": 140.43, + "learning_rate": 3.215625e-05, + "loss": 0.5433, + "step": 9690 + }, + { + "epoch": 140.58, + "learning_rate": 3.2078125e-05, + "loss": 0.5357, + "step": 9700 + }, + { + "epoch": 140.72, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6074, + "step": 9710 + }, + { + "epoch": 140.87, + "learning_rate": 3.1921875e-05, + "loss": 0.5543, + "step": 9720 + }, + { + "epoch": 141.01, + "learning_rate": 3.184375e-05, + "loss": 0.5415, + "step": 9730 + }, + { + "epoch": 141.16, + "learning_rate": 3.1765625e-05, + "loss": 0.5853, + "step": 9740 + }, + { + "epoch": 141.3, + "learning_rate": 3.1687500000000005e-05, + "loss": 0.5143, + "step": 9750 + }, + { + "epoch": 141.45, + "learning_rate": 3.1609375e-05, + "loss": 0.503, + "step": 9760 + }, + { + "epoch": 141.59, + "learning_rate": 3.1531250000000004e-05, + "loss": 0.5154, + "step": 9770 + }, + { + "epoch": 141.74, + "learning_rate": 3.1453125e-05, + "loss": 0.562, + "step": 9780 + }, + { + "epoch": 141.88, + "learning_rate": 3.1375e-05, + "loss": 0.5454, + "step": 9790 + }, + { + "epoch": 142.03, + "learning_rate": 3.1296875e-05, + "loss": 0.5722, + "step": 9800 + }, + { + "epoch": 142.17, + "learning_rate": 3.121875e-05, + "loss": 0.6911, + "step": 9810 + }, + { + "epoch": 142.32, + "learning_rate": 3.1140625e-05, + "loss": 0.5725, + "step": 9820 + }, + { + "epoch": 142.46, + "learning_rate": 3.10625e-05, + "loss": 0.5842, + "step": 9830 + }, + { + "epoch": 142.61, + "learning_rate": 3.0984375e-05, + "loss": 0.5597, + "step": 9840 + }, + { + "epoch": 142.75, + "learning_rate": 3.090625e-05, + "loss": 0.592, + "step": 9850 + }, + { + "epoch": 142.9, + "learning_rate": 3.0828125e-05, + "loss": 0.5506, + "step": 9860 + }, + { + "epoch": 143.04, + "learning_rate": 3.075e-05, + "loss": 0.6292, + "step": 9870 + }, + { + "epoch": 143.19, + "learning_rate": 3.0671875e-05, + "loss": 0.5672, + "step": 9880 + }, + { + "epoch": 143.33, + "learning_rate": 3.059375e-05, + "loss": 0.5315, + "step": 9890 + }, + { + "epoch": 143.48, + "learning_rate": 3.0515625000000005e-05, + "loss": 0.5563, + "step": 9900 + }, + { + "epoch": 143.62, + "learning_rate": 3.04375e-05, + "loss": 0.5877, + "step": 9910 + }, + { + "epoch": 143.77, + "learning_rate": 3.0359375e-05, + "loss": 0.546, + "step": 9920 + }, + { + "epoch": 143.91, + "learning_rate": 3.028125e-05, + "loss": 0.5425, + "step": 9930 + }, + { + "epoch": 144.06, + "learning_rate": 3.0203124999999997e-05, + "loss": 0.5368, + "step": 9940 + }, + { + "epoch": 144.2, + "learning_rate": 3.0125000000000004e-05, + "loss": 0.538, + "step": 9950 + }, + { + "epoch": 144.35, + "learning_rate": 3.0046875000000004e-05, + "loss": 0.5367, + "step": 9960 + }, + { + "epoch": 144.49, + "learning_rate": 2.996875e-05, + "loss": 0.5591, + "step": 9970 + }, + { + "epoch": 144.64, + "learning_rate": 2.9890625e-05, + "loss": 0.5554, + "step": 9980 + }, + { + "epoch": 144.78, + "learning_rate": 2.98125e-05, + "loss": 0.4985, + "step": 9990 + }, + { + "epoch": 144.93, + "learning_rate": 2.9734375000000003e-05, + "loss": 0.6306, + "step": 10000 + }, + { + "epoch": 144.93, + "eval_loss": 0.23715920746326447, + "eval_runtime": 577.4706, + "eval_samples_per_second": 5.877, + "eval_steps_per_second": 0.736, + "eval_wer": 0.15560157450550172, + "step": 10000 + }, + { + "epoch": 145.07, + "learning_rate": 2.9656250000000003e-05, + "loss": 0.5794, + "step": 10010 + }, + { + "epoch": 145.22, + "learning_rate": 2.9578125000000002e-05, + "loss": 0.5792, + "step": 10020 + }, + { + "epoch": 145.36, + "learning_rate": 2.95e-05, + "loss": 0.5664, + "step": 10030 + }, + { + "epoch": 145.51, + "learning_rate": 2.9421875e-05, + "loss": 0.5737, + "step": 10040 + }, + { + "epoch": 145.65, + "learning_rate": 2.9343750000000002e-05, + "loss": 0.5735, + "step": 10050 + }, + { + "epoch": 145.8, + "learning_rate": 2.9265625e-05, + "loss": 0.5972, + "step": 10060 + }, + { + "epoch": 145.94, + "learning_rate": 2.91875e-05, + "loss": 0.5998, + "step": 10070 + }, + { + "epoch": 146.09, + "learning_rate": 2.9109375e-05, + "loss": 0.5357, + "step": 10080 + }, + { + "epoch": 146.23, + "learning_rate": 2.9031249999999998e-05, + "loss": 0.522, + "step": 10090 + }, + { + "epoch": 146.38, + "learning_rate": 2.8953125000000004e-05, + "loss": 0.5231, + "step": 10100 + }, + { + "epoch": 146.52, + "learning_rate": 2.8875e-05, + "loss": 0.5343, + "step": 10110 + }, + { + "epoch": 146.67, + "learning_rate": 2.8796875e-05, + "loss": 0.5996, + "step": 10120 + }, + { + "epoch": 146.81, + "learning_rate": 2.871875e-05, + "loss": 0.5595, + "step": 10130 + }, + { + "epoch": 146.96, + "learning_rate": 2.8640625e-05, + "loss": 0.5386, + "step": 10140 + }, + { + "epoch": 147.1, + "learning_rate": 2.8562500000000003e-05, + "loss": 0.5338, + "step": 10150 + }, + { + "epoch": 147.25, + "learning_rate": 2.8484375000000003e-05, + "loss": 0.5273, + "step": 10160 + }, + { + "epoch": 147.39, + "learning_rate": 2.840625e-05, + "loss": 0.628, + "step": 10170 + }, + { + "epoch": 147.54, + "learning_rate": 2.8328125e-05, + "loss": 0.5602, + "step": 10180 + }, + { + "epoch": 147.68, + "learning_rate": 2.825e-05, + "loss": 0.621, + "step": 10190 + }, + { + "epoch": 147.83, + "learning_rate": 2.8171875000000002e-05, + "loss": 0.6112, + "step": 10200 + }, + { + "epoch": 147.97, + "learning_rate": 2.8093750000000002e-05, + "loss": 0.4686, + "step": 10210 + }, + { + "epoch": 148.12, + "learning_rate": 2.8015625e-05, + "loss": 0.5582, + "step": 10220 + }, + { + "epoch": 148.26, + "learning_rate": 2.79375e-05, + "loss": 0.5353, + "step": 10230 + }, + { + "epoch": 148.41, + "learning_rate": 2.7859374999999998e-05, + "loss": 0.5545, + "step": 10240 + }, + { + "epoch": 148.55, + "learning_rate": 2.7781250000000004e-05, + "loss": 0.497, + "step": 10250 + }, + { + "epoch": 148.7, + "learning_rate": 2.7703125e-05, + "loss": 0.6022, + "step": 10260 + }, + { + "epoch": 148.84, + "learning_rate": 2.7625e-05, + "loss": 0.5438, + "step": 10270 + }, + { + "epoch": 148.99, + "learning_rate": 2.7546875e-05, + "loss": 0.6067, + "step": 10280 + }, + { + "epoch": 149.13, + "learning_rate": 2.746875e-05, + "loss": 0.5966, + "step": 10290 + }, + { + "epoch": 149.28, + "learning_rate": 2.7390625000000003e-05, + "loss": 0.6345, + "step": 10300 + }, + { + "epoch": 149.42, + "learning_rate": 2.7312500000000003e-05, + "loss": 0.5105, + "step": 10310 + }, + { + "epoch": 149.57, + "learning_rate": 2.7234375e-05, + "loss": 0.573, + "step": 10320 + }, + { + "epoch": 149.71, + "learning_rate": 2.715625e-05, + "loss": 0.5542, + "step": 10330 + }, + { + "epoch": 149.86, + "learning_rate": 2.7078125e-05, + "loss": 0.6154, + "step": 10340 + }, + { + "epoch": 150.0, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.5475, + "step": 10350 + }, + { + "epoch": 150.14, + "learning_rate": 2.6921875000000002e-05, + "loss": 0.5476, + "step": 10360 + }, + { + "epoch": 150.29, + "learning_rate": 2.6843750000000002e-05, + "loss": 0.5667, + "step": 10370 + }, + { + "epoch": 150.43, + "learning_rate": 2.6765625e-05, + "loss": 0.5752, + "step": 10380 + }, + { + "epoch": 150.58, + "learning_rate": 2.6687499999999998e-05, + "loss": 0.5797, + "step": 10390 + }, + { + "epoch": 150.72, + "learning_rate": 2.6609375000000005e-05, + "loss": 0.5585, + "step": 10400 + }, + { + "epoch": 150.87, + "learning_rate": 2.653125e-05, + "loss": 0.6719, + "step": 10410 + }, + { + "epoch": 151.01, + "learning_rate": 2.6453125e-05, + "loss": 0.535, + "step": 10420 + }, + { + "epoch": 151.16, + "learning_rate": 2.6375e-05, + "loss": 0.5853, + "step": 10430 + }, + { + "epoch": 151.3, + "learning_rate": 2.6296874999999997e-05, + "loss": 0.5341, + "step": 10440 + }, + { + "epoch": 151.45, + "learning_rate": 2.6218750000000004e-05, + "loss": 0.4932, + "step": 10450 + }, + { + "epoch": 151.59, + "learning_rate": 2.6140625000000004e-05, + "loss": 0.5924, + "step": 10460 + }, + { + "epoch": 151.74, + "learning_rate": 2.60625e-05, + "loss": 0.5665, + "step": 10470 + }, + { + "epoch": 151.88, + "learning_rate": 2.5984375e-05, + "loss": 0.5202, + "step": 10480 + }, + { + "epoch": 152.03, + "learning_rate": 2.590625e-05, + "loss": 0.5209, + "step": 10490 + }, + { + "epoch": 152.17, + "learning_rate": 2.5828125000000003e-05, + "loss": 0.5874, + "step": 10500 + }, + { + "epoch": 152.17, + "eval_loss": 0.23621943593025208, + "eval_runtime": 575.3543, + "eval_samples_per_second": 5.899, + "eval_steps_per_second": 0.739, + "eval_wer": 0.15330884467782627, + "step": 10500 + }, + { + "epoch": 152.32, + "learning_rate": 2.5750000000000002e-05, + "loss": 0.57, + "step": 10510 + }, + { + "epoch": 152.46, + "learning_rate": 2.5671875000000002e-05, + "loss": 0.58, + "step": 10520 + }, + { + "epoch": 152.61, + "learning_rate": 2.559375e-05, + "loss": 0.5269, + "step": 10530 + }, + { + "epoch": 152.75, + "learning_rate": 2.5515625e-05, + "loss": 0.5519, + "step": 10540 + }, + { + "epoch": 152.9, + "learning_rate": 2.54375e-05, + "loss": 0.497, + "step": 10550 + }, + { + "epoch": 153.04, + "learning_rate": 2.5359375e-05, + "loss": 0.5686, + "step": 10560 + }, + { + "epoch": 153.19, + "learning_rate": 2.528125e-05, + "loss": 0.5219, + "step": 10570 + }, + { + "epoch": 153.33, + "learning_rate": 2.5203125e-05, + "loss": 0.5695, + "step": 10580 + }, + { + "epoch": 153.48, + "learning_rate": 2.5124999999999997e-05, + "loss": 0.5747, + "step": 10590 + }, + { + "epoch": 153.62, + "learning_rate": 2.5046875000000004e-05, + "loss": 0.5297, + "step": 10600 + }, + { + "epoch": 153.77, + "learning_rate": 2.496875e-05, + "loss": 0.5956, + "step": 10610 + }, + { + "epoch": 153.91, + "learning_rate": 2.4890625e-05, + "loss": 0.5752, + "step": 10620 + }, + { + "epoch": 154.06, + "learning_rate": 2.4812500000000003e-05, + "loss": 0.5765, + "step": 10630 + }, + { + "epoch": 154.2, + "learning_rate": 2.4734375e-05, + "loss": 0.5627, + "step": 10640 + }, + { + "epoch": 154.35, + "learning_rate": 2.465625e-05, + "loss": 0.5591, + "step": 10650 + }, + { + "epoch": 154.49, + "learning_rate": 2.4578125000000003e-05, + "loss": 0.5595, + "step": 10660 + }, + { + "epoch": 154.64, + "learning_rate": 2.45e-05, + "loss": 0.5978, + "step": 10670 + }, + { + "epoch": 154.78, + "learning_rate": 2.4421875000000002e-05, + "loss": 0.5249, + "step": 10680 + }, + { + "epoch": 154.93, + "learning_rate": 2.4343750000000002e-05, + "loss": 0.5522, + "step": 10690 + }, + { + "epoch": 155.07, + "learning_rate": 2.4265625e-05, + "loss": 0.5152, + "step": 10700 + }, + { + "epoch": 155.22, + "learning_rate": 2.4187500000000002e-05, + "loss": 0.6, + "step": 10710 + }, + { + "epoch": 155.36, + "learning_rate": 2.4109375e-05, + "loss": 0.588, + "step": 10720 + }, + { + "epoch": 155.51, + "learning_rate": 2.403125e-05, + "loss": 0.52, + "step": 10730 + }, + { + "epoch": 155.65, + "learning_rate": 2.3953125e-05, + "loss": 0.5191, + "step": 10740 + }, + { + "epoch": 155.8, + "learning_rate": 2.3875e-05, + "loss": 0.5456, + "step": 10750 + }, + { + "epoch": 155.94, + "learning_rate": 2.3796875e-05, + "loss": 0.6365, + "step": 10760 + }, + { + "epoch": 156.09, + "learning_rate": 2.371875e-05, + "loss": 0.5344, + "step": 10770 + }, + { + "epoch": 156.23, + "learning_rate": 2.3640625000000004e-05, + "loss": 0.5185, + "step": 10780 + }, + { + "epoch": 156.38, + "learning_rate": 2.35625e-05, + "loss": 0.5619, + "step": 10790 + }, + { + "epoch": 156.52, + "learning_rate": 2.3484375e-05, + "loss": 0.5248, + "step": 10800 + }, + { + "epoch": 156.67, + "learning_rate": 2.3406250000000003e-05, + "loss": 0.499, + "step": 10810 + }, + { + "epoch": 156.81, + "learning_rate": 2.3328125e-05, + "loss": 0.5657, + "step": 10820 + }, + { + "epoch": 156.96, + "learning_rate": 2.3250000000000003e-05, + "loss": 0.52, + "step": 10830 + }, + { + "epoch": 157.1, + "learning_rate": 2.3171875000000003e-05, + "loss": 0.5119, + "step": 10840 + }, + { + "epoch": 157.25, + "learning_rate": 2.309375e-05, + "loss": 0.5749, + "step": 10850 + }, + { + "epoch": 157.39, + "learning_rate": 2.3015625000000002e-05, + "loss": 0.57, + "step": 10860 + }, + { + "epoch": 157.54, + "learning_rate": 2.2937500000000002e-05, + "loss": 0.5096, + "step": 10870 + }, + { + "epoch": 157.68, + "learning_rate": 2.2859375e-05, + "loss": 0.5025, + "step": 10880 + }, + { + "epoch": 157.83, + "learning_rate": 2.278125e-05, + "loss": 0.5884, + "step": 10890 + }, + { + "epoch": 157.97, + "learning_rate": 2.2703125e-05, + "loss": 0.4872, + "step": 10900 + }, + { + "epoch": 158.12, + "learning_rate": 2.2625e-05, + "loss": 0.5518, + "step": 10910 + }, + { + "epoch": 158.26, + "learning_rate": 2.2546875e-05, + "loss": 0.5348, + "step": 10920 + }, + { + "epoch": 158.41, + "learning_rate": 2.246875e-05, + "loss": 0.5997, + "step": 10930 + }, + { + "epoch": 158.55, + "learning_rate": 2.2390625e-05, + "loss": 0.5602, + "step": 10940 + }, + { + "epoch": 158.7, + "learning_rate": 2.23125e-05, + "loss": 0.5996, + "step": 10950 + }, + { + "epoch": 158.84, + "learning_rate": 2.2234375e-05, + "loss": 0.6152, + "step": 10960 + }, + { + "epoch": 158.99, + "learning_rate": 2.215625e-05, + "loss": 0.5291, + "step": 10970 + }, + { + "epoch": 159.13, + "learning_rate": 2.2078125000000003e-05, + "loss": 0.55, + "step": 10980 + }, + { + "epoch": 159.28, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.6242, + "step": 10990 + }, + { + "epoch": 159.42, + "learning_rate": 2.1921875e-05, + "loss": 0.5546, + "step": 11000 + }, + { + "epoch": 159.42, + "eval_loss": 0.23421239852905273, + "eval_runtime": 576.3026, + "eval_samples_per_second": 5.889, + "eval_steps_per_second": 0.737, + "eval_wer": 0.15427852969454922, + "step": 11000 + }, + { + "epoch": 159.57, + "learning_rate": 2.1843750000000002e-05, + "loss": 0.5893, + "step": 11010 + }, + { + "epoch": 159.71, + "learning_rate": 2.1765625000000002e-05, + "loss": 0.5654, + "step": 11020 + }, + { + "epoch": 159.86, + "learning_rate": 2.1687500000000002e-05, + "loss": 0.5748, + "step": 11030 + }, + { + "epoch": 160.0, + "learning_rate": 2.1609375000000002e-05, + "loss": 0.5317, + "step": 11040 + }, + { + "epoch": 160.14, + "learning_rate": 2.153125e-05, + "loss": 0.4892, + "step": 11050 + }, + { + "epoch": 160.29, + "learning_rate": 2.1453125e-05, + "loss": 0.544, + "step": 11060 + }, + { + "epoch": 160.43, + "learning_rate": 2.1375e-05, + "loss": 0.5197, + "step": 11070 + }, + { + "epoch": 160.58, + "learning_rate": 2.1296875e-05, + "loss": 0.5127, + "step": 11080 + }, + { + "epoch": 160.72, + "learning_rate": 2.121875e-05, + "loss": 0.5438, + "step": 11090 + }, + { + "epoch": 160.87, + "learning_rate": 2.1140625e-05, + "loss": 0.5184, + "step": 11100 + }, + { + "epoch": 161.01, + "learning_rate": 2.10625e-05, + "loss": 0.6122, + "step": 11110 + }, + { + "epoch": 161.16, + "learning_rate": 2.0984375e-05, + "loss": 0.5086, + "step": 11120 + }, + { + "epoch": 161.3, + "learning_rate": 2.0906250000000003e-05, + "loss": 0.6104, + "step": 11130 + }, + { + "epoch": 161.45, + "learning_rate": 2.0828125e-05, + "loss": 0.5168, + "step": 11140 + }, + { + "epoch": 161.59, + "learning_rate": 2.075e-05, + "loss": 0.5145, + "step": 11150 + }, + { + "epoch": 161.74, + "learning_rate": 2.0671875000000003e-05, + "loss": 0.5389, + "step": 11160 + }, + { + "epoch": 161.88, + "learning_rate": 2.059375e-05, + "loss": 0.671, + "step": 11170 + }, + { + "epoch": 162.03, + "learning_rate": 2.05234375e-05, + "loss": 0.5659, + "step": 11180 + }, + { + "epoch": 162.17, + "learning_rate": 2.0453125e-05, + "loss": 0.5035, + "step": 11190 + }, + { + "epoch": 162.32, + "learning_rate": 2.0375e-05, + "loss": 0.5038, + "step": 11200 + }, + { + "epoch": 162.46, + "learning_rate": 2.0296875e-05, + "loss": 0.6035, + "step": 11210 + }, + { + "epoch": 162.61, + "learning_rate": 2.021875e-05, + "loss": 0.5472, + "step": 11220 + }, + { + "epoch": 162.75, + "learning_rate": 2.0140625000000003e-05, + "loss": 0.5149, + "step": 11230 + }, + { + "epoch": 162.9, + "learning_rate": 2.00625e-05, + "loss": 0.6043, + "step": 11240 + }, + { + "epoch": 163.04, + "learning_rate": 1.9984375e-05, + "loss": 0.5377, + "step": 11250 + }, + { + "epoch": 163.19, + "learning_rate": 1.9906250000000003e-05, + "loss": 0.5328, + "step": 11260 + }, + { + "epoch": 163.33, + "learning_rate": 1.9828125e-05, + "loss": 0.4993, + "step": 11270 + }, + { + "epoch": 163.48, + "learning_rate": 1.9750000000000002e-05, + "loss": 0.5234, + "step": 11280 + }, + { + "epoch": 163.62, + "learning_rate": 1.9671875000000002e-05, + "loss": 0.6392, + "step": 11290 + }, + { + "epoch": 163.77, + "learning_rate": 1.959375e-05, + "loss": 0.5308, + "step": 11300 + }, + { + "epoch": 163.91, + "learning_rate": 1.9515625000000002e-05, + "loss": 0.5907, + "step": 11310 + }, + { + "epoch": 164.06, + "learning_rate": 1.94375e-05, + "loss": 0.5679, + "step": 11320 + }, + { + "epoch": 164.2, + "learning_rate": 1.9359375e-05, + "loss": 0.5094, + "step": 11330 + }, + { + "epoch": 164.35, + "learning_rate": 1.928125e-05, + "loss": 0.5877, + "step": 11340 + }, + { + "epoch": 164.49, + "learning_rate": 1.9203125e-05, + "loss": 0.5177, + "step": 11350 + }, + { + "epoch": 164.64, + "learning_rate": 1.9125e-05, + "loss": 0.5452, + "step": 11360 + }, + { + "epoch": 164.78, + "learning_rate": 1.9046875e-05, + "loss": 0.568, + "step": 11370 + }, + { + "epoch": 164.93, + "learning_rate": 1.896875e-05, + "loss": 0.5717, + "step": 11380 + }, + { + "epoch": 165.07, + "learning_rate": 1.8890625e-05, + "loss": 0.5861, + "step": 11390 + }, + { + "epoch": 165.22, + "learning_rate": 1.88125e-05, + "loss": 0.5153, + "step": 11400 + }, + { + "epoch": 165.36, + "learning_rate": 1.8734375e-05, + "loss": 0.5797, + "step": 11410 + }, + { + "epoch": 165.51, + "learning_rate": 1.865625e-05, + "loss": 0.5237, + "step": 11420 + }, + { + "epoch": 165.65, + "learning_rate": 1.8578125000000003e-05, + "loss": 0.5606, + "step": 11430 + }, + { + "epoch": 165.8, + "learning_rate": 1.85e-05, + "loss": 0.5389, + "step": 11440 + }, + { + "epoch": 165.94, + "learning_rate": 1.8421875e-05, + "loss": 0.572, + "step": 11450 + }, + { + "epoch": 166.09, + "learning_rate": 1.8343750000000002e-05, + "loss": 0.5169, + "step": 11460 + }, + { + "epoch": 166.23, + "learning_rate": 1.8265625e-05, + "loss": 0.506, + "step": 11470 + }, + { + "epoch": 166.38, + "learning_rate": 1.81875e-05, + "loss": 0.5406, + "step": 11480 + }, + { + "epoch": 166.52, + "learning_rate": 1.8109375e-05, + "loss": 0.5913, + "step": 11490 + }, + { + "epoch": 166.67, + "learning_rate": 1.803125e-05, + "loss": 0.6294, + "step": 11500 + }, + { + "epoch": 166.67, + "eval_loss": 0.23806829750537872, + "eval_runtime": 576.0097, + "eval_samples_per_second": 5.892, + "eval_steps_per_second": 0.738, + "eval_wer": 0.15362933379352284, + "step": 11500 + }, + { + "epoch": 166.81, + "learning_rate": 1.7953125e-05, + "loss": 0.51, + "step": 11510 + }, + { + "epoch": 166.96, + "learning_rate": 1.7875e-05, + "loss": 0.5439, + "step": 11520 + }, + { + "epoch": 167.1, + "learning_rate": 1.7796875e-05, + "loss": 0.4877, + "step": 11530 + }, + { + "epoch": 167.25, + "learning_rate": 1.771875e-05, + "loss": 0.5299, + "step": 11540 + }, + { + "epoch": 167.39, + "learning_rate": 1.7640625e-05, + "loss": 0.5475, + "step": 11550 + }, + { + "epoch": 167.54, + "learning_rate": 1.75625e-05, + "loss": 0.5661, + "step": 11560 + }, + { + "epoch": 167.68, + "learning_rate": 1.7484375e-05, + "loss": 0.5416, + "step": 11570 + }, + { + "epoch": 167.83, + "learning_rate": 1.7406250000000003e-05, + "loss": 0.4929, + "step": 11580 + }, + { + "epoch": 167.97, + "learning_rate": 1.7328125e-05, + "loss": 0.5195, + "step": 11590 + }, + { + "epoch": 168.12, + "learning_rate": 1.725e-05, + "loss": 0.5883, + "step": 11600 + }, + { + "epoch": 168.26, + "learning_rate": 1.7171875000000002e-05, + "loss": 0.5723, + "step": 11610 + }, + { + "epoch": 168.41, + "learning_rate": 1.709375e-05, + "loss": 0.5621, + "step": 11620 + }, + { + "epoch": 168.55, + "learning_rate": 1.7015625000000002e-05, + "loss": 0.5567, + "step": 11630 + }, + { + "epoch": 168.7, + "learning_rate": 1.6937500000000002e-05, + "loss": 0.4995, + "step": 11640 + }, + { + "epoch": 168.84, + "learning_rate": 1.6859374999999998e-05, + "loss": 0.5601, + "step": 11650 + }, + { + "epoch": 168.99, + "learning_rate": 1.678125e-05, + "loss": 0.5122, + "step": 11660 + }, + { + "epoch": 169.13, + "learning_rate": 1.6703125e-05, + "loss": 0.5406, + "step": 11670 + }, + { + "epoch": 169.28, + "learning_rate": 1.6625e-05, + "loss": 0.4914, + "step": 11680 + }, + { + "epoch": 169.42, + "learning_rate": 1.6546875e-05, + "loss": 0.6525, + "step": 11690 + }, + { + "epoch": 169.57, + "learning_rate": 1.646875e-05, + "loss": 0.5181, + "step": 11700 + }, + { + "epoch": 169.71, + "learning_rate": 1.6390625e-05, + "loss": 0.5328, + "step": 11710 + }, + { + "epoch": 169.86, + "learning_rate": 1.63125e-05, + "loss": 0.5583, + "step": 11720 + }, + { + "epoch": 170.0, + "learning_rate": 1.6234375000000003e-05, + "loss": 0.5376, + "step": 11730 + }, + { + "epoch": 170.14, + "learning_rate": 1.615625e-05, + "loss": 0.5182, + "step": 11740 + }, + { + "epoch": 170.29, + "learning_rate": 1.6078125e-05, + "loss": 0.4756, + "step": 11750 + }, + { + "epoch": 170.43, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.5295, + "step": 11760 + }, + { + "epoch": 170.58, + "learning_rate": 1.5921875e-05, + "loss": 0.5482, + "step": 11770 + }, + { + "epoch": 170.72, + "learning_rate": 1.5843750000000002e-05, + "loss": 0.5513, + "step": 11780 + }, + { + "epoch": 170.87, + "learning_rate": 1.5765625000000002e-05, + "loss": 0.5341, + "step": 11790 + }, + { + "epoch": 171.01, + "learning_rate": 1.56875e-05, + "loss": 0.4755, + "step": 11800 + }, + { + "epoch": 171.16, + "learning_rate": 1.5609375e-05, + "loss": 0.5352, + "step": 11810 + }, + { + "epoch": 171.3, + "learning_rate": 1.553125e-05, + "loss": 0.5318, + "step": 11820 + }, + { + "epoch": 171.45, + "learning_rate": 1.5453125e-05, + "loss": 0.4488, + "step": 11830 + }, + { + "epoch": 171.59, + "learning_rate": 1.5375e-05, + "loss": 0.6219, + "step": 11840 + }, + { + "epoch": 171.74, + "learning_rate": 1.5296875e-05, + "loss": 0.5449, + "step": 11850 + }, + { + "epoch": 171.88, + "learning_rate": 1.521875e-05, + "loss": 0.5278, + "step": 11860 + }, + { + "epoch": 172.03, + "learning_rate": 1.5140625e-05, + "loss": 0.539, + "step": 11870 + }, + { + "epoch": 172.17, + "learning_rate": 1.5062500000000002e-05, + "loss": 0.5436, + "step": 11880 + }, + { + "epoch": 172.32, + "learning_rate": 1.4984375e-05, + "loss": 0.5874, + "step": 11890 + }, + { + "epoch": 172.46, + "learning_rate": 1.490625e-05, + "loss": 0.5529, + "step": 11900 + }, + { + "epoch": 172.61, + "learning_rate": 1.4828125000000001e-05, + "loss": 0.5403, + "step": 11910 + }, + { + "epoch": 172.75, + "learning_rate": 1.475e-05, + "loss": 0.4999, + "step": 11920 + }, + { + "epoch": 172.9, + "learning_rate": 1.4671875000000001e-05, + "loss": 0.6045, + "step": 11930 + }, + { + "epoch": 173.04, + "learning_rate": 1.459375e-05, + "loss": 0.5237, + "step": 11940 + }, + { + "epoch": 173.19, + "learning_rate": 1.4515624999999999e-05, + "loss": 0.5556, + "step": 11950 + }, + { + "epoch": 173.33, + "learning_rate": 1.44375e-05, + "loss": 0.5837, + "step": 11960 + }, + { + "epoch": 173.48, + "learning_rate": 1.4359375e-05, + "loss": 0.5021, + "step": 11970 + }, + { + "epoch": 173.62, + "learning_rate": 1.4281250000000002e-05, + "loss": 0.5294, + "step": 11980 + }, + { + "epoch": 173.77, + "learning_rate": 1.4203125e-05, + "loss": 0.5379, + "step": 11990 + }, + { + "epoch": 173.91, + "learning_rate": 1.4125e-05, + "loss": 0.5989, + "step": 12000 + }, + { + "epoch": 173.91, + "eval_loss": 0.23597407341003418, + "eval_runtime": 575.4715, + "eval_samples_per_second": 5.898, + "eval_steps_per_second": 0.739, + "eval_wer": 0.1526843017856996, + "step": 12000 + }, + { + "epoch": 174.06, + "learning_rate": 1.4046875000000001e-05, + "loss": 0.5167, + "step": 12010 + }, + { + "epoch": 174.2, + "learning_rate": 1.396875e-05, + "loss": 0.5127, + "step": 12020 + }, + { + "epoch": 174.35, + "learning_rate": 1.3890625000000002e-05, + "loss": 0.613, + "step": 12030 + }, + { + "epoch": 174.49, + "learning_rate": 1.38125e-05, + "loss": 0.5287, + "step": 12040 + }, + { + "epoch": 174.64, + "learning_rate": 1.3734375e-05, + "loss": 0.5609, + "step": 12050 + }, + { + "epoch": 174.78, + "learning_rate": 1.3656250000000002e-05, + "loss": 0.5278, + "step": 12060 + }, + { + "epoch": 174.93, + "learning_rate": 1.3578125e-05, + "loss": 0.5928, + "step": 12070 + }, + { + "epoch": 175.07, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.56, + "step": 12080 + }, + { + "epoch": 175.22, + "learning_rate": 1.3421875000000001e-05, + "loss": 0.5716, + "step": 12090 + }, + { + "epoch": 175.36, + "learning_rate": 1.3343749999999999e-05, + "loss": 0.5499, + "step": 12100 + }, + { + "epoch": 175.51, + "learning_rate": 1.3265625e-05, + "loss": 0.476, + "step": 12110 + }, + { + "epoch": 175.65, + "learning_rate": 1.31875e-05, + "loss": 0.5134, + "step": 12120 + }, + { + "epoch": 175.8, + "learning_rate": 1.3109375000000002e-05, + "loss": 0.5093, + "step": 12130 + }, + { + "epoch": 175.94, + "learning_rate": 1.303125e-05, + "loss": 0.579, + "step": 12140 + }, + { + "epoch": 176.09, + "learning_rate": 1.2953125e-05, + "loss": 0.4885, + "step": 12150 + }, + { + "epoch": 176.23, + "learning_rate": 1.2875000000000001e-05, + "loss": 0.5433, + "step": 12160 + }, + { + "epoch": 176.38, + "learning_rate": 1.2796875e-05, + "loss": 0.5137, + "step": 12170 + }, + { + "epoch": 176.52, + "learning_rate": 1.271875e-05, + "loss": 0.5202, + "step": 12180 + }, + { + "epoch": 176.67, + "learning_rate": 1.2640625e-05, + "loss": 0.5336, + "step": 12190 + }, + { + "epoch": 176.81, + "learning_rate": 1.2562499999999999e-05, + "loss": 0.5573, + "step": 12200 + }, + { + "epoch": 176.96, + "learning_rate": 1.2484375e-05, + "loss": 0.4605, + "step": 12210 + }, + { + "epoch": 177.1, + "learning_rate": 1.2406250000000002e-05, + "loss": 0.4848, + "step": 12220 + }, + { + "epoch": 177.25, + "learning_rate": 1.2328125e-05, + "loss": 0.5071, + "step": 12230 + }, + { + "epoch": 177.39, + "learning_rate": 1.225e-05, + "loss": 0.4797, + "step": 12240 + }, + { + "epoch": 177.54, + "learning_rate": 1.2171875000000001e-05, + "loss": 0.5308, + "step": 12250 + }, + { + "epoch": 177.68, + "learning_rate": 1.2093750000000001e-05, + "loss": 0.6085, + "step": 12260 + }, + { + "epoch": 177.83, + "learning_rate": 1.2015625e-05, + "loss": 0.5489, + "step": 12270 + }, + { + "epoch": 177.97, + "learning_rate": 1.19375e-05, + "loss": 0.4995, + "step": 12280 + }, + { + "epoch": 178.12, + "learning_rate": 1.1859375e-05, + "loss": 0.5686, + "step": 12290 + }, + { + "epoch": 178.26, + "learning_rate": 1.178125e-05, + "loss": 0.5471, + "step": 12300 + }, + { + "epoch": 178.41, + "learning_rate": 1.1703125000000002e-05, + "loss": 0.5439, + "step": 12310 + }, + { + "epoch": 178.55, + "learning_rate": 1.1625000000000001e-05, + "loss": 0.53, + "step": 12320 + }, + { + "epoch": 178.7, + "learning_rate": 1.1546875e-05, + "loss": 0.5401, + "step": 12330 + }, + { + "epoch": 178.84, + "learning_rate": 1.1468750000000001e-05, + "loss": 0.5493, + "step": 12340 + }, + { + "epoch": 178.99, + "learning_rate": 1.1390625e-05, + "loss": 0.5021, + "step": 12350 + }, + { + "epoch": 179.13, + "learning_rate": 1.13125e-05, + "loss": 0.5518, + "step": 12360 + }, + { + "epoch": 179.28, + "learning_rate": 1.1234375e-05, + "loss": 0.512, + "step": 12370 + }, + { + "epoch": 179.42, + "learning_rate": 1.115625e-05, + "loss": 0.503, + "step": 12380 + }, + { + "epoch": 179.57, + "learning_rate": 1.1078125e-05, + "loss": 0.5771, + "step": 12390 + }, + { + "epoch": 179.71, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.5227, + "step": 12400 + }, + { + "epoch": 179.86, + "learning_rate": 1.0921875000000001e-05, + "loss": 0.5993, + "step": 12410 + }, + { + "epoch": 180.0, + "learning_rate": 1.0843750000000001e-05, + "loss": 0.4581, + "step": 12420 + }, + { + "epoch": 180.14, + "learning_rate": 1.0765625e-05, + "loss": 0.523, + "step": 12430 + }, + { + "epoch": 180.29, + "learning_rate": 1.06875e-05, + "loss": 0.5472, + "step": 12440 + }, + { + "epoch": 180.43, + "learning_rate": 1.0609375e-05, + "loss": 0.595, + "step": 12450 + }, + { + "epoch": 180.58, + "learning_rate": 1.053125e-05, + "loss": 0.5503, + "step": 12460 + }, + { + "epoch": 180.72, + "learning_rate": 1.0453125000000002e-05, + "loss": 0.5245, + "step": 12470 + }, + { + "epoch": 180.87, + "learning_rate": 1.0375e-05, + "loss": 0.5567, + "step": 12480 + }, + { + "epoch": 181.01, + "learning_rate": 1.0296875e-05, + "loss": 0.4907, + "step": 12490 + }, + { + "epoch": 181.16, + "learning_rate": 1.0218750000000001e-05, + "loss": 0.5697, + "step": 12500 + }, + { + "epoch": 181.16, + "eval_loss": 0.23992973566055298, + "eval_runtime": 578.3824, + "eval_samples_per_second": 5.868, + "eval_steps_per_second": 0.735, + "eval_wer": 0.15259390741973392, + "step": 12500 + }, + { + "epoch": 181.3, + "learning_rate": 1.0140625000000001e-05, + "loss": 0.5403, + "step": 12510 + }, + { + "epoch": 181.45, + "learning_rate": 1.00625e-05, + "loss": 0.5109, + "step": 12520 + }, + { + "epoch": 181.59, + "learning_rate": 9.984375e-06, + "loss": 0.4829, + "step": 12530 + }, + { + "epoch": 181.74, + "learning_rate": 9.90625e-06, + "loss": 0.5025, + "step": 12540 + }, + { + "epoch": 181.88, + "learning_rate": 9.828125e-06, + "loss": 0.5442, + "step": 12550 + }, + { + "epoch": 182.03, + "learning_rate": 9.750000000000002e-06, + "loss": 0.5034, + "step": 12560 + }, + { + "epoch": 182.17, + "learning_rate": 9.671875000000001e-06, + "loss": 0.4793, + "step": 12570 + }, + { + "epoch": 182.32, + "learning_rate": 9.59375e-06, + "loss": 0.5074, + "step": 12580 + }, + { + "epoch": 182.46, + "learning_rate": 9.515625000000001e-06, + "loss": 0.5132, + "step": 12590 + }, + { + "epoch": 182.61, + "learning_rate": 9.4375e-06, + "loss": 0.5744, + "step": 12600 + }, + { + "epoch": 182.75, + "learning_rate": 9.359375e-06, + "loss": 0.4714, + "step": 12610 + }, + { + "epoch": 182.9, + "learning_rate": 9.28125e-06, + "loss": 0.566, + "step": 12620 + }, + { + "epoch": 183.04, + "learning_rate": 9.203125e-06, + "loss": 0.5804, + "step": 12630 + }, + { + "epoch": 183.19, + "learning_rate": 9.125e-06, + "loss": 0.4781, + "step": 12640 + }, + { + "epoch": 183.33, + "learning_rate": 9.046875e-06, + "loss": 0.4948, + "step": 12650 + }, + { + "epoch": 183.48, + "learning_rate": 8.968750000000001e-06, + "loss": 0.4981, + "step": 12660 + }, + { + "epoch": 183.62, + "learning_rate": 8.890625000000001e-06, + "loss": 0.5249, + "step": 12670 + }, + { + "epoch": 183.77, + "learning_rate": 8.8125e-06, + "loss": 0.5197, + "step": 12680 + }, + { + "epoch": 183.91, + "learning_rate": 8.734375e-06, + "loss": 0.5002, + "step": 12690 + }, + { + "epoch": 184.06, + "learning_rate": 8.65625e-06, + "loss": 0.5451, + "step": 12700 + }, + { + "epoch": 184.2, + "learning_rate": 8.578125e-06, + "loss": 0.5132, + "step": 12710 + }, + { + "epoch": 184.35, + "learning_rate": 8.500000000000002e-06, + "loss": 0.4806, + "step": 12720 + }, + { + "epoch": 184.49, + "learning_rate": 8.421875e-06, + "loss": 0.5207, + "step": 12730 + }, + { + "epoch": 184.64, + "learning_rate": 8.34375e-06, + "loss": 0.5618, + "step": 12740 + }, + { + "epoch": 184.78, + "learning_rate": 8.265625000000001e-06, + "loss": 0.5574, + "step": 12750 + }, + { + "epoch": 184.93, + "learning_rate": 8.1875e-06, + "loss": 0.5181, + "step": 12760 + }, + { + "epoch": 185.07, + "learning_rate": 8.109375e-06, + "loss": 0.5471, + "step": 12770 + }, + { + "epoch": 185.22, + "learning_rate": 8.03125e-06, + "loss": 0.4863, + "step": 12780 + }, + { + "epoch": 185.36, + "learning_rate": 7.953125e-06, + "loss": 0.547, + "step": 12790 + }, + { + "epoch": 185.51, + "learning_rate": 7.875e-06, + "loss": 0.5103, + "step": 12800 + }, + { + "epoch": 185.65, + "learning_rate": 7.796875000000001e-06, + "loss": 0.5406, + "step": 12810 + }, + { + "epoch": 185.8, + "learning_rate": 7.718750000000001e-06, + "loss": 0.5136, + "step": 12820 + }, + { + "epoch": 185.94, + "learning_rate": 7.640625e-06, + "loss": 0.4991, + "step": 12830 + }, + { + "epoch": 186.09, + "learning_rate": 7.5625e-06, + "loss": 0.5515, + "step": 12840 + }, + { + "epoch": 186.23, + "learning_rate": 7.484375000000001e-06, + "loss": 0.4893, + "step": 12850 + }, + { + "epoch": 186.38, + "learning_rate": 7.4062500000000005e-06, + "loss": 0.6101, + "step": 12860 + }, + { + "epoch": 186.52, + "learning_rate": 7.328125000000001e-06, + "loss": 0.5348, + "step": 12870 + }, + { + "epoch": 186.67, + "learning_rate": 7.25e-06, + "loss": 0.554, + "step": 12880 + }, + { + "epoch": 186.81, + "learning_rate": 7.171875e-06, + "loss": 0.5119, + "step": 12890 + }, + { + "epoch": 186.96, + "learning_rate": 7.0937500000000005e-06, + "loss": 0.5348, + "step": 12900 + }, + { + "epoch": 187.1, + "learning_rate": 7.015625e-06, + "loss": 0.5884, + "step": 12910 + }, + { + "epoch": 187.25, + "learning_rate": 6.937500000000001e-06, + "loss": 0.496, + "step": 12920 + }, + { + "epoch": 187.39, + "learning_rate": 6.859375e-06, + "loss": 0.5119, + "step": 12930 + }, + { + "epoch": 187.54, + "learning_rate": 6.7812500000000005e-06, + "loss": 0.5284, + "step": 12940 + }, + { + "epoch": 187.68, + "learning_rate": 6.703125e-06, + "loss": 0.5597, + "step": 12950 + }, + { + "epoch": 187.83, + "learning_rate": 6.625000000000001e-06, + "loss": 0.5816, + "step": 12960 + }, + { + "epoch": 187.97, + "learning_rate": 6.546875000000001e-06, + "loss": 0.5311, + "step": 12970 + }, + { + "epoch": 188.12, + "learning_rate": 6.46875e-06, + "loss": 0.5023, + "step": 12980 + }, + { + "epoch": 188.26, + "learning_rate": 6.390625e-06, + "loss": 0.5199, + "step": 12990 + }, + { + "epoch": 188.41, + "learning_rate": 6.3125e-06, + "loss": 0.5379, + "step": 13000 + }, + { + "epoch": 188.41, + "eval_loss": 0.2375340759754181, + "eval_runtime": 574.5803, + "eval_samples_per_second": 5.907, + "eval_steps_per_second": 0.74, + "eval_wer": 0.15229807131293707, + "step": 13000 + }, + { + "epoch": 188.55, + "learning_rate": 6.234375000000001e-06, + "loss": 0.5227, + "step": 13010 + }, + { + "epoch": 188.7, + "learning_rate": 6.1562500000000006e-06, + "loss": 0.4946, + "step": 13020 + }, + { + "epoch": 188.84, + "learning_rate": 6.078125e-06, + "loss": 0.5618, + "step": 13030 + }, + { + "epoch": 188.99, + "learning_rate": 6e-06, + "loss": 0.5023, + "step": 13040 + }, + { + "epoch": 189.13, + "learning_rate": 5.921875e-06, + "loss": 0.4884, + "step": 13050 + }, + { + "epoch": 189.28, + "learning_rate": 5.843750000000001e-06, + "loss": 0.4912, + "step": 13060 + }, + { + "epoch": 189.42, + "learning_rate": 5.765625e-06, + "loss": 0.5511, + "step": 13070 + }, + { + "epoch": 189.57, + "learning_rate": 5.6875e-06, + "loss": 0.542, + "step": 13080 + }, + { + "epoch": 189.71, + "learning_rate": 5.609375e-06, + "loss": 0.5275, + "step": 13090 + }, + { + "epoch": 189.86, + "learning_rate": 5.531250000000001e-06, + "loss": 0.5719, + "step": 13100 + }, + { + "epoch": 190.0, + "learning_rate": 5.453125e-06, + "loss": 0.5686, + "step": 13110 + }, + { + "epoch": 190.14, + "learning_rate": 5.375e-06, + "loss": 0.5425, + "step": 13120 + }, + { + "epoch": 190.29, + "learning_rate": 5.296875000000001e-06, + "loss": 0.5322, + "step": 13130 + }, + { + "epoch": 190.43, + "learning_rate": 5.21875e-06, + "loss": 0.5146, + "step": 13140 + }, + { + "epoch": 190.58, + "learning_rate": 5.1406250000000004e-06, + "loss": 0.5364, + "step": 13150 + }, + { + "epoch": 190.72, + "learning_rate": 5.0625e-06, + "loss": 0.496, + "step": 13160 + }, + { + "epoch": 190.87, + "learning_rate": 4.984375e-06, + "loss": 0.6167, + "step": 13170 + }, + { + "epoch": 191.01, + "learning_rate": 4.906250000000001e-06, + "loss": 0.4898, + "step": 13180 + }, + { + "epoch": 191.16, + "learning_rate": 4.828125e-06, + "loss": 0.5216, + "step": 13190 + }, + { + "epoch": 191.3, + "learning_rate": 4.75e-06, + "loss": 0.5122, + "step": 13200 + }, + { + "epoch": 191.45, + "learning_rate": 4.671875000000001e-06, + "loss": 0.5293, + "step": 13210 + }, + { + "epoch": 191.59, + "learning_rate": 4.59375e-06, + "loss": 0.5072, + "step": 13220 + }, + { + "epoch": 191.74, + "learning_rate": 4.5156250000000005e-06, + "loss": 0.4934, + "step": 13230 + }, + { + "epoch": 191.88, + "learning_rate": 4.4375e-06, + "loss": 0.5224, + "step": 13240 + }, + { + "epoch": 192.03, + "learning_rate": 4.359375e-06, + "loss": 0.4804, + "step": 13250 + }, + { + "epoch": 192.17, + "learning_rate": 4.281250000000001e-06, + "loss": 0.6039, + "step": 13260 + }, + { + "epoch": 192.32, + "learning_rate": 4.2031250000000005e-06, + "loss": 0.4833, + "step": 13270 + }, + { + "epoch": 192.46, + "learning_rate": 4.125e-06, + "loss": 0.5236, + "step": 13280 + }, + { + "epoch": 192.61, + "learning_rate": 4.046875e-06, + "loss": 0.5312, + "step": 13290 + }, + { + "epoch": 192.75, + "learning_rate": 3.96875e-06, + "loss": 0.5886, + "step": 13300 + }, + { + "epoch": 192.9, + "learning_rate": 3.8906250000000005e-06, + "loss": 0.5098, + "step": 13310 + }, + { + "epoch": 193.04, + "learning_rate": 3.8125e-06, + "loss": 0.5223, + "step": 13320 + }, + { + "epoch": 193.19, + "learning_rate": 3.7343750000000006e-06, + "loss": 0.581, + "step": 13330 + }, + { + "epoch": 193.33, + "learning_rate": 3.65625e-06, + "loss": 0.4837, + "step": 13340 + }, + { + "epoch": 193.48, + "learning_rate": 3.578125e-06, + "loss": 0.5184, + "step": 13350 + }, + { + "epoch": 193.62, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.5145, + "step": 13360 + }, + { + "epoch": 193.77, + "learning_rate": 3.421875e-06, + "loss": 0.4956, + "step": 13370 + }, + { + "epoch": 193.91, + "learning_rate": 3.3437500000000004e-06, + "loss": 0.5211, + "step": 13380 + }, + { + "epoch": 194.06, + "learning_rate": 3.2656249999999998e-06, + "loss": 0.5218, + "step": 13390 + }, + { + "epoch": 194.2, + "learning_rate": 3.1875000000000004e-06, + "loss": 0.5457, + "step": 13400 + }, + { + "epoch": 194.35, + "learning_rate": 3.109375e-06, + "loss": 0.4737, + "step": 13410 + }, + { + "epoch": 194.49, + "learning_rate": 3.03125e-06, + "loss": 0.5828, + "step": 13420 + }, + { + "epoch": 194.64, + "learning_rate": 2.9531249999999998e-06, + "loss": 0.5597, + "step": 13430 + }, + { + "epoch": 194.78, + "learning_rate": 2.8750000000000004e-06, + "loss": 0.5, + "step": 13440 + }, + { + "epoch": 194.93, + "learning_rate": 2.7968750000000002e-06, + "loss": 0.5694, + "step": 13450 + }, + { + "epoch": 195.07, + "learning_rate": 2.71875e-06, + "loss": 0.5176, + "step": 13460 + }, + { + "epoch": 195.22, + "learning_rate": 2.6406250000000002e-06, + "loss": 0.5216, + "step": 13470 + }, + { + "epoch": 195.36, + "learning_rate": 2.5625e-06, + "loss": 0.489, + "step": 13480 + }, + { + "epoch": 195.51, + "learning_rate": 2.4843750000000002e-06, + "loss": 0.5186, + "step": 13490 + }, + { + "epoch": 195.65, + "learning_rate": 2.40625e-06, + "loss": 0.5022, + "step": 13500 + }, + { + "epoch": 195.65, + "eval_loss": 0.23950409889221191, + "eval_runtime": 592.332, + "eval_samples_per_second": 5.73, + "eval_steps_per_second": 0.718, + "eval_wer": 0.15194471151870753, + "step": 13500 + }, + { + "epoch": 195.8, + "learning_rate": 2.3281250000000003e-06, + "loss": 0.5106, + "step": 13510 + }, + { + "epoch": 195.94, + "learning_rate": 2.25e-06, + "loss": 0.4966, + "step": 13520 + }, + { + "epoch": 196.09, + "learning_rate": 2.171875e-06, + "loss": 0.5053, + "step": 13530 + }, + { + "epoch": 196.23, + "learning_rate": 2.09375e-06, + "loss": 0.5594, + "step": 13540 + }, + { + "epoch": 196.38, + "learning_rate": 2.0156250000000003e-06, + "loss": 0.5047, + "step": 13550 + }, + { + "epoch": 196.52, + "learning_rate": 1.9375e-06, + "loss": 0.5078, + "step": 13560 + }, + { + "epoch": 196.67, + "learning_rate": 1.859375e-06, + "loss": 0.5466, + "step": 13570 + }, + { + "epoch": 196.81, + "learning_rate": 1.7812499999999999e-06, + "loss": 0.5371, + "step": 13580 + }, + { + "epoch": 196.96, + "learning_rate": 1.703125e-06, + "loss": 0.5551, + "step": 13590 + }, + { + "epoch": 197.1, + "learning_rate": 1.6250000000000001e-06, + "loss": 0.5615, + "step": 13600 + }, + { + "epoch": 197.25, + "learning_rate": 1.5468750000000001e-06, + "loss": 0.5464, + "step": 13610 + }, + { + "epoch": 197.39, + "learning_rate": 1.46875e-06, + "loss": 0.5196, + "step": 13620 + }, + { + "epoch": 197.54, + "learning_rate": 1.3906250000000001e-06, + "loss": 0.4493, + "step": 13630 + }, + { + "epoch": 197.68, + "learning_rate": 1.3125e-06, + "loss": 0.5603, + "step": 13640 + }, + { + "epoch": 197.83, + "learning_rate": 1.2343750000000001e-06, + "loss": 0.5227, + "step": 13650 + }, + { + "epoch": 197.97, + "learning_rate": 1.15625e-06, + "loss": 0.4889, + "step": 13660 + }, + { + "epoch": 198.12, + "learning_rate": 1.078125e-06, + "loss": 0.5893, + "step": 13670 + }, + { + "epoch": 198.26, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.5509, + "step": 13680 + }, + { + "epoch": 198.41, + "learning_rate": 9.21875e-07, + "loss": 0.513, + "step": 13690 + }, + { + "epoch": 198.55, + "learning_rate": 8.437500000000001e-07, + "loss": 0.5744, + "step": 13700 + }, + { + "epoch": 198.7, + "learning_rate": 7.65625e-07, + "loss": 0.5457, + "step": 13710 + }, + { + "epoch": 198.84, + "learning_rate": 6.875000000000001e-07, + "loss": 0.5007, + "step": 13720 + }, + { + "epoch": 198.99, + "learning_rate": 6.093750000000001e-07, + "loss": 0.5829, + "step": 13730 + }, + { + "epoch": 199.13, + "learning_rate": 5.312500000000001e-07, + "loss": 0.5047, + "step": 13740 + }, + { + "epoch": 199.28, + "learning_rate": 4.53125e-07, + "loss": 0.5173, + "step": 13750 + }, + { + "epoch": 199.42, + "learning_rate": 3.75e-07, + "loss": 0.533, + "step": 13760 + }, + { + "epoch": 199.57, + "learning_rate": 2.96875e-07, + "loss": 0.5328, + "step": 13770 + }, + { + "epoch": 199.71, + "learning_rate": 2.1875000000000002e-07, + "loss": 0.5111, + "step": 13780 + }, + { + "epoch": 199.86, + "learning_rate": 1.40625e-07, + "loss": 0.5071, + "step": 13790 + }, + { + "epoch": 200.0, + "learning_rate": 6.250000000000001e-08, + "loss": 0.5151, + "step": 13800 + }, + { + "epoch": 200.0, + "step": 13800, + "total_flos": 2.0051176394243018e+20, + "train_loss": 0.32463424516760786, + "train_runtime": 60486.5281, + "train_samples_per_second": 7.255, + "train_steps_per_second": 0.228 } ], - "max_steps": 6900, - "num_train_epochs": 100, - "total_flos": 1.0025325448199992e+20, + "max_steps": 13800, + "num_train_epochs": 200, + "total_flos": 2.0051176394243018e+20, "trial_name": null, "trial_params": null }