{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 256.4102564102564,
  "global_step": 20000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.41,
      "learning_rate": 0.000499,
      "loss": 1.4757,
      "step": 500
    },
    {
      "epoch": 6.41,
      "eval_loss": 0.061434268951416016,
      "eval_runtime": 68.9385,
      "eval_samples_per_second": 26.937,
      "eval_steps_per_second": 1.131,
      "eval_wer": 0.03469206911829863,
      "step": 500
    },
    {
      "epoch": 12.82,
      "learning_rate": 0.0004872051282051282,
      "loss": 0.0624,
      "step": 1000
    },
    {
      "epoch": 12.82,
      "eval_loss": 0.05250529572367668,
      "eval_runtime": 68.4693,
      "eval_samples_per_second": 27.122,
      "eval_steps_per_second": 1.139,
      "eval_wer": 0.027735932653965442,
      "step": 1000
    },
    {
      "epoch": 19.23,
      "learning_rate": 0.0004743846153846154,
      "loss": 0.0388,
      "step": 1500
    },
    {
      "epoch": 19.23,
      "eval_loss": 0.06932416558265686,
      "eval_runtime": 67.8311,
      "eval_samples_per_second": 27.377,
      "eval_steps_per_second": 1.15,
      "eval_wer": 0.024147097917589722,
      "step": 1500
    },
    {
      "epoch": 25.64,
      "learning_rate": 0.0004615641025641026,
      "loss": 0.03,
      "step": 2000
    },
    {
      "epoch": 25.64,
      "eval_loss": 0.0665554478764534,
      "eval_runtime": 69.0525,
      "eval_samples_per_second": 26.893,
      "eval_steps_per_second": 1.13,
      "eval_wer": 0.024412937527691626,
      "step": 2000
    },
    {
      "epoch": 32.05,
      "learning_rate": 0.0004487435897435898,
      "loss": 0.0235,
      "step": 2500
    },
    {
      "epoch": 32.05,
      "eval_loss": 0.06039978191256523,
      "eval_runtime": 67.8702,
      "eval_samples_per_second": 27.361,
      "eval_steps_per_second": 1.149,
      "eval_wer": 0.026007975188303056,
      "step": 2500
    },
    {
      "epoch": 38.46,
      "learning_rate": 0.0004359230769230769,
      "loss": 0.0226,
      "step": 3000
    },
    {
      "epoch": 38.46,
      "eval_loss": 0.06249115616083145,
      "eval_runtime": 67.3401,
      "eval_samples_per_second": 27.576,
      "eval_steps_per_second": 1.158,
      "eval_wer": 0.022950819672131147,
      "step": 3000
    },
    {
      "epoch": 44.87,
      "learning_rate": 0.0004231025641025641,
      "loss": 0.0163,
      "step": 3500
    },
    {
      "epoch": 44.87,
      "eval_loss": 0.06026700139045715,
      "eval_runtime": 67.7207,
      "eval_samples_per_second": 27.421,
      "eval_steps_per_second": 1.152,
      "eval_wer": 0.01953921134249003,
      "step": 3500
    },
    {
      "epoch": 51.28,
      "learning_rate": 0.0004102820512820513,
      "loss": 0.0157,
      "step": 4000
    },
    {
      "epoch": 51.28,
      "eval_loss": 0.0627710148692131,
      "eval_runtime": 67.8528,
      "eval_samples_per_second": 27.368,
      "eval_steps_per_second": 1.15,
      "eval_wer": 0.02091271599468321,
      "step": 4000
    },
    {
      "epoch": 57.69,
      "learning_rate": 0.0003974615384615385,
      "loss": 0.0152,
      "step": 4500
    },
    {
      "epoch": 57.69,
      "eval_loss": 0.06921559572219849,
      "eval_runtime": 67.9506,
      "eval_samples_per_second": 27.329,
      "eval_steps_per_second": 1.148,
      "eval_wer": 0.023836951705804164,
      "step": 4500
    },
    {
      "epoch": 64.1,
      "learning_rate": 0.00038464102564102567,
      "loss": 0.0122,
      "step": 5000
    },
    {
      "epoch": 64.1,
      "eval_loss": 0.06069288030266762,
      "eval_runtime": 67.6564,
      "eval_samples_per_second": 27.448,
      "eval_steps_per_second": 1.153,
      "eval_wer": 0.02100132919805051,
      "step": 5000
    },
    {
      "epoch": 70.51,
      "learning_rate": 0.0003718205128205128,
      "loss": 0.011,
      "step": 5500
    },
    {
      "epoch": 70.51,
      "eval_loss": 0.06082445755600929,
      "eval_runtime": 68.2551,
      "eval_samples_per_second": 27.207,
      "eval_steps_per_second": 1.143,
      "eval_wer": 0.021311475409836064,
      "step": 5500
    },
    {
      "epoch": 76.92,
      "learning_rate": 0.000359,
      "loss": 0.0114,
      "step": 6000
    },
    {
      "epoch": 76.92,
      "eval_loss": 0.06809388101100922,
      "eval_runtime": 68.8171,
      "eval_samples_per_second": 26.985,
      "eval_steps_per_second": 1.133,
      "eval_wer": 0.021089942401417813,
      "step": 6000
    },
    {
      "epoch": 83.33,
      "learning_rate": 0.00034620512820512825,
      "loss": 0.0106,
      "step": 6500
    },
    {
      "epoch": 83.33,
      "eval_loss": 0.061287231743335724,
      "eval_runtime": 67.6622,
      "eval_samples_per_second": 27.445,
      "eval_steps_per_second": 1.153,
      "eval_wer": 0.02100132919805051,
      "step": 6500
    },
    {
      "epoch": 89.74,
      "learning_rate": 0.0003333846153846154,
      "loss": 0.0081,
      "step": 7000
    },
    {
      "epoch": 89.74,
      "eval_loss": 0.06544984877109528,
      "eval_runtime": 68.1647,
      "eval_samples_per_second": 27.243,
      "eval_steps_per_second": 1.144,
      "eval_wer": 0.019627824545857334,
      "step": 7000
    },
    {
      "epoch": 96.15,
      "learning_rate": 0.00032056410256410257,
      "loss": 0.0078,
      "step": 7500
    },
    {
      "epoch": 96.15,
      "eval_loss": 0.06121820956468582,
      "eval_runtime": 68.399,
      "eval_samples_per_second": 27.15,
      "eval_steps_per_second": 1.14,
      "eval_wer": 0.01905183872396987,
      "step": 7500
    },
    {
      "epoch": 102.56,
      "learning_rate": 0.00030774358974358976,
      "loss": 0.0082,
      "step": 8000
    },
    {
      "epoch": 102.56,
      "eval_loss": 0.07580074667930603,
      "eval_runtime": 67.4807,
      "eval_samples_per_second": 27.519,
      "eval_steps_per_second": 1.156,
      "eval_wer": 0.02365972529906956,
      "step": 8000
    },
    {
      "epoch": 108.97,
      "learning_rate": 0.0002949230769230769,
      "loss": 0.0078,
      "step": 8500
    },
    {
      "epoch": 108.97,
      "eval_loss": 0.06641749292612076,
      "eval_runtime": 67.4225,
      "eval_samples_per_second": 27.543,
      "eval_steps_per_second": 1.157,
      "eval_wer": 0.02060256978289765,
      "step": 8500
    },
    {
      "epoch": 115.38,
      "learning_rate": 0.00028210256410256414,
      "loss": 0.0075,
      "step": 9000
    },
    {
      "epoch": 115.38,
      "eval_loss": 0.06580852717161179,
      "eval_runtime": 67.9347,
      "eval_samples_per_second": 27.335,
      "eval_steps_per_second": 1.148,
      "eval_wer": 0.019716437749224634,
      "step": 9000
    },
    {
      "epoch": 121.79,
      "learning_rate": 0.0002692820512820513,
      "loss": 0.0052,
      "step": 9500
    },
    {
      "epoch": 121.79,
      "eval_loss": 0.06690431386232376,
      "eval_runtime": 66.8582,
      "eval_samples_per_second": 27.775,
      "eval_steps_per_second": 1.167,
      "eval_wer": 0.021843154630039874,
      "step": 9500
    },
    {
      "epoch": 128.21,
      "learning_rate": 0.00025646153846153847,
      "loss": 0.0054,
      "step": 10000
    },
    {
      "epoch": 128.21,
      "eval_loss": 0.06948971748352051,
      "eval_runtime": 67.1148,
      "eval_samples_per_second": 27.669,
      "eval_steps_per_second": 1.162,
      "eval_wer": 0.021089942401417813,
      "step": 10000
    },
    {
      "epoch": 134.62,
      "learning_rate": 0.00024364102564102563,
      "loss": 0.0053,
      "step": 10500
    },
    {
      "epoch": 134.62,
      "eval_loss": 0.07259159535169601,
      "eval_runtime": 66.7066,
      "eval_samples_per_second": 27.838,
      "eval_steps_per_second": 1.169,
      "eval_wer": 0.022684980062029243,
      "step": 10500
    },
    {
      "epoch": 141.03,
      "learning_rate": 0.00023082051282051282,
      "loss": 0.0046,
      "step": 11000
    },
    {
      "epoch": 141.03,
      "eval_loss": 0.07016939669847488,
      "eval_runtime": 68.2063,
      "eval_samples_per_second": 27.226,
      "eval_steps_per_second": 1.144,
      "eval_wer": 0.021178555604785113,
      "step": 11000
    },
    {
      "epoch": 147.44,
      "learning_rate": 0.000218,
      "loss": 0.0043,
      "step": 11500
    },
    {
      "epoch": 147.44,
      "eval_loss": 0.08461024612188339,
      "eval_runtime": 66.3999,
      "eval_samples_per_second": 27.967,
      "eval_steps_per_second": 1.175,
      "eval_wer": 0.020026583961010192,
      "step": 11500
    },
    {
      "epoch": 153.85,
      "learning_rate": 0.00020517948717948718,
      "loss": 0.0041,
      "step": 12000
    },
    {
      "epoch": 153.85,
      "eval_loss": 0.07643292099237442,
      "eval_runtime": 66.6168,
      "eval_samples_per_second": 27.876,
      "eval_steps_per_second": 1.171,
      "eval_wer": 0.020026583961010192,
      "step": 12000
    },
    {
      "epoch": 160.26,
      "learning_rate": 0.00019235897435897437,
      "loss": 0.0032,
      "step": 12500
    },
    {
      "epoch": 160.26,
      "eval_loss": 0.0785411074757576,
      "eval_runtime": 67.7953,
      "eval_samples_per_second": 27.391,
      "eval_steps_per_second": 1.151,
      "eval_wer": 0.02007089056269384,
      "step": 12500
    },
    {
      "epoch": 166.67,
      "learning_rate": 0.0001795641025641026,
      "loss": 0.0028,
      "step": 13000
    },
    {
      "epoch": 166.67,
      "eval_loss": 0.08392436057329178,
      "eval_runtime": 67.1052,
      "eval_samples_per_second": 27.673,
      "eval_steps_per_second": 1.162,
      "eval_wer": 0.019672131147540985,
      "step": 13000
    },
    {
      "epoch": 173.08,
      "learning_rate": 0.00016674358974358975,
      "loss": 0.0035,
      "step": 13500
    },
    {
      "epoch": 173.08,
      "eval_loss": 0.07846853882074356,
      "eval_runtime": 66.3462,
      "eval_samples_per_second": 27.99,
      "eval_steps_per_second": 1.176,
      "eval_wer": 0.02100132919805051,
      "step": 13500
    },
    {
      "epoch": 179.49,
      "learning_rate": 0.00015394871794871794,
      "loss": 0.0027,
      "step": 14000
    },
    {
      "epoch": 179.49,
      "eval_loss": 0.07303400337696075,
      "eval_runtime": 66.6716,
      "eval_samples_per_second": 27.853,
      "eval_steps_per_second": 1.17,
      "eval_wer": 0.018785999113867965,
      "step": 14000
    },
    {
      "epoch": 185.9,
      "learning_rate": 0.00014112820512820513,
      "loss": 0.002,
      "step": 14500
    },
    {
      "epoch": 185.9,
      "eval_loss": 0.07940459251403809,
      "eval_runtime": 66.4899,
      "eval_samples_per_second": 27.929,
      "eval_steps_per_second": 1.173,
      "eval_wer": 0.019317678334071775,
      "step": 14500
    },
    {
      "epoch": 192.31,
      "learning_rate": 0.00012830769230769232,
      "loss": 0.002,
      "step": 15000
    },
    {
      "epoch": 192.31,
      "eval_loss": 0.08587377518415451,
      "eval_runtime": 67.8686,
      "eval_samples_per_second": 27.362,
      "eval_steps_per_second": 1.149,
      "eval_wer": 0.02113424900310146,
      "step": 15000
    },
    {
      "epoch": 198.72,
      "learning_rate": 0.00011548717948717949,
      "loss": 0.0019,
      "step": 15500
    },
    {
      "epoch": 198.72,
      "eval_loss": 0.07269652187824249,
      "eval_runtime": 66.1737,
      "eval_samples_per_second": 28.063,
      "eval_steps_per_second": 1.179,
      "eval_wer": 0.018342933097031458,
      "step": 15500
    },
    {
      "epoch": 205.13,
      "learning_rate": 0.00010266666666666668,
      "loss": 0.0017,
      "step": 16000
    },
    {
      "epoch": 205.13,
      "eval_loss": 0.07843895256519318,
      "eval_runtime": 67.2939,
      "eval_samples_per_second": 27.595,
      "eval_steps_per_second": 1.159,
      "eval_wer": 0.018653079308817013,
      "step": 16000
    },
    {
      "epoch": 211.54,
      "learning_rate": 8.984615384615384e-05,
      "loss": 0.0016,
      "step": 16500
    },
    {
      "epoch": 211.54,
      "eval_loss": 0.08008446544408798,
      "eval_runtime": 67.2442,
      "eval_samples_per_second": 27.616,
      "eval_steps_per_second": 1.16,
      "eval_wer": 0.019627824545857334,
      "step": 16500
    },
    {
      "epoch": 217.95,
      "learning_rate": 7.702564102564103e-05,
      "loss": 0.0014,
      "step": 17000
    },
    {
      "epoch": 217.95,
      "eval_loss": 0.0820729061961174,
      "eval_runtime": 67.2612,
      "eval_samples_per_second": 27.609,
      "eval_steps_per_second": 1.16,
      "eval_wer": 0.01847585290208241,
      "step": 17000
    },
    {
      "epoch": 224.36,
      "learning_rate": 6.420512820512821e-05,
      "loss": 0.0011,
      "step": 17500
    },
    {
      "epoch": 224.36,
      "eval_loss": 0.08215450495481491,
      "eval_runtime": 67.1514,
      "eval_samples_per_second": 27.654,
      "eval_steps_per_second": 1.162,
      "eval_wer": 0.017634027470093044,
      "step": 17500
    },
    {
      "epoch": 230.77,
      "learning_rate": 5.1384615384615385e-05,
      "loss": 0.001,
      "step": 18000
    },
    {
      "epoch": 230.77,
      "eval_loss": 0.0855555310845375,
      "eval_runtime": 67.2619,
      "eval_samples_per_second": 27.609,
      "eval_steps_per_second": 1.16,
      "eval_wer": 0.017058041648205582,
      "step": 18000
    },
    {
      "epoch": 237.18,
      "learning_rate": 3.856410256410257e-05,
      "loss": 0.001,
      "step": 18500
    },
    {
      "epoch": 237.18,
      "eval_loss": 0.07919255644083023,
      "eval_runtime": 66.733,
      "eval_samples_per_second": 27.827,
      "eval_steps_per_second": 1.169,
      "eval_wer": 0.017589720868409393,
      "step": 18500
    },
    {
      "epoch": 243.59,
      "learning_rate": 2.574358974358974e-05,
      "loss": 0.001,
      "step": 19000
    },
    {
      "epoch": 243.59,
      "eval_loss": 0.08259344846010208,
      "eval_runtime": 67.3537,
      "eval_samples_per_second": 27.571,
      "eval_steps_per_second": 1.158,
      "eval_wer": 0.017279574656623838,
      "step": 19000
    },
    {
      "epoch": 250.0,
      "learning_rate": 1.2923076923076924e-05,
      "loss": 0.0006,
      "step": 19500
    },
    {
      "epoch": 250.0,
      "eval_loss": 0.08542538434267044,
      "eval_runtime": 66.6818,
      "eval_samples_per_second": 27.849,
      "eval_steps_per_second": 1.17,
      "eval_wer": 0.016969428444838282,
      "step": 19500
    },
    {
      "epoch": 256.41,
      "learning_rate": 1.282051282051282e-07,
      "loss": 0.0007,
      "step": 20000
    },
    {
      "epoch": 256.41,
      "eval_loss": 0.08499366790056229,
      "eval_runtime": 66.4242,
      "eval_samples_per_second": 27.957,
      "eval_steps_per_second": 1.174,
      "eval_wer": 0.016747895436420027,
      "step": 20000
    },
    {
      "epoch": 256.41,
      "step": 20000,
      "total_flos": 2.406420737737408e+20,
      "train_loss": 0.04605116495639086,
      "train_runtime": 87629.7749,
      "train_samples_per_second": 21.91,
      "train_steps_per_second": 0.228
    }
  ],
  "max_steps": 20000,
  "num_train_epochs": 257,
  "total_flos": 2.406420737737408e+20,
  "trial_name": null,
  "trial_params": null
}