{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "learning_rate": 3.5624999999999998e-06, "loss": 137.0348, "step": 100 }, { "epoch": 1.0, "learning_rate": 7.275e-06, "loss": 106.075, "step": 200 }, { "epoch": 1.5, "learning_rate": 1.1024999999999999e-05, "loss": 95.1437, "step": 300 }, { "epoch": 2.0, "learning_rate": 1.4775e-05, "loss": 88.0386, "step": 400 }, { "epoch": 2.5, "learning_rate": 1.8525e-05, "loss": 82.628, "step": 500 }, { "epoch": 2.5, "eval_loss": 79.55873107910156, "eval_runtime": 24.8027, "eval_samples_per_second": 19.877, "eval_steps_per_second": 9.959, "eval_wer": 1.0, "step": 500 }, { "epoch": 3.0, "learning_rate": 2.2274999999999996e-05, "loss": 73.1146, "step": 600 }, { "epoch": 3.5, "learning_rate": 2.6024999999999996e-05, "loss": 62.7571, "step": 700 }, { "epoch": 4.0, "learning_rate": 2.9775e-05, "loss": 48.9782, "step": 800 }, { "epoch": 4.5, "learning_rate": 3.3524999999999995e-05, "loss": 33.2033, "step": 900 }, { "epoch": 5.0, "learning_rate": 3.7275e-05, "loss": 17.5635, "step": 1000 }, { "epoch": 5.0, "eval_loss": 11.592885971069336, "eval_runtime": 24.3352, "eval_samples_per_second": 20.259, "eval_steps_per_second": 10.15, "eval_wer": 1.0, "step": 1000 }, { "epoch": 5.5, "learning_rate": 4.1025e-05, "loss": 8.6246, "step": 1100 }, { "epoch": 6.0, "learning_rate": 4.4775e-05, "loss": 6.5245, "step": 1200 }, { "epoch": 6.5, "learning_rate": 4.8525e-05, "loss": 6.4322, "step": 1300 }, { "epoch": 7.0, "learning_rate": 5.227499999999999e-05, "loss": 6.4273, "step": 1400 }, { "epoch": 7.5, "learning_rate": 5.602499999999999e-05, "loss": 6.4288, "step": 1500 }, { "epoch": 7.5, "eval_loss": 6.447471618652344, "eval_runtime": 24.3824, "eval_samples_per_second": 20.219, "eval_steps_per_second": 10.13, "eval_wer": 1.0, "step": 1500 }, { "epoch": 8.0, "learning_rate": 5.9774999999999996e-05, "loss": 6.4087, "step": 1600 }, { "epoch": 8.5, "learning_rate": 6.3525e-05, "loss": 6.4099, "step": 1700 }, { "epoch": 9.0, "learning_rate": 6.7275e-05, "loss": 6.4216, "step": 1800 }, { "epoch": 9.5, "learning_rate": 7.102499999999999e-05, "loss": 6.4159, "step": 1900 }, { "epoch": 10.0, "learning_rate": 7.477499999999999e-05, "loss": 6.4092, "step": 2000 }, { "epoch": 10.0, "eval_loss": 6.457908630371094, "eval_runtime": 24.1035, "eval_samples_per_second": 20.453, "eval_steps_per_second": 10.247, "eval_wer": 1.0, "step": 2000 }, { "epoch": 10.5, "learning_rate": 7.460833333333332e-05, "loss": 6.4008, "step": 2100 }, { "epoch": 11.0, "learning_rate": 7.419166666666666e-05, "loss": 6.4257, "step": 2200 }, { "epoch": 11.5, "learning_rate": 7.377499999999999e-05, "loss": 6.4109, "step": 2300 }, { "epoch": 12.0, "learning_rate": 7.335833333333333e-05, "loss": 6.4135, "step": 2400 }, { "epoch": 12.5, "learning_rate": 7.294166666666666e-05, "loss": 6.3982, "step": 2500 }, { "epoch": 12.5, "eval_loss": 6.466182708740234, "eval_runtime": 24.3477, "eval_samples_per_second": 20.248, "eval_steps_per_second": 10.145, "eval_wer": 1.0, "step": 2500 }, { "epoch": 13.0, "learning_rate": 7.252499999999999e-05, "loss": 6.4234, "step": 2600 }, { "epoch": 13.5, "learning_rate": 7.210833333333333e-05, "loss": 6.4333, "step": 2700 }, { "epoch": 14.0, "learning_rate": 7.169166666666666e-05, "loss": 6.3853, "step": 2800 }, { "epoch": 14.5, "learning_rate": 7.1275e-05, "loss": 6.4267, "step": 2900 }, { "epoch": 15.0, "learning_rate": 7.085833333333333e-05, "loss": 6.391, "step": 3000 }, { "epoch": 15.0, "eval_loss": 6.465456008911133, "eval_runtime": 24.1868, "eval_samples_per_second": 20.383, "eval_steps_per_second": 10.212, "eval_wer": 1.0, "step": 3000 }, { "epoch": 15.5, "learning_rate": 7.044166666666665e-05, "loss": 6.4101, "step": 3100 }, { "epoch": 16.0, "learning_rate": 7.0025e-05, "loss": 6.4082, "step": 3200 }, { "epoch": 16.5, "learning_rate": 6.960833333333332e-05, "loss": 6.4155, "step": 3300 }, { "epoch": 17.0, "learning_rate": 6.919166666666666e-05, "loss": 6.4019, "step": 3400 }, { "epoch": 17.5, "learning_rate": 6.877499999999999e-05, "loss": 6.4097, "step": 3500 }, { "epoch": 17.5, "eval_loss": 6.469109535217285, "eval_runtime": 24.7207, "eval_samples_per_second": 19.943, "eval_steps_per_second": 9.992, "eval_wer": 1.0, "step": 3500 }, { "epoch": 18.0, "learning_rate": 6.835833333333332e-05, "loss": 6.4073, "step": 3600 }, { "epoch": 18.5, "learning_rate": 6.794166666666666e-05, "loss": 6.4142, "step": 3700 }, { "epoch": 19.0, "learning_rate": 6.752499999999999e-05, "loss": 6.4013, "step": 3800 }, { "epoch": 19.5, "learning_rate": 6.710833333333333e-05, "loss": 6.4181, "step": 3900 }, { "epoch": 20.0, "learning_rate": 6.669166666666666e-05, "loss": 6.3986, "step": 4000 }, { "epoch": 20.0, "eval_loss": 6.470158100128174, "eval_runtime": 24.0989, "eval_samples_per_second": 20.457, "eval_steps_per_second": 10.249, "eval_wer": 1.0, "step": 4000 }, { "epoch": 20.5, "learning_rate": 6.6275e-05, "loss": 6.4044, "step": 4100 }, { "epoch": 21.0, "learning_rate": 6.585833333333333e-05, "loss": 6.4134, "step": 4200 }, { "epoch": 21.5, "learning_rate": 6.544166666666665e-05, "loss": 6.4066, "step": 4300 }, { "epoch": 22.0, "learning_rate": 6.5025e-05, "loss": 6.4098, "step": 4400 }, { "epoch": 22.5, "learning_rate": 6.460833333333332e-05, "loss": 6.4069, "step": 4500 }, { "epoch": 22.5, "eval_loss": 6.47609806060791, "eval_runtime": 24.5428, "eval_samples_per_second": 20.087, "eval_steps_per_second": 10.064, "eval_wer": 1.0, "step": 4500 }, { "epoch": 23.0, "learning_rate": 6.419166666666666e-05, "loss": 6.4075, "step": 4600 }, { "epoch": 23.5, "learning_rate": 6.377499999999999e-05, "loss": 6.4069, "step": 4700 }, { "epoch": 24.0, "learning_rate": 6.335833333333332e-05, "loss": 6.407, "step": 4800 }, { "epoch": 24.5, "learning_rate": 6.294166666666666e-05, "loss": 6.3966, "step": 4900 }, { "epoch": 25.0, "learning_rate": 6.252499999999999e-05, "loss": 6.4158, "step": 5000 }, { "epoch": 25.0, "eval_loss": 6.47500467300415, "eval_runtime": 24.1455, "eval_samples_per_second": 20.418, "eval_steps_per_second": 10.23, "eval_wer": 1.0, "step": 5000 }, { "epoch": 25.5, "learning_rate": 6.210833333333333e-05, "loss": 6.4081, "step": 5100 }, { "epoch": 26.0, "learning_rate": 6.169166666666667e-05, "loss": 6.4024, "step": 5200 }, { "epoch": 26.5, "learning_rate": 6.127499999999999e-05, "loss": 6.4122, "step": 5300 }, { "epoch": 27.0, "learning_rate": 6.0858333333333326e-05, "loss": 6.4011, "step": 5400 }, { "epoch": 27.5, "learning_rate": 6.044166666666666e-05, "loss": 6.4117, "step": 5500 }, { "epoch": 27.5, "eval_loss": 6.48163366317749, "eval_runtime": 24.2083, "eval_samples_per_second": 20.365, "eval_steps_per_second": 10.203, "eval_wer": 1.0, "step": 5500 }, { "epoch": 28.0, "learning_rate": 6.0024999999999995e-05, "loss": 6.4013, "step": 5600 }, { "epoch": 28.5, "learning_rate": 5.960833333333333e-05, "loss": 6.4158, "step": 5700 }, { "epoch": 29.0, "learning_rate": 5.9191666666666664e-05, "loss": 6.3951, "step": 5800 }, { "epoch": 29.5, "learning_rate": 5.877499999999999e-05, "loss": 6.4047, "step": 5900 }, { "epoch": 30.0, "learning_rate": 5.8358333333333327e-05, "loss": 6.4086, "step": 6000 }, { "epoch": 30.0, "eval_loss": 6.4805707931518555, "eval_runtime": 24.395, "eval_samples_per_second": 20.209, "eval_steps_per_second": 10.125, "eval_wer": 1.0, "step": 6000 }, { "epoch": 30.5, "learning_rate": 5.794166666666666e-05, "loss": 6.3924, "step": 6100 }, { "epoch": 31.0, "learning_rate": 5.7524999999999996e-05, "loss": 6.4216, "step": 6200 }, { "epoch": 31.5, "learning_rate": 5.710833333333333e-05, "loss": 6.3965, "step": 6300 }, { "epoch": 32.0, "learning_rate": 5.6691666666666665e-05, "loss": 6.4165, "step": 6400 }, { "epoch": 32.5, "learning_rate": 5.627499999999999e-05, "loss": 6.3992, "step": 6500 }, { "epoch": 32.5, "eval_loss": 6.487205505371094, "eval_runtime": 24.2919, "eval_samples_per_second": 20.295, "eval_steps_per_second": 10.168, "eval_wer": 1.0, "step": 6500 }, { "epoch": 33.0, "learning_rate": 5.585833333333333e-05, "loss": 6.414, "step": 6600 }, { "epoch": 33.5, "learning_rate": 5.544166666666666e-05, "loss": 6.4138, "step": 6700 }, { "epoch": 34.0, "learning_rate": 5.5024999999999996e-05, "loss": 6.3973, "step": 6800 }, { "epoch": 34.5, "learning_rate": 5.460833333333333e-05, "loss": 6.4154, "step": 6900 }, { "epoch": 35.0, "learning_rate": 5.419166666666666e-05, "loss": 6.3946, "step": 7000 }, { "epoch": 35.0, "eval_loss": 6.486586570739746, "eval_runtime": 24.3253, "eval_samples_per_second": 20.267, "eval_steps_per_second": 10.154, "eval_wer": 1.0, "step": 7000 }, { "epoch": 35.5, "learning_rate": 5.377499999999999e-05, "loss": 6.4203, "step": 7100 }, { "epoch": 36.0, "learning_rate": 5.335833333333333e-05, "loss": 6.3905, "step": 7200 }, { "epoch": 36.5, "learning_rate": 5.294166666666666e-05, "loss": 6.4104, "step": 7300 }, { "epoch": 37.0, "learning_rate": 5.2524999999999996e-05, "loss": 6.4002, "step": 7400 }, { "epoch": 37.5, "learning_rate": 5.210833333333333e-05, "loss": 6.4212, "step": 7500 }, { "epoch": 37.5, "eval_loss": 6.489477634429932, "eval_runtime": 24.1492, "eval_samples_per_second": 20.415, "eval_steps_per_second": 10.228, "eval_wer": 1.0, "step": 7500 }, { "epoch": 38.0, "learning_rate": 5.169166666666666e-05, "loss": 6.3871, "step": 7600 }, { "epoch": 38.5, "learning_rate": 5.127499999999999e-05, "loss": 6.3982, "step": 7700 }, { "epoch": 39.0, "learning_rate": 5.085833333333333e-05, "loss": 6.4097, "step": 7800 }, { "epoch": 39.5, "learning_rate": 5.044166666666666e-05, "loss": 6.4015, "step": 7900 }, { "epoch": 40.0, "learning_rate": 5.0024999999999996e-05, "loss": 6.4051, "step": 8000 }, { "epoch": 40.0, "eval_loss": 6.492602348327637, "eval_runtime": 24.1509, "eval_samples_per_second": 20.413, "eval_steps_per_second": 10.227, "eval_wer": 1.0, "step": 8000 }, { "epoch": 40.5, "learning_rate": 4.9608333333333324e-05, "loss": 6.3881, "step": 8100 }, { "epoch": 41.0, "learning_rate": 4.919166666666666e-05, "loss": 6.4196, "step": 8200 }, { "epoch": 41.5, "learning_rate": 4.877499999999999e-05, "loss": 6.4132, "step": 8300 }, { "epoch": 42.0, "learning_rate": 4.835833333333333e-05, "loss": 6.3967, "step": 8400 }, { "epoch": 42.5, "learning_rate": 4.794166666666666e-05, "loss": 6.398, "step": 8500 }, { "epoch": 42.5, "eval_loss": 6.50146484375, "eval_runtime": 24.2251, "eval_samples_per_second": 20.351, "eval_steps_per_second": 10.196, "eval_wer": 1.0, "step": 8500 }, { "epoch": 43.0, "learning_rate": 4.7524999999999996e-05, "loss": 6.4101, "step": 8600 }, { "epoch": 43.5, "learning_rate": 4.7108333333333324e-05, "loss": 6.3947, "step": 8700 }, { "epoch": 44.0, "learning_rate": 4.669166666666666e-05, "loss": 6.4115, "step": 8800 }, { "epoch": 44.5, "learning_rate": 4.627499999999999e-05, "loss": 6.4088, "step": 8900 }, { "epoch": 45.0, "learning_rate": 4.585833333333333e-05, "loss": 6.3967, "step": 9000 }, { "epoch": 45.0, "eval_loss": 6.496038913726807, "eval_runtime": 24.4306, "eval_samples_per_second": 20.18, "eval_steps_per_second": 10.11, "eval_wer": 1.0, "step": 9000 }, { "epoch": 45.5, "learning_rate": 4.544166666666666e-05, "loss": 6.401, "step": 9100 }, { "epoch": 46.0, "learning_rate": 4.5025e-05, "loss": 6.4063, "step": 9200 }, { "epoch": 46.5, "learning_rate": 4.4608333333333324e-05, "loss": 6.4101, "step": 9300 }, { "epoch": 47.0, "learning_rate": 4.419166666666666e-05, "loss": 6.3957, "step": 9400 }, { "epoch": 47.5, "learning_rate": 4.377499999999999e-05, "loss": 6.4096, "step": 9500 }, { "epoch": 47.5, "eval_loss": 6.500304222106934, "eval_runtime": 24.1983, "eval_samples_per_second": 20.373, "eval_steps_per_second": 10.207, "eval_wer": 1.0, "step": 9500 }, { "epoch": 48.0, "learning_rate": 4.335833333333333e-05, "loss": 6.3966, "step": 9600 }, { "epoch": 48.5, "learning_rate": 4.294166666666667e-05, "loss": 6.409, "step": 9700 }, { "epoch": 49.0, "learning_rate": 4.252499999999999e-05, "loss": 6.3979, "step": 9800 }, { "epoch": 49.5, "learning_rate": 4.2108333333333325e-05, "loss": 6.3985, "step": 9900 }, { "epoch": 50.0, "learning_rate": 4.169166666666666e-05, "loss": 6.4068, "step": 10000 }, { "epoch": 50.0, "eval_loss": 6.502621650695801, "eval_runtime": 24.1622, "eval_samples_per_second": 20.404, "eval_steps_per_second": 10.223, "eval_wer": 1.0, "step": 10000 }, { "epoch": 50.5, "learning_rate": 4.1274999999999994e-05, "loss": 6.4059, "step": 10100 }, { "epoch": 51.0, "learning_rate": 4.0858333333333335e-05, "loss": 6.4012, "step": 10200 }, { "epoch": 51.5, "learning_rate": 4.044166666666667e-05, "loss": 6.4118, "step": 10300 }, { "epoch": 52.0, "learning_rate": 4.002499999999999e-05, "loss": 6.3924, "step": 10400 }, { "epoch": 52.5, "learning_rate": 3.9608333333333325e-05, "loss": 6.4062, "step": 10500 }, { "epoch": 52.5, "eval_loss": 6.507063865661621, "eval_runtime": 24.2765, "eval_samples_per_second": 20.308, "eval_steps_per_second": 10.174, "eval_wer": 1.0, "step": 10500 }, { "epoch": 53.0, "learning_rate": 3.9191666666666666e-05, "loss": 6.3992, "step": 10600 }, { "epoch": 53.5, "learning_rate": 3.8775e-05, "loss": 6.4136, "step": 10700 }, { "epoch": 54.0, "learning_rate": 3.8358333333333335e-05, "loss": 6.3915, "step": 10800 }, { "epoch": 54.5, "learning_rate": 3.794166666666667e-05, "loss": 6.4096, "step": 10900 }, { "epoch": 55.0, "learning_rate": 3.7525e-05, "loss": 6.395, "step": 11000 }, { "epoch": 55.0, "eval_loss": 6.506567001342773, "eval_runtime": 24.2863, "eval_samples_per_second": 20.3, "eval_steps_per_second": 10.17, "eval_wer": 1.0, "step": 11000 }, { "epoch": 55.5, "learning_rate": 3.710833333333333e-05, "loss": 6.4042, "step": 11100 }, { "epoch": 56.0, "learning_rate": 3.6691666666666666e-05, "loss": 6.402, "step": 11200 }, { "epoch": 56.5, "learning_rate": 3.6275e-05, "loss": 6.4056, "step": 11300 }, { "epoch": 57.0, "learning_rate": 3.585833333333333e-05, "loss": 6.4005, "step": 11400 }, { "epoch": 57.5, "learning_rate": 3.544166666666666e-05, "loss": 6.4079, "step": 11500 }, { "epoch": 57.5, "eval_loss": 6.50927734375, "eval_runtime": 24.3197, "eval_samples_per_second": 20.272, "eval_steps_per_second": 10.156, "eval_wer": 1.0, "step": 11500 }, { "epoch": 58.0, "learning_rate": 3.5025e-05, "loss": 6.3957, "step": 11600 }, { "epoch": 58.5, "learning_rate": 3.460833333333333e-05, "loss": 6.4057, "step": 11700 }, { "epoch": 59.0, "learning_rate": 3.4191666666666667e-05, "loss": 6.3981, "step": 11800 }, { "epoch": 59.5, "learning_rate": 3.3774999999999994e-05, "loss": 6.394, "step": 11900 }, { "epoch": 60.0, "learning_rate": 3.335833333333333e-05, "loss": 6.411, "step": 12000 }, { "epoch": 60.0, "eval_loss": 6.510580539703369, "eval_runtime": 24.2221, "eval_samples_per_second": 20.353, "eval_steps_per_second": 10.197, "eval_wer": 1.0, "step": 12000 }, { "epoch": 60.5, "learning_rate": 3.294166666666666e-05, "loss": 6.3902, "step": 12100 }, { "epoch": 61.0, "learning_rate": 3.2525e-05, "loss": 6.413, "step": 12200 }, { "epoch": 61.5, "learning_rate": 3.210833333333333e-05, "loss": 6.3998, "step": 12300 }, { "epoch": 62.0, "learning_rate": 3.169166666666667e-05, "loss": 6.404, "step": 12400 }, { "epoch": 62.5, "learning_rate": 3.1274999999999994e-05, "loss": 6.4023, "step": 12500 }, { "epoch": 62.5, "eval_loss": 6.511165142059326, "eval_runtime": 24.2209, "eval_samples_per_second": 20.354, "eval_steps_per_second": 10.198, "eval_wer": 1.0, "step": 12500 }, { "epoch": 63.0, "learning_rate": 3.085833333333333e-05, "loss": 6.3992, "step": 12600 }, { "epoch": 63.5, "learning_rate": 3.0441666666666667e-05, "loss": 6.4093, "step": 12700 }, { "epoch": 64.0, "learning_rate": 3.0024999999999995e-05, "loss": 6.3919, "step": 12800 }, { "epoch": 64.5, "learning_rate": 2.9608333333333332e-05, "loss": 6.3972, "step": 12900 }, { "epoch": 65.0, "learning_rate": 2.9191666666666667e-05, "loss": 6.4053, "step": 13000 }, { "epoch": 65.0, "eval_loss": 6.51427698135376, "eval_runtime": 24.1929, "eval_samples_per_second": 20.378, "eval_steps_per_second": 10.21, "eval_wer": 1.0, "step": 13000 }, { "epoch": 65.5, "learning_rate": 2.8774999999999998e-05, "loss": 6.3993, "step": 13100 }, { "epoch": 66.0, "learning_rate": 2.8358333333333333e-05, "loss": 6.403, "step": 13200 }, { "epoch": 66.5, "learning_rate": 2.7941666666666664e-05, "loss": 6.3973, "step": 13300 }, { "epoch": 67.0, "learning_rate": 2.7524999999999998e-05, "loss": 6.405, "step": 13400 }, { "epoch": 67.5, "learning_rate": 2.7108333333333333e-05, "loss": 6.4103, "step": 13500 }, { "epoch": 67.5, "eval_loss": 6.517162322998047, "eval_runtime": 24.3081, "eval_samples_per_second": 20.281, "eval_steps_per_second": 10.161, "eval_wer": 1.0, "step": 13500 }, { "epoch": 68.0, "learning_rate": 2.6691666666666664e-05, "loss": 6.3907, "step": 13600 }, { "epoch": 68.5, "learning_rate": 2.6275e-05, "loss": 6.3932, "step": 13700 }, { "epoch": 69.0, "learning_rate": 2.5858333333333333e-05, "loss": 6.4082, "step": 13800 }, { "epoch": 69.5, "learning_rate": 2.5441666666666664e-05, "loss": 6.4119, "step": 13900 }, { "epoch": 70.0, "learning_rate": 2.5025e-05, "loss": 6.3899, "step": 14000 }, { "epoch": 70.0, "eval_loss": 6.518171310424805, "eval_runtime": 24.1998, "eval_samples_per_second": 20.372, "eval_steps_per_second": 10.207, "eval_wer": 1.0, "step": 14000 }, { "epoch": 70.5, "learning_rate": 2.4608333333333333e-05, "loss": 6.4104, "step": 14100 }, { "epoch": 71.0, "learning_rate": 2.4191666666666664e-05, "loss": 6.3909, "step": 14200 }, { "epoch": 71.5, "learning_rate": 2.3775e-05, "loss": 6.3933, "step": 14300 }, { "epoch": 72.0, "learning_rate": 2.3358333333333333e-05, "loss": 6.4065, "step": 14400 }, { "epoch": 72.5, "learning_rate": 2.2941666666666664e-05, "loss": 6.4054, "step": 14500 }, { "epoch": 72.5, "eval_loss": 6.519650936126709, "eval_runtime": 24.4435, "eval_samples_per_second": 20.169, "eval_steps_per_second": 10.105, "eval_wer": 1.0, "step": 14500 }, { "epoch": 73.0, "learning_rate": 2.2525e-05, "loss": 6.3945, "step": 14600 }, { "epoch": 73.5, "learning_rate": 2.2108333333333333e-05, "loss": 6.3972, "step": 14700 }, { "epoch": 74.0, "learning_rate": 2.1691666666666664e-05, "loss": 6.4027, "step": 14800 }, { "epoch": 74.5, "learning_rate": 2.1275e-05, "loss": 6.4082, "step": 14900 }, { "epoch": 75.0, "learning_rate": 2.085833333333333e-05, "loss": 6.391, "step": 15000 }, { "epoch": 75.0, "eval_loss": 6.51995849609375, "eval_runtime": 24.299, "eval_samples_per_second": 20.289, "eval_steps_per_second": 10.165, "eval_wer": 1.0, "step": 15000 }, { "epoch": 75.5, "learning_rate": 2.0441666666666664e-05, "loss": 6.4077, "step": 15100 }, { "epoch": 76.0, "learning_rate": 2.0025e-05, "loss": 6.3905, "step": 15200 }, { "epoch": 76.5, "learning_rate": 1.960833333333333e-05, "loss": 6.3987, "step": 15300 }, { "epoch": 77.0, "learning_rate": 1.9191666666666664e-05, "loss": 6.4004, "step": 15400 }, { "epoch": 77.5, "learning_rate": 1.8775e-05, "loss": 6.3988, "step": 15500 }, { "epoch": 77.5, "eval_loss": 6.522036075592041, "eval_runtime": 24.4796, "eval_samples_per_second": 20.139, "eval_steps_per_second": 10.09, "eval_wer": 1.0, "step": 15500 }, { "epoch": 78.0, "learning_rate": 1.8358333333333333e-05, "loss": 6.3996, "step": 15600 }, { "epoch": 78.5, "learning_rate": 1.7941666666666665e-05, "loss": 6.404, "step": 15700 }, { "epoch": 79.0, "learning_rate": 1.7525e-05, "loss": 6.3918, "step": 15800 }, { "epoch": 79.5, "learning_rate": 1.710833333333333e-05, "loss": 6.3915, "step": 15900 }, { "epoch": 80.0, "learning_rate": 1.6691666666666665e-05, "loss": 6.4059, "step": 16000 }, { "epoch": 80.0, "eval_loss": 6.522817611694336, "eval_runtime": 24.2749, "eval_samples_per_second": 20.309, "eval_steps_per_second": 10.175, "eval_wer": 1.0, "step": 16000 }, { "epoch": 80.5, "learning_rate": 1.6275e-05, "loss": 6.3983, "step": 16100 }, { "epoch": 81.0, "learning_rate": 1.585833333333333e-05, "loss": 6.3996, "step": 16200 }, { "epoch": 81.5, "learning_rate": 1.5445833333333334e-05, "loss": 6.4038, "step": 16300 }, { "epoch": 82.0, "learning_rate": 1.5029166666666665e-05, "loss": 6.3918, "step": 16400 }, { "epoch": 82.5, "learning_rate": 1.4612499999999998e-05, "loss": 6.392, "step": 16500 }, { "epoch": 82.5, "eval_loss": 6.523283004760742, "eval_runtime": 24.4538, "eval_samples_per_second": 20.16, "eval_steps_per_second": 10.101, "eval_wer": 1.0, "step": 16500 }, { "epoch": 83.0, "learning_rate": 1.419583333333333e-05, "loss": 6.4043, "step": 16600 }, { "epoch": 83.5, "learning_rate": 1.3779166666666667e-05, "loss": 6.3989, "step": 16700 }, { "epoch": 84.0, "learning_rate": 1.33625e-05, "loss": 6.3956, "step": 16800 }, { "epoch": 84.5, "learning_rate": 1.2945833333333332e-05, "loss": 6.403, "step": 16900 }, { "epoch": 85.0, "learning_rate": 1.2529166666666667e-05, "loss": 6.3947, "step": 17000 }, { "epoch": 85.0, "eval_loss": 6.525250434875488, "eval_runtime": 24.2494, "eval_samples_per_second": 20.33, "eval_steps_per_second": 10.186, "eval_wer": 1.0, "step": 17000 }, { "epoch": 85.5, "learning_rate": 1.21125e-05, "loss": 6.4083, "step": 17100 }, { "epoch": 86.0, "learning_rate": 1.1695833333333332e-05, "loss": 6.3874, "step": 17200 }, { "epoch": 86.5, "learning_rate": 1.1279166666666665e-05, "loss": 6.3944, "step": 17300 }, { "epoch": 87.0, "learning_rate": 1.08625e-05, "loss": 6.3974, "step": 17400 }, { "epoch": 87.5, "learning_rate": 1.0445833333333333e-05, "loss": 6.3966, "step": 17500 }, { "epoch": 87.5, "eval_loss": 6.525856971740723, "eval_runtime": 24.4696, "eval_samples_per_second": 20.147, "eval_steps_per_second": 10.094, "eval_wer": 1.0, "step": 17500 }, { "epoch": 88.0, "learning_rate": 1.0029166666666665e-05, "loss": 6.397, "step": 17600 }, { "epoch": 88.5, "learning_rate": 9.6125e-06, "loss": 6.4091, "step": 17700 }, { "epoch": 89.0, "learning_rate": 9.195833333333333e-06, "loss": 6.3844, "step": 17800 }, { "epoch": 89.5, "learning_rate": 8.779166666666665e-06, "loss": 6.4037, "step": 17900 }, { "epoch": 90.0, "learning_rate": 8.3625e-06, "loss": 6.3905, "step": 18000 }, { "epoch": 90.0, "eval_loss": 6.526439666748047, "eval_runtime": 24.3911, "eval_samples_per_second": 20.212, "eval_steps_per_second": 10.127, "eval_wer": 1.0, "step": 18000 }, { "epoch": 90.5, "learning_rate": 7.945833333333333e-06, "loss": 6.4008, "step": 18100 }, { "epoch": 91.0, "learning_rate": 7.529166666666666e-06, "loss": 6.3926, "step": 18200 }, { "epoch": 91.5, "learning_rate": 7.116666666666666e-06, "loss": 6.3982, "step": 18300 }, { "epoch": 92.0, "learning_rate": 6.699999999999999e-06, "loss": 6.3951, "step": 18400 }, { "epoch": 92.5, "learning_rate": 6.283333333333333e-06, "loss": 6.4003, "step": 18500 }, { "epoch": 92.5, "eval_loss": 6.527181148529053, "eval_runtime": 24.2624, "eval_samples_per_second": 20.32, "eval_steps_per_second": 10.18, "eval_wer": 1.0, "step": 18500 }, { "epoch": 93.0, "learning_rate": 5.866666666666666e-06, "loss": 6.3932, "step": 18600 }, { "epoch": 93.5, "learning_rate": 5.4499999999999995e-06, "loss": 6.4076, "step": 18700 }, { "epoch": 94.0, "learning_rate": 5.033333333333332e-06, "loss": 6.3857, "step": 18800 }, { "epoch": 94.5, "learning_rate": 4.616666666666667e-06, "loss": 6.4045, "step": 18900 }, { "epoch": 95.0, "learning_rate": 4.2e-06, "loss": 6.3877, "step": 19000 }, { "epoch": 95.0, "eval_loss": 6.527469635009766, "eval_runtime": 24.1993, "eval_samples_per_second": 20.372, "eval_steps_per_second": 10.207, "eval_wer": 1.0, "step": 19000 }, { "epoch": 95.5, "learning_rate": 3.783333333333333e-06, "loss": 6.4, "step": 19100 }, { "epoch": 96.0, "learning_rate": 3.3666666666666665e-06, "loss": 6.3918, "step": 19200 }, { "epoch": 96.5, "learning_rate": 2.9499999999999997e-06, "loss": 6.4001, "step": 19300 }, { "epoch": 97.0, "learning_rate": 2.533333333333333e-06, "loss": 6.3935, "step": 19400 }, { "epoch": 97.5, "learning_rate": 2.1166666666666666e-06, "loss": 6.3903, "step": 19500 }, { "epoch": 97.5, "eval_loss": 6.527697563171387, "eval_runtime": 24.3683, "eval_samples_per_second": 20.231, "eval_steps_per_second": 10.136, "eval_wer": 1.0, "step": 19500 }, { "epoch": 98.0, "learning_rate": 1.7e-06, "loss": 6.4019, "step": 19600 }, { "epoch": 98.5, "learning_rate": 1.2833333333333333e-06, "loss": 6.4, "step": 19700 }, { "epoch": 99.0, "learning_rate": 8.666666666666666e-07, "loss": 6.3896, "step": 19800 }, { "epoch": 99.5, "learning_rate": 4.5e-07, "loss": 6.3972, "step": 19900 }, { "epoch": 100.0, "learning_rate": 3.3333333333333334e-08, "loss": 6.3944, "step": 20000 }, { "epoch": 100.0, "eval_loss": 6.5278449058532715, "eval_runtime": 24.1079, "eval_samples_per_second": 20.45, "eval_steps_per_second": 10.246, "eval_wer": 1.0, "step": 20000 }, { "epoch": 100.0, "step": 20000, "total_flos": 2.051320516956903e+19, "train_loss": 9.81739658203125, "train_runtime": 23393.5837, "train_samples_per_second": 27.272, "train_steps_per_second": 0.855 } ], "max_steps": 20000, "num_train_epochs": 100, "total_flos": 2.051320516956903e+19, "trial_name": null, "trial_params": null }