{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 6660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "learning_rate": 7.275e-06, "loss": 13.5875, "step": 100 }, { "epoch": 1.8, "learning_rate": 1.4775e-05, "loss": 5.5556, "step": 200 }, { "epoch": 2.7, "learning_rate": 2.2274999999999996e-05, "loss": 4.0171, "step": 300 }, { "epoch": 3.6, "learning_rate": 2.9775e-05, "loss": 3.4219, "step": 400 }, { "epoch": 3.6, "eval_loss": 3.3127081394195557, "eval_runtime": 73.7584, "eval_samples_per_second": 22.031, "eval_steps_per_second": 1.383, "eval_wer": 1.0, "step": 400 }, { "epoch": 4.5, "learning_rate": 3.7275e-05, "loss": 3.2104, "step": 500 }, { "epoch": 5.41, "learning_rate": 4.4775e-05, "loss": 3.152, "step": 600 }, { "epoch": 6.31, "learning_rate": 5.227499999999999e-05, "loss": 3.0987, "step": 700 }, { "epoch": 7.21, "learning_rate": 5.9774999999999996e-05, "loss": 3.0399, "step": 800 }, { "epoch": 7.21, "eval_loss": 3.0330417156219482, "eval_runtime": 71.6796, "eval_samples_per_second": 22.67, "eval_steps_per_second": 1.423, "eval_wer": 1.0, "step": 800 }, { "epoch": 8.11, "learning_rate": 6.7275e-05, "loss": 3.0035, "step": 900 }, { "epoch": 9.01, "learning_rate": 7.477499999999999e-05, "loss": 2.7063, "step": 1000 }, { "epoch": 9.91, "learning_rate": 7.371466431095406e-05, "loss": 2.0137, "step": 1100 }, { "epoch": 10.81, "learning_rate": 7.238957597173144e-05, "loss": 1.5756, "step": 1200 }, { "epoch": 10.81, "eval_loss": 0.6108289361000061, "eval_runtime": 70.7969, "eval_samples_per_second": 22.953, "eval_steps_per_second": 1.441, "eval_wer": 0.572425678586816, "step": 1200 }, { "epoch": 11.71, "learning_rate": 7.106448763250883e-05, "loss": 1.3527, "step": 1300 }, { "epoch": 12.61, "learning_rate": 6.973939929328621e-05, "loss": 1.2067, "step": 1400 }, { "epoch": 13.51, "learning_rate": 6.841431095406359e-05, "loss": 1.1508, "step": 1500 }, { "epoch": 14.41, "learning_rate": 6.708922261484098e-05, "loss": 1.0995, "step": 1600 }, { "epoch": 14.41, "eval_loss": 0.30910709500312805, "eval_runtime": 70.2841, "eval_samples_per_second": 23.12, "eval_steps_per_second": 1.451, "eval_wer": 0.31538130116329166, "step": 1600 }, { "epoch": 15.32, "learning_rate": 6.576413427561838e-05, "loss": 1.0495, "step": 1700 }, { "epoch": 16.22, "learning_rate": 6.443904593639576e-05, "loss": 1.0183, "step": 1800 }, { "epoch": 17.12, "learning_rate": 6.311395759717314e-05, "loss": 0.9867, "step": 1900 }, { "epoch": 18.02, "learning_rate": 6.178886925795053e-05, "loss": 0.9639, "step": 2000 }, { "epoch": 18.02, "eval_loss": 0.2596471905708313, "eval_runtime": 70.5333, "eval_samples_per_second": 23.039, "eval_steps_per_second": 1.446, "eval_wer": 0.2841016803102111, "step": 2000 }, { "epoch": 18.92, "learning_rate": 6.046378091872791e-05, "loss": 0.9383, "step": 2100 }, { "epoch": 19.82, "learning_rate": 5.913869257950529e-05, "loss": 0.9041, "step": 2200 }, { "epoch": 20.72, "learning_rate": 5.781360424028268e-05, "loss": 0.8936, "step": 2300 }, { "epoch": 21.62, "learning_rate": 5.648851590106007e-05, "loss": 0.9032, "step": 2400 }, { "epoch": 21.62, "eval_loss": 0.22702418267726898, "eval_runtime": 71.1235, "eval_samples_per_second": 22.848, "eval_steps_per_second": 1.434, "eval_wer": 0.2513571736320552, "step": 2400 }, { "epoch": 22.52, "learning_rate": 5.516342756183745e-05, "loss": 0.8739, "step": 2500 }, { "epoch": 23.42, "learning_rate": 5.383833922261484e-05, "loss": 0.8602, "step": 2600 }, { "epoch": 24.32, "learning_rate": 5.2513250883392223e-05, "loss": 0.8483, "step": 2700 }, { "epoch": 25.23, "learning_rate": 5.11881625441696e-05, "loss": 0.8145, "step": 2800 }, { "epoch": 25.23, "eval_loss": 0.21715673804283142, "eval_runtime": 70.4062, "eval_samples_per_second": 23.08, "eval_steps_per_second": 1.449, "eval_wer": 0.24834123222748816, "step": 2800 }, { "epoch": 26.13, "learning_rate": 4.986307420494699e-05, "loss": 0.8245, "step": 2900 }, { "epoch": 27.03, "learning_rate": 4.853798586572438e-05, "loss": 0.8085, "step": 3000 }, { "epoch": 27.93, "learning_rate": 4.721289752650177e-05, "loss": 0.8101, "step": 3100 }, { "epoch": 28.83, "learning_rate": 4.588780918727915e-05, "loss": 0.7845, "step": 3200 }, { "epoch": 28.83, "eval_loss": 0.2083793580532074, "eval_runtime": 70.1439, "eval_samples_per_second": 23.167, "eval_steps_per_second": 1.454, "eval_wer": 0.23326152520465315, "step": 3200 }, { "epoch": 29.73, "learning_rate": 4.4562720848056537e-05, "loss": 0.7816, "step": 3300 }, { "epoch": 30.63, "learning_rate": 4.3237632508833916e-05, "loss": 0.7655, "step": 3400 }, { "epoch": 31.53, "learning_rate": 4.19125441696113e-05, "loss": 0.762, "step": 3500 }, { "epoch": 32.43, "learning_rate": 4.058745583038869e-05, "loss": 0.7694, "step": 3600 }, { "epoch": 32.43, "eval_loss": 0.19744575023651123, "eval_runtime": 70.5594, "eval_samples_per_second": 23.03, "eval_steps_per_second": 1.446, "eval_wer": 0.22343817320120637, "step": 3600 }, { "epoch": 33.33, "learning_rate": 3.926236749116607e-05, "loss": 0.7517, "step": 3700 }, { "epoch": 34.23, "learning_rate": 3.7937279151943456e-05, "loss": 0.7401, "step": 3800 }, { "epoch": 35.14, "learning_rate": 3.661219081272084e-05, "loss": 0.7196, "step": 3900 }, { "epoch": 36.04, "learning_rate": 3.528710247349823e-05, "loss": 0.7333, "step": 4000 }, { "epoch": 36.04, "eval_loss": 0.20204411447048187, "eval_runtime": 69.6934, "eval_samples_per_second": 23.316, "eval_steps_per_second": 1.464, "eval_wer": 0.21852649719948297, "step": 4000 }, { "epoch": 36.94, "learning_rate": 3.3962014134275616e-05, "loss": 0.7251, "step": 4100 }, { "epoch": 37.84, "learning_rate": 3.2636925795053e-05, "loss": 0.7116, "step": 4200 }, { "epoch": 38.74, "learning_rate": 3.131183745583038e-05, "loss": 0.7236, "step": 4300 }, { "epoch": 39.64, "learning_rate": 2.998674911660777e-05, "loss": 0.693, "step": 4400 }, { "epoch": 39.64, "eval_loss": 0.194662407040596, "eval_runtime": 69.8883, "eval_samples_per_second": 23.251, "eval_steps_per_second": 1.459, "eval_wer": 0.21482119775958639, "step": 4400 }, { "epoch": 40.54, "learning_rate": 2.866166077738516e-05, "loss": 0.6943, "step": 4500 }, { "epoch": 41.44, "learning_rate": 2.7336572438162543e-05, "loss": 0.6938, "step": 4600 }, { "epoch": 42.34, "learning_rate": 2.6011484098939926e-05, "loss": 0.6831, "step": 4700 }, { "epoch": 43.24, "learning_rate": 2.469964664310954e-05, "loss": 0.6802, "step": 4800 }, { "epoch": 43.24, "eval_loss": 0.19601202011108398, "eval_runtime": 69.3985, "eval_samples_per_second": 23.416, "eval_steps_per_second": 1.47, "eval_wer": 0.2101680310211116, "step": 4800 }, { "epoch": 44.14, "learning_rate": 2.3374558303886924e-05, "loss": 0.6756, "step": 4900 }, { "epoch": 45.05, "learning_rate": 2.2049469964664307e-05, "loss": 0.67, "step": 5000 }, { "epoch": 45.95, "learning_rate": 2.0724381625441694e-05, "loss": 0.6753, "step": 5100 }, { "epoch": 46.85, "learning_rate": 1.939929328621908e-05, "loss": 0.667, "step": 5200 }, { "epoch": 46.85, "eval_loss": 0.19041335582733154, "eval_runtime": 70.1706, "eval_samples_per_second": 23.158, "eval_steps_per_second": 1.454, "eval_wer": 0.2072382593709608, "step": 5200 }, { "epoch": 47.75, "learning_rate": 1.8074204946996464e-05, "loss": 0.6562, "step": 5300 }, { "epoch": 48.65, "learning_rate": 1.674911660777385e-05, "loss": 0.666, "step": 5400 }, { "epoch": 49.55, "learning_rate": 1.5424028268551237e-05, "loss": 0.6572, "step": 5500 }, { "epoch": 50.45, "learning_rate": 1.409893992932862e-05, "loss": 0.6486, "step": 5600 }, { "epoch": 50.45, "eval_loss": 0.18806982040405273, "eval_runtime": 70.1099, "eval_samples_per_second": 23.178, "eval_steps_per_second": 1.455, "eval_wer": 0.2009478672985782, "step": 5600 }, { "epoch": 51.35, "learning_rate": 1.2773851590106007e-05, "loss": 0.6484, "step": 5700 }, { "epoch": 52.25, "learning_rate": 1.1448763250883392e-05, "loss": 0.6549, "step": 5800 }, { "epoch": 53.15, "learning_rate": 1.0123674911660777e-05, "loss": 0.6322, "step": 5900 }, { "epoch": 54.05, "learning_rate": 8.811837455830388e-06, "loss": 0.6339, "step": 6000 }, { "epoch": 54.05, "eval_loss": 0.1877446174621582, "eval_runtime": 70.1417, "eval_samples_per_second": 23.167, "eval_steps_per_second": 1.454, "eval_wer": 0.1988797931925894, "step": 6000 }, { "epoch": 54.95, "learning_rate": 7.486749116607773e-06, "loss": 0.6413, "step": 6100 }, { "epoch": 55.86, "learning_rate": 6.161660777385159e-06, "loss": 0.6392, "step": 6200 }, { "epoch": 56.76, "learning_rate": 4.836572438162544e-06, "loss": 0.6363, "step": 6300 }, { "epoch": 57.66, "learning_rate": 3.511484098939929e-06, "loss": 0.6254, "step": 6400 }, { "epoch": 57.66, "eval_loss": 0.18933533132076263, "eval_runtime": 69.6111, "eval_samples_per_second": 23.344, "eval_steps_per_second": 1.465, "eval_wer": 0.2002585092632486, "step": 6400 }, { "epoch": 58.56, "learning_rate": 2.1863957597173144e-06, "loss": 0.625, "step": 6500 }, { "epoch": 59.46, "learning_rate": 8.613074204946996e-07, "loss": 0.6379, "step": 6600 }, { "epoch": 60.0, "step": 6660, "total_flos": 3.0269424793140363e+19, "train_loss": 1.3688410950852585, "train_runtime": 13291.4016, "train_samples_per_second": 16.016, "train_steps_per_second": 0.501 } ], "max_steps": 6660, "num_train_epochs": 60, "total_flos": 3.0269424793140363e+19, "trial_name": null, "trial_params": null }