{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 300, "global_step": 16425, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "eval_loss": 7.266334533691406, "eval_runtime": 325.0132, "eval_samples_per_second": 33.952, "eval_steps_per_second": 1.061, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.46, "learning_rate": 7.14420628442998e-05, "loss": 10.5256, "step": 500 }, { "epoch": 0.55, "eval_loss": 3.0892837047576904, "eval_runtime": 316.7833, "eval_samples_per_second": 34.835, "eval_steps_per_second": 1.089, "eval_wer": 1.0, "step": 600 }, { "epoch": 0.82, "eval_loss": 3.061225652694702, "eval_runtime": 483.1327, "eval_samples_per_second": 22.841, "eval_steps_per_second": 0.714, "eval_wer": 1.0, "step": 900 }, { "epoch": 0.91, "learning_rate": 0.0001434602713566988, "loss": 2.9795, "step": 1000 }, { "epoch": 1.1, "eval_loss": 2.9936766624450684, "eval_runtime": 320.3828, "eval_samples_per_second": 34.443, "eval_steps_per_second": 1.077, "eval_wer": 1.0, "step": 1200 }, { "epoch": 1.37, "learning_rate": 0.0002154784798690978, "loss": 2.9564, "step": 1500 }, { "epoch": 1.37, "eval_loss": 3.242413282394409, "eval_runtime": 328.1378, "eval_samples_per_second": 33.629, "eval_steps_per_second": 1.051, "eval_wer": 1.0, "step": 1500 }, { "epoch": 1.64, "eval_loss": 3.2866387367248535, "eval_runtime": 325.8189, "eval_samples_per_second": 33.869, "eval_steps_per_second": 1.059, "eval_wer": 1.0, "step": 1800 }, { "epoch": 1.83, "learning_rate": 0.0002874966883814968, "loss": 3.1552, "step": 2000 }, { "epoch": 1.92, "eval_loss": 3.6338589191436768, "eval_runtime": 322.0052, "eval_samples_per_second": 34.27, "eval_steps_per_second": 1.071, "eval_wer": 1.0, "step": 2100 }, { "epoch": 2.19, "eval_loss": 3.1184866428375244, "eval_runtime": 316.2742, "eval_samples_per_second": 34.891, "eval_steps_per_second": 1.091, "eval_wer": 1.0, "step": 2400 }, { "epoch": 2.28, "learning_rate": 0.00035951489689389575, "loss": 3.2079, "step": 2500 }, { "epoch": 2.47, "eval_loss": 3.183176040649414, "eval_runtime": 316.7437, "eval_samples_per_second": 34.839, "eval_steps_per_second": 1.089, "eval_wer": 1.0, "step": 2700 }, { "epoch": 2.74, "learning_rate": 0.00043153310540629475, "loss": 3.1275, "step": 3000 }, { "epoch": 2.74, "eval_loss": 3.3952367305755615, "eval_runtime": 319.4373, "eval_samples_per_second": 34.545, "eval_steps_per_second": 1.08, "eval_wer": 1.0, "step": 3000 }, { "epoch": 3.01, "eval_loss": 3.2981579303741455, "eval_runtime": 327.568, "eval_samples_per_second": 33.688, "eval_steps_per_second": 1.053, "eval_wer": 1.0, "step": 3300 }, { "epoch": 3.2, "learning_rate": 0.0005035513139186939, "loss": 3.0987, "step": 3500 }, { "epoch": 3.29, "eval_loss": 3.103595733642578, "eval_runtime": 327.3342, "eval_samples_per_second": 33.712, "eval_steps_per_second": 1.054, "eval_wer": 1.0, "step": 3600 }, { "epoch": 3.56, "eval_loss": 3.1222941875457764, "eval_runtime": 312.8357, "eval_samples_per_second": 35.274, "eval_steps_per_second": 1.103, "eval_wer": 1.0, "step": 3900 }, { "epoch": 3.65, "learning_rate": 0.0005755695224310928, "loss": 2.9301, "step": 4000 }, { "epoch": 3.84, "eval_loss": 3.114525556564331, "eval_runtime": 308.5965, "eval_samples_per_second": 35.759, "eval_steps_per_second": 1.118, "eval_wer": 1.0, "step": 4200 }, { "epoch": 4.11, "learning_rate": 0.0006475877309434917, "loss": 2.9197, "step": 4500 }, { "epoch": 4.11, "eval_loss": 3.0324432849884033, "eval_runtime": 308.9122, "eval_samples_per_second": 35.722, "eval_steps_per_second": 1.117, "eval_wer": 1.0, "step": 4500 }, { "epoch": 4.38, "eval_loss": 2.999401807785034, "eval_runtime": 308.612, "eval_samples_per_second": 35.757, "eval_steps_per_second": 1.118, "eval_wer": 1.9598550067965563, "step": 4800 }, { "epoch": 4.57, "learning_rate": 0.0007196059394558908, "loss": 2.9023, "step": 5000 }, { "epoch": 4.66, "eval_loss": 2.991722822189331, "eval_runtime": 309.103, "eval_samples_per_second": 35.7, "eval_steps_per_second": 1.116, "eval_wer": 1.8240144993203444, "step": 5100 }, { "epoch": 4.93, "eval_loss": 2.9946165084838867, "eval_runtime": 308.4148, "eval_samples_per_second": 35.78, "eval_steps_per_second": 1.119, "eval_wer": 1.958948799275034, "step": 5400 }, { "epoch": 5.02, "learning_rate": 0.0006889164121067909, "loss": 2.9007, "step": 5500 }, { "epoch": 5.21, "eval_loss": 3.195502519607544, "eval_runtime": 308.3861, "eval_samples_per_second": 35.783, "eval_steps_per_second": 1.119, "eval_wer": 1.0, "step": 5700 }, { "epoch": 5.48, "learning_rate": 0.0006574616321963427, "loss": 3.1887, "step": 6000 }, { "epoch": 5.48, "eval_loss": 3.1901698112487793, "eval_runtime": 308.4713, "eval_samples_per_second": 35.773, "eval_steps_per_second": 1.118, "eval_wer": 1.0, "step": 6000 }, { "epoch": 5.75, "eval_loss": 3.167245864868164, "eval_runtime": 308.5576, "eval_samples_per_second": 35.763, "eval_steps_per_second": 1.118, "eval_wer": 1.0, "step": 6300 }, { "epoch": 5.94, "learning_rate": 0.0006259438166548115, "loss": 3.135, "step": 6500 }, { "epoch": 6.03, "eval_loss": 3.2076234817504883, "eval_runtime": 308.784, "eval_samples_per_second": 35.737, "eval_steps_per_second": 1.117, "eval_wer": 1.0, "step": 6600 }, { "epoch": 6.3, "eval_loss": 3.212040424346924, "eval_runtime": 309.3257, "eval_samples_per_second": 35.674, "eval_steps_per_second": 1.115, "eval_wer": 1.0, "step": 6900 }, { "epoch": 6.39, "learning_rate": 0.0005944260011132802, "loss": 3.1482, "step": 7000 }, { "epoch": 6.58, "eval_loss": 3.1832025051116943, "eval_runtime": 308.2925, "eval_samples_per_second": 35.794, "eval_steps_per_second": 1.119, "eval_wer": 1.0, "step": 7200 }, { "epoch": 6.85, "learning_rate": 0.0005629081855717488, "loss": 3.1546, "step": 7500 }, { "epoch": 6.85, "eval_loss": 3.1799111366271973, "eval_runtime": 308.7059, "eval_samples_per_second": 35.746, "eval_steps_per_second": 1.118, "eval_wer": 1.0, "step": 7500 }, { "epoch": 7.12, "eval_loss": 3.2451581954956055, "eval_runtime": 307.3881, "eval_samples_per_second": 35.899, "eval_steps_per_second": 1.122, "eval_wer": 1.0, "step": 7800 }, { "epoch": 7.31, "learning_rate": 0.0005313903700302176, "loss": 3.1567, "step": 8000 }, { "epoch": 7.4, "eval_loss": 3.2318718433380127, "eval_runtime": 308.2983, "eval_samples_per_second": 35.793, "eval_steps_per_second": 1.119, "eval_wer": 1.0, "step": 8100 }, { "epoch": 7.67, "eval_loss": 3.222830057144165, "eval_runtime": 308.3145, "eval_samples_per_second": 35.791, "eval_steps_per_second": 1.119, "eval_wer": 1.0, "step": 8400 }, { "epoch": 7.76, "learning_rate": 0.0004998725544886862, "loss": 3.1719, "step": 8500 }, { "epoch": 7.95, "eval_loss": 3.2054970264434814, "eval_runtime": 308.0159, "eval_samples_per_second": 35.826, "eval_steps_per_second": 1.12, "eval_wer": 1.0, "step": 8700 }, { "epoch": 8.22, "learning_rate": 0.00046835473894715497, "loss": 3.168, "step": 9000 }, { "epoch": 8.22, "eval_loss": 3.2552778720855713, "eval_runtime": 303.2884, "eval_samples_per_second": 36.385, "eval_steps_per_second": 1.138, "eval_wer": 1.0, "step": 9000 }, { "epoch": 8.49, "eval_loss": 3.197523593902588, "eval_runtime": 305.1147, "eval_samples_per_second": 36.167, "eval_steps_per_second": 1.131, "eval_wer": 1.0, "step": 9300 }, { "epoch": 8.68, "learning_rate": 0.0004368369234056237, "loss": 3.1643, "step": 9500 }, { "epoch": 8.77, "eval_loss": 3.2445874214172363, "eval_runtime": 303.4296, "eval_samples_per_second": 36.368, "eval_steps_per_second": 1.137, "eval_wer": 1.0, "step": 9600 }, { "epoch": 9.04, "eval_loss": 3.2781076431274414, "eval_runtime": 305.7237, "eval_samples_per_second": 36.095, "eval_steps_per_second": 1.128, "eval_wer": 1.0, "step": 9900 }, { "epoch": 9.13, "learning_rate": 0.0004053191078640924, "loss": 3.169, "step": 10000 }, { "epoch": 9.32, "eval_loss": 3.2596964836120605, "eval_runtime": 306.6385, "eval_samples_per_second": 35.987, "eval_steps_per_second": 1.125, "eval_wer": 1.0, "step": 10200 }, { "epoch": 9.59, "learning_rate": 0.00037380129232256106, "loss": 3.1789, "step": 10500 }, { "epoch": 9.59, "eval_loss": 3.2585501670837402, "eval_runtime": 307.6539, "eval_samples_per_second": 35.868, "eval_steps_per_second": 1.121, "eval_wer": 1.0, "step": 10500 }, { "epoch": 9.86, "eval_loss": 3.2689764499664307, "eval_runtime": 307.738, "eval_samples_per_second": 35.858, "eval_steps_per_second": 1.121, "eval_wer": 1.0, "step": 10800 }, { "epoch": 10.05, "learning_rate": 0.0003422834767810298, "loss": 3.1701, "step": 11000 }, { "epoch": 10.14, "eval_loss": 3.273723602294922, "eval_runtime": 308.5346, "eval_samples_per_second": 35.766, "eval_steps_per_second": 1.118, "eval_wer": 1.0, "step": 11100 }, { "epoch": 10.41, "eval_loss": 3.273848533630371, "eval_runtime": 308.8614, "eval_samples_per_second": 35.728, "eval_steps_per_second": 1.117, "eval_wer": 1.0, "step": 11400 }, { "epoch": 10.5, "learning_rate": 0.00031076566123949855, "loss": 3.1698, "step": 11500 }, { "epoch": 10.68, "eval_loss": 3.2595293521881104, "eval_runtime": 308.7628, "eval_samples_per_second": 35.739, "eval_steps_per_second": 1.117, "eval_wer": 1.0, "step": 11700 }, { "epoch": 10.96, "learning_rate": 0.00027924784569796727, "loss": 3.1595, "step": 12000 }, { "epoch": 10.96, "eval_loss": 3.2467362880706787, "eval_runtime": 308.3094, "eval_samples_per_second": 35.792, "eval_steps_per_second": 1.119, "eval_wer": 1.0, "step": 12000 }, { "epoch": 11.23, "eval_loss": 3.252420663833618, "eval_runtime": 309.0904, "eval_samples_per_second": 35.702, "eval_steps_per_second": 1.116, "eval_wer": 1.0, "step": 12300 }, { "epoch": 11.42, "learning_rate": 0.00024773003015643593, "loss": 3.15, "step": 12500 }, { "epoch": 11.51, "eval_loss": 3.2327377796173096, "eval_runtime": 308.9397, "eval_samples_per_second": 35.719, "eval_steps_per_second": 1.117, "eval_wer": 1.0, "step": 12600 }, { "epoch": 11.78, "eval_loss": 3.219557046890259, "eval_runtime": 309.2594, "eval_samples_per_second": 35.682, "eval_steps_per_second": 1.116, "eval_wer": 1.0, "step": 12900 }, { "epoch": 11.87, "learning_rate": 0.00021621221461490465, "loss": 3.1444, "step": 13000 }, { "epoch": 12.05, "eval_loss": 3.1942968368530273, "eval_runtime": 309.871, "eval_samples_per_second": 35.612, "eval_steps_per_second": 1.113, "eval_wer": 1.0, "step": 13200 }, { "epoch": 12.33, "learning_rate": 0.00018469439907337336, "loss": 3.132, "step": 13500 }, { "epoch": 12.33, "eval_loss": 3.191138744354248, "eval_runtime": 309.3206, "eval_samples_per_second": 35.675, "eval_steps_per_second": 1.115, "eval_wer": 1.0, "step": 13500 }, { "epoch": 12.6, "eval_loss": 3.207465648651123, "eval_runtime": 309.5517, "eval_samples_per_second": 35.648, "eval_steps_per_second": 1.115, "eval_wer": 1.0, "step": 13800 }, { "epoch": 12.79, "learning_rate": 0.00015323961916292511, "loss": 3.1153, "step": 14000 }, { "epoch": 12.88, "eval_loss": 3.1938300132751465, "eval_runtime": 310.0376, "eval_samples_per_second": 35.592, "eval_steps_per_second": 1.113, "eval_wer": 1.0, "step": 14100 }, { "epoch": 13.15, "eval_loss": 3.1638731956481934, "eval_runtime": 308.9592, "eval_samples_per_second": 35.717, "eval_steps_per_second": 1.117, "eval_wer": 1.0, "step": 14400 }, { "epoch": 13.24, "learning_rate": 0.00012172180362139385, "loss": 3.1039, "step": 14500 }, { "epoch": 13.42, "eval_loss": 3.15146803855896, "eval_runtime": 308.6922, "eval_samples_per_second": 35.748, "eval_steps_per_second": 1.118, "eval_wer": 1.0, "step": 14700 }, { "epoch": 13.7, "learning_rate": 9.020398807986256e-05, "loss": 3.0839, "step": 15000 }, { "epoch": 13.7, "eval_loss": 3.153453826904297, "eval_runtime": 309.1197, "eval_samples_per_second": 35.698, "eval_steps_per_second": 1.116, "eval_wer": 1.0, "step": 15000 }, { "epoch": 13.97, "eval_loss": 3.130723237991333, "eval_runtime": 309.6167, "eval_samples_per_second": 35.641, "eval_steps_per_second": 1.114, "eval_wer": 1.0, "step": 15300 }, { "epoch": 14.16, "learning_rate": 5.8686172538331265e-05, "loss": 3.0632, "step": 15500 }, { "epoch": 14.25, "eval_loss": 3.1138317584991455, "eval_runtime": 309.4562, "eval_samples_per_second": 35.659, "eval_steps_per_second": 1.115, "eval_wer": 1.0, "step": 15600 }, { "epoch": 14.52, "eval_loss": 3.128912925720215, "eval_runtime": 309.4874, "eval_samples_per_second": 35.656, "eval_steps_per_second": 1.115, "eval_wer": 1.0, "step": 15900 }, { "epoch": 14.61, "learning_rate": 2.7168356996799972e-05, "loss": 3.0518, "step": 16000 }, { "epoch": 14.79, "eval_loss": 3.081491708755493, "eval_runtime": 308.8218, "eval_samples_per_second": 35.733, "eval_steps_per_second": 1.117, "eval_wer": 1.0, "step": 16200 }, { "epoch": 15.0, "step": 16425, "total_flos": 6.442470243808035e+19, "train_loss": 3.3253096312547563, "train_runtime": 44962.3834, "train_samples_per_second": 14.612, "train_steps_per_second": 0.365 } ], "logging_steps": 500, "max_steps": 16425, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 400, "total_flos": 6.442470243808035e+19, "train_batch_size": 20, "trial_name": null, "trial_params": null }