{ "best_metric": 4.192136287689209, "best_model_checkpoint": "./checkpoint-1000", "epoch": 12.658227848101266, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 7.500000000000001e-09, "loss": 22.5894, "step": 10 }, { "epoch": 0.25, "learning_rate": 1.5000000000000002e-08, "loss": 20.4369, "step": 20 }, { "epoch": 0.38, "learning_rate": 2.25e-08, "loss": 22.056, "step": 30 }, { "epoch": 0.51, "learning_rate": 3.0000000000000004e-08, "loss": 19.983, "step": 40 }, { "epoch": 0.63, "learning_rate": 3.7500000000000005e-08, "loss": 21.5341, "step": 50 }, { "epoch": 0.76, "learning_rate": 4.4249999999999996e-08, "loss": 20.8669, "step": 60 }, { "epoch": 0.89, "learning_rate": 5.1750000000000003e-08, "loss": 21.3542, "step": 70 }, { "epoch": 1.01, "learning_rate": 5.9250000000000004e-08, "loss": 20.587, "step": 80 }, { "epoch": 1.14, "learning_rate": 6.675e-08, "loss": 21.7896, "step": 90 }, { "epoch": 1.27, "learning_rate": 7.425e-08, "loss": 19.9558, "step": 100 }, { "epoch": 1.27, "eval_cer": 3.2660447130816737, "eval_loss": 20.919719696044922, "eval_runtime": 51.6953, "eval_samples_per_second": 9.305, "eval_steps_per_second": 2.341, "eval_wer": 1.0, "step": 100 }, { "epoch": 1.39, "learning_rate": 8.175e-08, "loss": 21.8368, "step": 110 }, { "epoch": 1.52, "learning_rate": 8.925e-08, "loss": 20.2781, "step": 120 }, { "epoch": 1.65, "learning_rate": 9.675e-08, "loss": 20.5284, "step": 130 }, { "epoch": 1.77, "learning_rate": 1.0425000000000001e-07, "loss": 20.2507, "step": 140 }, { "epoch": 1.9, "learning_rate": 1.1174999999999999e-07, "loss": 20.063, "step": 150 }, { "epoch": 2.03, "learning_rate": 1.1925e-07, "loss": 20.3082, "step": 160 }, { "epoch": 2.15, "learning_rate": 1.2675000000000002e-07, "loss": 20.5946, "step": 170 }, { "epoch": 2.28, "learning_rate": 1.3425e-07, "loss": 19.3427, "step": 180 }, { "epoch": 2.41, "learning_rate": 1.4175e-07, "loss": 19.4337, "step": 190 }, { "epoch": 2.53, "learning_rate": 1.4925000000000002e-07, "loss": 19.7186, "step": 200 }, { "epoch": 2.53, "eval_cer": 1.1691851406904061, "eval_loss": 19.244659423828125, "eval_runtime": 51.3422, "eval_samples_per_second": 9.369, "eval_steps_per_second": 2.357, "eval_wer": 1.0, "step": 200 }, { "epoch": 2.66, "learning_rate": 1.56e-07, "loss": 19.3518, "step": 210 }, { "epoch": 2.78, "learning_rate": 1.635e-07, "loss": 18.8406, "step": 220 }, { "epoch": 2.91, "learning_rate": 1.71e-07, "loss": 18.1267, "step": 230 }, { "epoch": 3.04, "learning_rate": 1.785e-07, "loss": 18.2315, "step": 240 }, { "epoch": 3.16, "learning_rate": 1.86e-07, "loss": 17.8997, "step": 250 }, { "epoch": 3.29, "learning_rate": 1.935e-07, "loss": 17.5011, "step": 260 }, { "epoch": 3.42, "learning_rate": 2.01e-07, "loss": 16.8782, "step": 270 }, { "epoch": 3.54, "learning_rate": 2.0850000000000002e-07, "loss": 16.628, "step": 280 }, { "epoch": 3.67, "learning_rate": 2.16e-07, "loss": 15.2879, "step": 290 }, { "epoch": 3.8, "learning_rate": 2.2349999999999998e-07, "loss": 15.203, "step": 300 }, { "epoch": 3.8, "eval_cer": 0.9687083513224335, "eval_loss": 15.005261421203613, "eval_runtime": 54.4312, "eval_samples_per_second": 8.837, "eval_steps_per_second": 1.121, "eval_wer": 0.9998294679399727, "step": 300 }, { "epoch": 3.92, "learning_rate": 2.31e-07, "loss": 13.8484, "step": 310 }, { "epoch": 4.05, "learning_rate": 2.385e-07, "loss": 13.6877, "step": 320 }, { "epoch": 4.18, "learning_rate": 2.46e-07, "loss": 12.1026, "step": 330 }, { "epoch": 4.3, "learning_rate": 2.5350000000000004e-07, "loss": 11.1649, "step": 340 }, { "epoch": 4.43, "learning_rate": 2.6099999999999997e-07, "loss": 9.5398, "step": 350 }, { "epoch": 4.56, "learning_rate": 2.6775e-07, "loss": 8.7484, "step": 360 }, { "epoch": 4.68, "learning_rate": 2.7525e-07, "loss": 7.7766, "step": 370 }, { "epoch": 4.81, "learning_rate": 2.8275e-07, "loss": 7.2303, "step": 380 }, { "epoch": 4.94, "learning_rate": 2.9025e-07, "loss": 6.5927, "step": 390 }, { "epoch": 5.06, "learning_rate": 2.9775000000000003e-07, "loss": 6.4303, "step": 400 }, { "epoch": 5.06, "eval_cer": 0.9911212058641338, "eval_loss": 6.543659687042236, "eval_runtime": 55.4335, "eval_samples_per_second": 8.677, "eval_steps_per_second": 1.1, "eval_wer": 0.9631650750341064, "step": 400 }, { "epoch": 5.19, "learning_rate": 3.0525e-07, "loss": 5.9555, "step": 410 }, { "epoch": 5.32, "learning_rate": 3.1275e-07, "loss": 5.9212, "step": 420 }, { "epoch": 5.44, "learning_rate": 3.2025e-07, "loss": 5.4995, "step": 430 }, { "epoch": 5.57, "learning_rate": 3.2775e-07, "loss": 5.5903, "step": 440 }, { "epoch": 5.7, "learning_rate": 3.3525000000000004e-07, "loss": 5.1376, "step": 450 }, { "epoch": 5.82, "learning_rate": 3.4275e-07, "loss": 4.9402, "step": 460 }, { "epoch": 5.95, "learning_rate": 3.5025e-07, "loss": 4.9112, "step": 470 }, { "epoch": 6.08, "learning_rate": 3.5775e-07, "loss": 4.8001, "step": 480 }, { "epoch": 6.2, "learning_rate": 3.6525e-07, "loss": 4.482, "step": 490 }, { "epoch": 6.33, "learning_rate": 3.7275e-07, "loss": 4.5712, "step": 500 }, { "epoch": 6.33, "eval_cer": 0.9545736113978939, "eval_loss": 4.904029369354248, "eval_runtime": 64.6266, "eval_samples_per_second": 7.443, "eval_steps_per_second": 0.944, "eval_wer": 0.9322987721691678, "step": 500 }, { "epoch": 6.46, "learning_rate": 3.8025000000000003e-07, "loss": 4.3992, "step": 510 }, { "epoch": 6.58, "learning_rate": 3.8775e-07, "loss": 4.1942, "step": 520 }, { "epoch": 6.71, "learning_rate": 3.9525000000000005e-07, "loss": 4.339, "step": 530 }, { "epoch": 6.84, "learning_rate": 4.0275000000000003e-07, "loss": 4.2826, "step": 540 }, { "epoch": 6.96, "learning_rate": 4.1025000000000006e-07, "loss": 4.3752, "step": 550 }, { "epoch": 7.09, "learning_rate": 4.1775000000000004e-07, "loss": 3.9576, "step": 560 }, { "epoch": 7.22, "learning_rate": 4.2524999999999997e-07, "loss": 4.0814, "step": 570 }, { "epoch": 7.34, "learning_rate": 4.3274999999999995e-07, "loss": 4.3274, "step": 580 }, { "epoch": 7.47, "learning_rate": 4.4025e-07, "loss": 3.9749, "step": 590 }, { "epoch": 7.59, "learning_rate": 4.4774999999999997e-07, "loss": 3.8373, "step": 600 }, { "epoch": 7.72, "learning_rate": 4.5525e-07, "loss": 4.1076, "step": 610 }, { "epoch": 7.85, "learning_rate": 4.6275e-07, "loss": 3.8279, "step": 620 }, { "epoch": 7.97, "learning_rate": 4.7025e-07, "loss": 4.0263, "step": 630 }, { "epoch": 8.1, "learning_rate": 4.7775e-07, "loss": 3.8141, "step": 640 }, { "epoch": 8.23, "learning_rate": 4.8525e-07, "loss": 3.8791, "step": 650 }, { "epoch": 8.35, "learning_rate": 4.927500000000001e-07, "loss": 3.6906, "step": 660 }, { "epoch": 8.48, "learning_rate": 5.0025e-07, "loss": 4.0741, "step": 670 }, { "epoch": 8.61, "learning_rate": 5.0775e-07, "loss": 3.7322, "step": 680 }, { "epoch": 8.73, "learning_rate": 5.152500000000001e-07, "loss": 4.1834, "step": 690 }, { "epoch": 8.86, "learning_rate": 5.2275e-07, "loss": 3.8571, "step": 700 }, { "epoch": 8.99, "learning_rate": 5.3025e-07, "loss": 3.9223, "step": 710 }, { "epoch": 9.11, "learning_rate": 5.3775e-07, "loss": 3.6997, "step": 720 }, { "epoch": 9.24, "learning_rate": 5.4525e-07, "loss": 4.0024, "step": 730 }, { "epoch": 9.37, "learning_rate": 5.5275e-07, "loss": 3.7653, "step": 740 }, { "epoch": 9.49, "learning_rate": 5.602500000000001e-07, "loss": 3.873, "step": 750 }, { "epoch": 9.62, "learning_rate": 5.6775e-07, "loss": 3.597, "step": 760 }, { "epoch": 9.75, "learning_rate": 5.7525e-07, "loss": 3.7902, "step": 770 }, { "epoch": 9.87, "learning_rate": 5.827500000000001e-07, "loss": 3.8314, "step": 780 }, { "epoch": 10.0, "learning_rate": 5.902500000000001e-07, "loss": 3.956, "step": 790 }, { "epoch": 10.13, "learning_rate": 5.9775e-07, "loss": 3.6297, "step": 800 }, { "epoch": 10.25, "learning_rate": 6.0525e-07, "loss": 4.0449, "step": 810 }, { "epoch": 10.38, "learning_rate": 6.1275e-07, "loss": 3.5669, "step": 820 }, { "epoch": 10.51, "learning_rate": 6.2025e-07, "loss": 3.6994, "step": 830 }, { "epoch": 10.63, "learning_rate": 6.277499999999999e-07, "loss": 3.6146, "step": 840 }, { "epoch": 10.76, "learning_rate": 6.3525e-07, "loss": 3.8637, "step": 850 }, { "epoch": 10.89, "learning_rate": 6.4275e-07, "loss": 3.7, "step": 860 }, { "epoch": 11.01, "learning_rate": 6.5025e-07, "loss": 3.8526, "step": 870 }, { "epoch": 11.14, "learning_rate": 6.5775e-07, "loss": 3.7276, "step": 880 }, { "epoch": 11.27, "learning_rate": 6.6525e-07, "loss": 3.989, "step": 890 }, { "epoch": 11.39, "learning_rate": 6.7275e-07, "loss": 3.7838, "step": 900 }, { "epoch": 11.52, "learning_rate": 6.802500000000001e-07, "loss": 3.698, "step": 910 }, { "epoch": 11.65, "learning_rate": 6.8775e-07, "loss": 3.4391, "step": 920 }, { "epoch": 11.77, "learning_rate": 6.9525e-07, "loss": 3.7371, "step": 930 }, { "epoch": 11.9, "learning_rate": 7.027500000000001e-07, "loss": 3.4201, "step": 940 }, { "epoch": 12.03, "learning_rate": 7.1025e-07, "loss": 3.7721, "step": 950 }, { "epoch": 12.15, "learning_rate": 7.1775e-07, "loss": 3.7842, "step": 960 }, { "epoch": 12.28, "learning_rate": 7.2525e-07, "loss": 3.8125, "step": 970 }, { "epoch": 12.41, "learning_rate": 7.3275e-07, "loss": 3.7673, "step": 980 }, { "epoch": 12.53, "learning_rate": 7.395e-07, "loss": 3.7048, "step": 990 }, { "epoch": 12.66, "learning_rate": 7.47e-07, "loss": 3.3986, "step": 1000 }, { "epoch": 12.66, "eval_cer": 0.9608244326394233, "eval_loss": 4.192136287689209, "eval_runtime": 52.6187, "eval_samples_per_second": 9.141, "eval_steps_per_second": 1.159, "eval_wer": 0.9294849931787176, "step": 1000 }, { "epoch": 12.66, "step": 1000, "total_flos": 3.5131981783950033e+19, "train_loss": 1.9340453338623047, "train_runtime": 2875.5301, "train_samples_per_second": 11.128, "train_steps_per_second": 0.348 } ], "max_steps": 1000, "num_train_epochs": 13, "total_flos": 3.5131981783950033e+19, "trial_name": null, "trial_params": null }