{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 1682, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9970273483947685e-05, "loss": 9.5696, "step": 1 }, { "epoch": 0.03, "learning_rate": 4.913793103448276e-05, "loss": 2.0231, "step": 29 }, { "epoch": 0.07, "learning_rate": 4.827586206896552e-05, "loss": 1.2689, "step": 58 }, { "epoch": 0.1, "learning_rate": 4.741379310344828e-05, "loss": 1.1252, "step": 87 }, { "epoch": 0.14, "learning_rate": 4.655172413793104e-05, "loss": 1.0745, "step": 116 }, { "epoch": 0.17, "learning_rate": 4.5689655172413794e-05, "loss": 1.1696, "step": 145 }, { "epoch": 0.21, "learning_rate": 4.482758620689655e-05, "loss": 1.0006, "step": 174 }, { "epoch": 0.24, "learning_rate": 4.396551724137931e-05, "loss": 1.0273, "step": 203 }, { "epoch": 0.28, "learning_rate": 4.3103448275862066e-05, "loss": 1.1105, "step": 232 }, { "epoch": 0.31, "learning_rate": 4.224137931034483e-05, "loss": 1.2629, "step": 261 }, { "epoch": 0.34, "learning_rate": 4.1379310344827587e-05, "loss": 0.9306, "step": 290 }, { "epoch": 0.38, "learning_rate": 4.0517241379310344e-05, "loss": 0.9233, "step": 319 }, { "epoch": 0.41, "learning_rate": 3.965517241379311e-05, "loss": 0.9108, "step": 348 }, { "epoch": 0.45, "learning_rate": 3.8793103448275865e-05, "loss": 0.9462, "step": 377 }, { "epoch": 0.48, "learning_rate": 3.793103448275862e-05, "loss": 0.9254, "step": 406 }, { "epoch": 0.52, "learning_rate": 3.7068965517241385e-05, "loss": 0.7106, "step": 435 }, { "epoch": 0.55, "learning_rate": 3.620689655172414e-05, "loss": 0.7342, "step": 464 }, { "epoch": 0.59, "learning_rate": 3.53448275862069e-05, "loss": 0.7573, "step": 493 }, { "epoch": 0.62, "learning_rate": 3.4482758620689657e-05, "loss": 0.7451, "step": 522 }, { "epoch": 0.66, "learning_rate": 3.3620689655172414e-05, "loss": 0.714, "step": 551 }, { "epoch": 0.69, "learning_rate": 3.275862068965517e-05, "loss": 0.8191, "step": 580 }, { "epoch": 0.72, "learning_rate": 3.1896551724137935e-05, "loss": 0.6586, "step": 609 }, { "epoch": 0.76, "learning_rate": 3.103448275862069e-05, "loss": 0.7097, "step": 638 }, { "epoch": 0.79, "learning_rate": 3.017241379310345e-05, "loss": 0.6619, "step": 667 }, { "epoch": 0.83, "learning_rate": 2.9310344827586206e-05, "loss": 0.7002, "step": 696 }, { "epoch": 0.86, "learning_rate": 2.844827586206897e-05, "loss": 0.6465, "step": 725 }, { "epoch": 0.9, "learning_rate": 2.7586206896551727e-05, "loss": 0.7087, "step": 754 }, { "epoch": 0.93, "learning_rate": 2.672413793103448e-05, "loss": 0.5199, "step": 783 }, { "epoch": 0.97, "learning_rate": 2.5862068965517244e-05, "loss": 0.5276, "step": 812 }, { "epoch": 1.0, "learning_rate": 2.5e-05, "loss": 0.486, "step": 841 }, { "epoch": 1.0, "eval_cer": 0.042814229249011855, "eval_loss": 0.5168413519859314, "eval_runtime": 53945.1969, "eval_samples_per_second": 0.031, "eval_steps_per_second": 0.004, "step": 841 }, { "epoch": 1.03, "learning_rate": 2.413793103448276e-05, "loss": 0.4464, "step": 870 }, { "epoch": 1.07, "learning_rate": 2.327586206896552e-05, "loss": 0.4375, "step": 899 }, { "epoch": 1.1, "learning_rate": 2.2413793103448276e-05, "loss": 0.411, "step": 928 }, { "epoch": 1.14, "learning_rate": 2.1551724137931033e-05, "loss": 0.4487, "step": 957 }, { "epoch": 1.17, "learning_rate": 2.0689655172413793e-05, "loss": 0.388, "step": 986 }, { "epoch": 1.21, "learning_rate": 1.9827586206896554e-05, "loss": 0.4448, "step": 1015 }, { "epoch": 1.24, "learning_rate": 1.896551724137931e-05, "loss": 0.5106, "step": 1044 }, { "epoch": 1.28, "learning_rate": 1.810344827586207e-05, "loss": 0.3352, "step": 1073 }, { "epoch": 1.31, "learning_rate": 1.7241379310344828e-05, "loss": 0.3674, "step": 1102 }, { "epoch": 1.34, "learning_rate": 1.6379310344827585e-05, "loss": 0.3654, "step": 1131 }, { "epoch": 1.38, "learning_rate": 1.5517241379310346e-05, "loss": 0.3391, "step": 1160 }, { "epoch": 1.41, "learning_rate": 1.4655172413793103e-05, "loss": 0.3538, "step": 1189 }, { "epoch": 1.45, "learning_rate": 1.3793103448275863e-05, "loss": 0.3327, "step": 1218 }, { "epoch": 1.48, "learning_rate": 1.2931034482758622e-05, "loss": 0.2937, "step": 1247 }, { "epoch": 1.52, "learning_rate": 1.206896551724138e-05, "loss": 0.3375, "step": 1276 }, { "epoch": 1.55, "learning_rate": 1.1206896551724138e-05, "loss": 0.3083, "step": 1305 }, { "epoch": 1.59, "learning_rate": 1.0344827586206897e-05, "loss": 0.2782, "step": 1334 }, { "epoch": 1.62, "learning_rate": 9.482758620689655e-06, "loss": 0.2708, "step": 1363 }, { "epoch": 1.66, "learning_rate": 8.620689655172414e-06, "loss": 0.2553, "step": 1392 }, { "epoch": 1.69, "learning_rate": 7.758620689655173e-06, "loss": 0.2646, "step": 1421 }, { "epoch": 1.72, "learning_rate": 6.896551724137932e-06, "loss": 0.2812, "step": 1450 }, { "epoch": 1.76, "learning_rate": 6.03448275862069e-06, "loss": 0.243, "step": 1479 }, { "epoch": 1.79, "learning_rate": 5.172413793103448e-06, "loss": 0.2263, "step": 1508 }, { "epoch": 1.83, "learning_rate": 4.310344827586207e-06, "loss": 0.2212, "step": 1537 }, { "epoch": 1.86, "learning_rate": 3.448275862068966e-06, "loss": 0.2268, "step": 1566 }, { "epoch": 1.9, "learning_rate": 2.586206896551724e-06, "loss": 0.2148, "step": 1595 }, { "epoch": 1.93, "learning_rate": 1.724137931034483e-06, "loss": 0.2399, "step": 1624 }, { "epoch": 1.97, "learning_rate": 8.620689655172415e-07, "loss": 0.2214, "step": 1653 }, { "epoch": 2.0, "learning_rate": 0.0, "loss": 0.2187, "step": 1682 }, { "epoch": 2.0, "eval_cer": 0.0036363636363636364, "eval_loss": 0.24321582913398743, "eval_runtime": 55499.6966, "eval_samples_per_second": 0.03, "eval_steps_per_second": 0.004, "step": 1682 }, { "epoch": 2.0, "step": 1682, "total_flos": 1.9906356553640313e+19, "train_loss": 0.6093255166633234, "train_runtime": 257236.9869, "train_samples_per_second": 0.052, "train_steps_per_second": 0.007 } ], "max_steps": 1682, "num_train_epochs": 2, "total_flos": 1.9906356553640313e+19, "trial_name": null, "trial_params": null }