{ "best_metric": 43.44465912227436, "best_model_checkpoint": "/cosmos/home/sp-operator/ai/training/models/huggingface/scripts/../breeze-dsw-tiny-id/checkpoint-900", "epoch": 6.064, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.898977360288234e-06, "loss": 1.7848, "step": 25 }, { "epoch": 0.05, "learning_rate": 6.160712527409633e-06, "loss": 1.3566, "step": 50 }, { "epoch": 0.07, "learning_rate": 6.85912902234906e-06, "loss": 1.1406, "step": 75 }, { "epoch": 0.1, "learning_rate": 7.344547104469332e-06, "loss": 0.99, "step": 100 }, { "epoch": 0.1, "eval_loss": 0.8486328125, "eval_runtime": 1287.4485, "eval_samples_per_second": 2.829, "eval_steps_per_second": 0.177, "eval_wer": 54.24602079308124, "step": 100 }, { "epoch": 0.12, "learning_rate": 7.716963756434345e-06, "loss": 0.765, "step": 125 }, { "epoch": 0.15, "learning_rate": 8.019180844200955e-06, "loss": 0.5849, "step": 150 }, { "epoch": 1.02, "learning_rate": 8.27351214279797e-06, "loss": 0.7834, "step": 175 }, { "epoch": 1.04, "learning_rate": 8.49307723936858e-06, "loss": 0.7896, "step": 200 }, { "epoch": 1.04, "eval_loss": 0.7578125, "eval_runtime": 1159.7848, "eval_samples_per_second": 3.14, "eval_steps_per_second": 0.197, "eval_wer": 48.389916275646335, "step": 200 }, { "epoch": 1.07, "learning_rate": 8.686247975778677e-06, "loss": 0.8414, "step": 225 }, { "epoch": 1.09, "learning_rate": 8.858694625217149e-06, "loss": 0.6799, "step": 250 }, { "epoch": 1.12, "learning_rate": 9.014436199608479e-06, "loss": 0.5851, "step": 275 }, { "epoch": 1.14, "learning_rate": 9.156425255148058e-06, "loss": 0.4164, "step": 300 }, { "epoch": 1.14, "eval_loss": 0.73876953125, "eval_runtime": 1190.5006, "eval_samples_per_second": 3.059, "eval_steps_per_second": 0.192, "eval_wer": 49.3283650749839, "step": 300 }, { "epoch": 2.01, "learning_rate": 9.28689473531776e-06, "loss": 0.5103, "step": 325 }, { "epoch": 2.04, "learning_rate": 9.407574351377137e-06, "loss": 0.6019, "step": 350 }, { "epoch": 2.06, "learning_rate": 9.519831289296397e-06, "loss": 0.6399, "step": 375 }, { "epoch": 2.09, "learning_rate": 9.624764935335318e-06, "loss": 0.5456, "step": 400 }, { "epoch": 2.09, "eval_loss": 0.7177734375, "eval_runtime": 1156.6175, "eval_samples_per_second": 3.149, "eval_steps_per_second": 0.197, "eval_wer": 45.795381359830714, "step": 400 }, { "epoch": 2.11, "learning_rate": 9.723272550712454e-06, "loss": 0.48, "step": 425 }, { "epoch": 2.14, "learning_rate": 9.816095971633122e-06, "loss": 0.3325, "step": 450 }, { "epoch": 3.01, "learning_rate": 9.90385555539545e-06, "loss": 0.3389, "step": 475 }, { "epoch": 3.03, "learning_rate": 9.987075336738768e-06, "loss": 0.4761, "step": 500 }, { "epoch": 3.03, "eval_loss": 0.7109375, "eval_runtime": 1128.5074, "eval_samples_per_second": 3.227, "eval_steps_per_second": 0.202, "eval_wer": 45.21575121906339, "step": 500 }, { "epoch": 3.06, "learning_rate": 9.600000000000001e-06, "loss": 0.4891, "step": 525 }, { "epoch": 3.08, "learning_rate": 9.100000000000001e-06, "loss": 0.4411, "step": 550 }, { "epoch": 3.11, "learning_rate": 8.6e-06, "loss": 0.367, "step": 575 }, { "epoch": 3.13, "learning_rate": 8.1e-06, "loss": 0.2674, "step": 600 }, { "epoch": 3.13, "eval_loss": 0.70068359375, "eval_runtime": 1151.1688, "eval_samples_per_second": 3.164, "eval_steps_per_second": 0.198, "eval_wer": 44.843131842855826, "step": 600 }, { "epoch": 4.0, "learning_rate": 7.600000000000001e-06, "loss": 0.2263, "step": 625 }, { "epoch": 4.03, "learning_rate": 7.100000000000001e-06, "loss": 0.3527, "step": 650 }, { "epoch": 4.05, "learning_rate": 6.600000000000001e-06, "loss": 0.369, "step": 675 }, { "epoch": 4.08, "learning_rate": 6.1e-06, "loss": 0.3628, "step": 700 }, { "epoch": 4.08, "eval_loss": 0.70263671875, "eval_runtime": 1168.9802, "eval_samples_per_second": 3.116, "eval_steps_per_second": 0.195, "eval_wer": 44.24970098445119, "step": 700 }, { "epoch": 4.1, "learning_rate": 5.600000000000001e-06, "loss": 0.2981, "step": 725 }, { "epoch": 4.13, "learning_rate": 5.1e-06, "loss": 0.2296, "step": 750 }, { "epoch": 4.15, "learning_rate": 4.600000000000001e-06, "loss": 0.1694, "step": 775 }, { "epoch": 5.02, "learning_rate": 4.1e-06, "loss": 0.2565, "step": 800 }, { "epoch": 5.02, "eval_loss": 0.70849609375, "eval_runtime": 1151.0035, "eval_samples_per_second": 3.164, "eval_steps_per_second": 0.198, "eval_wer": 44.50731438034778, "step": 800 }, { "epoch": 5.04, "learning_rate": 3.6000000000000003e-06, "loss": 0.2965, "step": 825 }, { "epoch": 5.07, "learning_rate": 3.1000000000000004e-06, "loss": 0.2987, "step": 850 }, { "epoch": 5.09, "learning_rate": 2.6e-06, "loss": 0.2593, "step": 875 }, { "epoch": 5.12, "learning_rate": 2.1000000000000002e-06, "loss": 0.2147, "step": 900 }, { "epoch": 5.12, "eval_loss": 0.708984375, "eval_runtime": 1111.0874, "eval_samples_per_second": 3.278, "eval_steps_per_second": 0.205, "eval_wer": 43.44465912227436, "step": 900 }, { "epoch": 5.14, "learning_rate": 1.6000000000000001e-06, "loss": 0.1495, "step": 925 }, { "epoch": 6.01, "learning_rate": 1.1e-06, "loss": 0.1969, "step": 950 }, { "epoch": 6.04, "learning_rate": 6.000000000000001e-07, "loss": 0.2518, "step": 975 }, { "epoch": 6.06, "learning_rate": 1.0000000000000001e-07, "loss": 0.28, "step": 1000 }, { "epoch": 6.06, "eval_loss": 0.71337890625, "eval_runtime": 1142.0113, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.2, "eval_wer": 43.95528567485509, "step": 1000 }, { "epoch": 6.06, "step": 1000, "total_flos": 7.844068896339395e+17, "train_loss": 0.5104879150390625, "train_runtime": 24718.0782, "train_samples_per_second": 1.295, "train_steps_per_second": 0.04 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "total_flos": 7.844068896339395e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }