{ "best_metric": null, "best_model_checkpoint": null, "epoch": 18.823529411764707, "global_step": 5600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.67, "eval_loss": 0.8018701076507568, "eval_runtime": 123.0686, "eval_samples_per_second": 17.892, "eval_steps_per_second": 2.243, "eval_wer": 0.8813849519642347, "step": 200 }, { "epoch": 1.34, "learning_rate": 0.0007940000000000001, "loss": 1.228, "step": 400 }, { "epoch": 1.34, "eval_loss": 1.0962814092636108, "eval_runtime": 123.3569, "eval_samples_per_second": 17.851, "eval_steps_per_second": 2.237, "eval_wer": 0.9425473223627889, "step": 400 }, { "epoch": 2.02, "eval_loss": 1.104669451713562, "eval_runtime": 123.8837, "eval_samples_per_second": 17.775, "eval_steps_per_second": 2.228, "eval_wer": 0.9653762008941311, "step": 600 }, { "epoch": 2.69, "learning_rate": 0.0009454044117647059, "loss": 0.7814, "step": 800 }, { "epoch": 2.69, "eval_loss": 0.9275535345077515, "eval_runtime": 124.6162, "eval_samples_per_second": 17.67, "eval_steps_per_second": 2.215, "eval_wer": 0.9294207172072672, "step": 800 }, { "epoch": 3.36, "eval_loss": 0.8205996751785278, "eval_runtime": 124.5979, "eval_samples_per_second": 17.673, "eval_steps_per_second": 2.215, "eval_wer": 0.9016455816608009, "step": 1000 }, { "epoch": 4.03, "learning_rate": 0.0008718749999999999, "loss": 0.617, "step": 1200 }, { "epoch": 4.03, "eval_loss": 0.7664089798927307, "eval_runtime": 124.9893, "eval_samples_per_second": 17.618, "eval_steps_per_second": 2.208, "eval_wer": 0.9076381622752783, "step": 1200 }, { "epoch": 4.71, "eval_loss": 0.8046385049819946, "eval_runtime": 124.9226, "eval_samples_per_second": 17.627, "eval_steps_per_second": 2.209, "eval_wer": 0.902787025587368, "step": 1400 }, { "epoch": 5.38, "learning_rate": 0.0007983455882352941, "loss": 0.5312, "step": 1600 }, { "epoch": 5.38, "eval_loss": 0.7806461453437805, "eval_runtime": 125.1179, "eval_samples_per_second": 17.599, "eval_steps_per_second": 2.206, "eval_wer": 0.9512983924664701, "step": 1600 }, { "epoch": 6.05, "eval_loss": 0.7088819146156311, "eval_runtime": 127.0024, "eval_samples_per_second": 17.338, "eval_steps_per_second": 2.173, "eval_wer": 0.8616950442309521, "step": 1800 }, { "epoch": 6.72, "learning_rate": 0.0007248161764705882, "loss": 0.4404, "step": 2000 }, { "epoch": 6.72, "eval_loss": 0.7011399269104004, "eval_runtime": 125.0855, "eval_samples_per_second": 17.604, "eval_steps_per_second": 2.206, "eval_wer": 0.8952725197374679, "step": 2000 }, { "epoch": 7.39, "eval_loss": 0.6890342235565186, "eval_runtime": 125.964, "eval_samples_per_second": 17.481, "eval_steps_per_second": 2.191, "eval_wer": 0.8790069437838867, "step": 2200 }, { "epoch": 8.07, "learning_rate": 0.0006512867647058823, "loss": 0.3961, "step": 2400 }, { "epoch": 8.07, "eval_loss": 0.6665045619010925, "eval_runtime": 125.5126, "eval_samples_per_second": 17.544, "eval_steps_per_second": 2.199, "eval_wer": 0.8834775991629411, "step": 2400 }, { "epoch": 8.74, "eval_loss": 0.6596990823745728, "eval_runtime": 125.1836, "eval_samples_per_second": 17.59, "eval_steps_per_second": 2.205, "eval_wer": 0.8537049367449824, "step": 2600 }, { "epoch": 9.41, "learning_rate": 0.0005777573529411765, "loss": 0.3402, "step": 2800 }, { "epoch": 9.41, "eval_loss": 0.6772456765174866, "eval_runtime": 125.8045, "eval_samples_per_second": 17.503, "eval_steps_per_second": 2.194, "eval_wer": 0.8308760582136403, "step": 2800 }, { "epoch": 10.08, "eval_loss": 0.6295859813690186, "eval_runtime": 125.3009, "eval_samples_per_second": 17.574, "eval_steps_per_second": 2.203, "eval_wer": 0.8315419005041378, "step": 3000 }, { "epoch": 10.76, "learning_rate": 0.0005042279411764706, "loss": 0.2939, "step": 3200 }, { "epoch": 10.76, "eval_loss": 0.6442817449569702, "eval_runtime": 125.5719, "eval_samples_per_second": 17.536, "eval_steps_per_second": 2.198, "eval_wer": 0.8326833444307048, "step": 3200 }, { "epoch": 11.43, "eval_loss": 0.5965331196784973, "eval_runtime": 126.3146, "eval_samples_per_second": 17.433, "eval_steps_per_second": 2.185, "eval_wer": 0.8219347474555313, "step": 3400 }, { "epoch": 12.1, "learning_rate": 0.00043069852941176473, "loss": 0.2325, "step": 3600 }, { "epoch": 12.1, "eval_loss": 0.613932728767395, "eval_runtime": 125.4233, "eval_samples_per_second": 17.557, "eval_steps_per_second": 2.201, "eval_wer": 0.8033862836488157, "step": 3600 }, { "epoch": 12.77, "eval_loss": 0.5964781641960144, "eval_runtime": 125.9783, "eval_samples_per_second": 17.479, "eval_steps_per_second": 2.191, "eval_wer": 0.8020545990678208, "step": 3800 }, { "epoch": 13.45, "learning_rate": 0.0003571691176470588, "loss": 0.1823, "step": 4000 }, { "epoch": 13.45, "eval_loss": 0.640591561794281, "eval_runtime": 125.8961, "eval_samples_per_second": 17.491, "eval_steps_per_second": 2.192, "eval_wer": 0.8089032626272235, "step": 4000 }, { "epoch": 14.12, "eval_loss": 0.6114814877510071, "eval_runtime": 126.0685, "eval_samples_per_second": 17.467, "eval_steps_per_second": 2.189, "eval_wer": 0.797964424997622, "step": 4200 }, { "epoch": 14.79, "learning_rate": 0.0002836397058823529, "loss": 0.138, "step": 4400 }, { "epoch": 14.79, "eval_loss": 0.6256727576255798, "eval_runtime": 126.3311, "eval_samples_per_second": 17.43, "eval_steps_per_second": 2.185, "eval_wer": 0.7777989156282697, "step": 4400 }, { "epoch": 15.46, "eval_loss": 0.6141476035118103, "eval_runtime": 126.0957, "eval_samples_per_second": 17.463, "eval_steps_per_second": 2.189, "eval_wer": 0.7941596119090649, "step": 4600 }, { "epoch": 16.13, "learning_rate": 0.00021011029411764707, "loss": 0.0784, "step": 4800 }, { "epoch": 16.13, "eval_loss": 0.6295885443687439, "eval_runtime": 126.3665, "eval_samples_per_second": 17.425, "eval_steps_per_second": 2.184, "eval_wer": 0.7922572053647865, "step": 4800 }, { "epoch": 16.81, "eval_loss": 0.5918190479278564, "eval_runtime": 126.3879, "eval_samples_per_second": 17.423, "eval_steps_per_second": 2.184, "eval_wer": 0.7728526586131457, "step": 5000 }, { "epoch": 17.48, "learning_rate": 0.00013658088235294117, "loss": 0.0344, "step": 5200 }, { "epoch": 17.48, "eval_loss": 0.5919037461280823, "eval_runtime": 126.2976, "eval_samples_per_second": 17.435, "eval_steps_per_second": 2.185, "eval_wer": 0.7636259868733949, "step": 5200 }, { "epoch": 18.15, "eval_loss": 0.576080858707428, "eval_runtime": 126.7739, "eval_samples_per_second": 17.37, "eval_steps_per_second": 2.177, "eval_wer": 0.7495481784457338, "step": 5400 }, { "epoch": 18.82, "learning_rate": 6.30514705882353e-05, "loss": -0.0038, "step": 5600 }, { "epoch": 18.82, "eval_loss": 0.5868940949440002, "eval_runtime": 126.4596, "eval_samples_per_second": 17.413, "eval_steps_per_second": 2.183, "eval_wer": 0.7588699705126986, "step": 5600 } ], "max_steps": 5940, "num_train_epochs": 20, "total_flos": 4.6199251281753915e+19, "trial_name": null, "trial_params": null }