{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.946666666666665, "eval_steps": 500, "global_step": 3740, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 0.0001, "loss": 0.5411, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.7903686274509804, "eval_loss": 0.39375776052474976, "eval_runtime": 7.7091, "eval_samples_per_second": 64.858, "eval_steps_per_second": 8.172, "step": 187 }, { "epoch": 1.07, "learning_rate": 0.0001, "loss": 0.4081, "step": 200 }, { "epoch": 1.6, "learning_rate": 0.0001, "loss": 0.362, "step": 300 }, { "epoch": 2.0, "eval_accuracy": 0.7917960784313726, "eval_loss": 0.3804325461387634, "eval_runtime": 7.4651, "eval_samples_per_second": 66.979, "eval_steps_per_second": 8.439, "step": 375 }, { "epoch": 2.13, "learning_rate": 0.0001, "loss": 0.3376, "step": 400 }, { "epoch": 2.67, "learning_rate": 0.0001, "loss": 0.3047, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.7891450980392157, "eval_loss": 0.39341700077056885, "eval_runtime": 7.1191, "eval_samples_per_second": 70.233, "eval_steps_per_second": 8.849, "step": 562 }, { "epoch": 3.2, "learning_rate": 0.0001, "loss": 0.2848, "step": 600 }, { "epoch": 3.73, "learning_rate": 0.0001, "loss": 0.2469, "step": 700 }, { "epoch": 4.0, "eval_accuracy": 0.7846274509803921, "eval_loss": 0.42259183526039124, "eval_runtime": 6.9941, "eval_samples_per_second": 71.489, "eval_steps_per_second": 9.008, "step": 750 }, { "epoch": 4.27, "learning_rate": 0.0001, "loss": 0.2215, "step": 800 }, { "epoch": 4.8, "learning_rate": 0.0001, "loss": 0.2022, "step": 900 }, { "epoch": 5.0, "eval_accuracy": 0.7803058823529412, "eval_loss": 0.4660645127296448, "eval_runtime": 6.6511, "eval_samples_per_second": 75.175, "eval_steps_per_second": 9.472, "step": 937 }, { "epoch": 5.33, "learning_rate": 0.0001, "loss": 0.1783, "step": 1000 }, { "epoch": 5.87, "learning_rate": 0.0001, "loss": 0.1681, "step": 1100 }, { "epoch": 6.0, "eval_accuracy": 0.7761333333333333, "eval_loss": 0.5122596621513367, "eval_runtime": 7.6517, "eval_samples_per_second": 65.345, "eval_steps_per_second": 8.233, "step": 1125 }, { "epoch": 6.4, "learning_rate": 0.0001, "loss": 0.1437, "step": 1200 }, { "epoch": 6.93, "learning_rate": 0.0001, "loss": 0.1404, "step": 1300 }, { "epoch": 7.0, "eval_accuracy": 0.7720627450980392, "eval_loss": 0.5730750560760498, "eval_runtime": 7.0342, "eval_samples_per_second": 71.081, "eval_steps_per_second": 8.956, "step": 1312 }, { "epoch": 7.47, "learning_rate": 0.0001, "loss": 0.1179, "step": 1400 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 0.1197, "step": 1500 }, { "epoch": 8.0, "eval_accuracy": 0.770078431372549, "eval_loss": 0.6074877977371216, "eval_runtime": 6.9934, "eval_samples_per_second": 71.496, "eval_steps_per_second": 9.008, "step": 1500 }, { "epoch": 8.53, "learning_rate": 0.0001, "loss": 0.1, "step": 1600 }, { "epoch": 9.0, "eval_accuracy": 0.7688470588235294, "eval_loss": 0.631741464138031, "eval_runtime": 7.3864, "eval_samples_per_second": 67.692, "eval_steps_per_second": 8.529, "step": 1687 }, { "epoch": 9.07, "learning_rate": 0.0001, "loss": 0.1027, "step": 1700 }, { "epoch": 9.6, "learning_rate": 0.0001, "loss": 0.089, "step": 1800 }, { "epoch": 10.0, "eval_accuracy": 0.7663764705882353, "eval_loss": 0.6717921495437622, "eval_runtime": 6.6214, "eval_samples_per_second": 75.513, "eval_steps_per_second": 9.515, "step": 1875 }, { "epoch": 10.13, "learning_rate": 0.0001, "loss": 0.0911, "step": 1900 }, { "epoch": 10.67, "learning_rate": 0.0001, "loss": 0.0837, "step": 2000 }, { "epoch": 11.0, "eval_accuracy": 0.7653333333333333, "eval_loss": 0.6921772956848145, "eval_runtime": 6.6402, "eval_samples_per_second": 75.299, "eval_steps_per_second": 9.488, "step": 2062 }, { "epoch": 11.2, "learning_rate": 0.0001, "loss": 0.0823, "step": 2100 }, { "epoch": 11.73, "learning_rate": 0.0001, "loss": 0.0788, "step": 2200 }, { "epoch": 12.0, "eval_accuracy": 0.7631764705882353, "eval_loss": 0.7253576517105103, "eval_runtime": 7.6814, "eval_samples_per_second": 65.092, "eval_steps_per_second": 8.202, "step": 2250 }, { "epoch": 12.27, "learning_rate": 0.0001, "loss": 0.078, "step": 2300 }, { "epoch": 12.8, "learning_rate": 0.0001, "loss": 0.0761, "step": 2400 }, { "epoch": 13.0, "eval_accuracy": 0.7628627450980392, "eval_loss": 0.7256088256835938, "eval_runtime": 7.6806, "eval_samples_per_second": 65.099, "eval_steps_per_second": 8.202, "step": 2437 }, { "epoch": 13.33, "learning_rate": 0.0001, "loss": 0.0743, "step": 2500 }, { "epoch": 13.87, "learning_rate": 0.0001, "loss": 0.0749, "step": 2600 }, { "epoch": 14.0, "eval_accuracy": 0.7620627450980392, "eval_loss": 0.7533740401268005, "eval_runtime": 7.644, "eval_samples_per_second": 65.41, "eval_steps_per_second": 8.242, "step": 2625 }, { "epoch": 14.4, "learning_rate": 0.0001, "loss": 0.0719, "step": 2700 }, { "epoch": 14.93, "learning_rate": 0.0001, "loss": 0.0741, "step": 2800 }, { "epoch": 15.0, "eval_accuracy": 0.7620078431372549, "eval_loss": 0.7529163360595703, "eval_runtime": 7.4208, "eval_samples_per_second": 67.378, "eval_steps_per_second": 8.49, "step": 2812 }, { "epoch": 15.47, "learning_rate": 0.0001, "loss": 0.0704, "step": 2900 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 0.0726, "step": 3000 }, { "epoch": 16.0, "eval_accuracy": 0.7610745098039216, "eval_loss": 0.7678206562995911, "eval_runtime": 6.6228, "eval_samples_per_second": 75.497, "eval_steps_per_second": 9.513, "step": 3000 }, { "epoch": 16.53, "learning_rate": 0.0001, "loss": 0.0687, "step": 3100 }, { "epoch": 17.0, "eval_accuracy": 0.7610274509803922, "eval_loss": 0.7728469371795654, "eval_runtime": 7.7738, "eval_samples_per_second": 64.319, "eval_steps_per_second": 8.104, "step": 3187 }, { "epoch": 17.07, "learning_rate": 0.0001, "loss": 0.0706, "step": 3200 }, { "epoch": 17.6, "learning_rate": 0.0001, "loss": 0.0682, "step": 3300 }, { "epoch": 18.0, "eval_accuracy": 0.7603058823529412, "eval_loss": 0.7807328104972839, "eval_runtime": 7.6425, "eval_samples_per_second": 65.424, "eval_steps_per_second": 8.243, "step": 3375 }, { "epoch": 18.13, "learning_rate": 0.0001, "loss": 0.069, "step": 3400 }, { "epoch": 18.67, "learning_rate": 0.0001, "loss": 0.0682, "step": 3500 }, { "epoch": 19.0, "eval_accuracy": 0.7609882352941176, "eval_loss": 0.7872016429901123, "eval_runtime": 7.0778, "eval_samples_per_second": 70.644, "eval_steps_per_second": 8.901, "step": 3562 }, { "epoch": 19.2, "learning_rate": 0.0001, "loss": 0.0682, "step": 3600 }, { "epoch": 19.73, "learning_rate": 0.0001, "loss": 0.0682, "step": 3700 }, { "epoch": 19.95, "eval_accuracy": 0.7597490196078431, "eval_loss": 0.8054783940315247, "eval_runtime": 7.0912, "eval_samples_per_second": 70.51, "eval_steps_per_second": 8.884, "step": 3740 }, { "epoch": 19.95, "step": 3740, "total_flos": 8.667972359988183e+17, "train_loss": 0.1472176224152672, "train_runtime": 2965.7973, "train_samples_per_second": 40.461, "train_steps_per_second": 1.261 } ], "logging_steps": 100, "max_steps": 3740, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8.667972359988183e+17, "trial_name": null, "trial_params": null }