|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.946666666666665, |
|
"eval_steps": 500, |
|
"global_step": 3740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5411, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7903686274509804, |
|
"eval_loss": 0.39375776052474976, |
|
"eval_runtime": 7.7091, |
|
"eval_samples_per_second": 64.858, |
|
"eval_steps_per_second": 8.172, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4081, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.362, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7917960784313726, |
|
"eval_loss": 0.3804325461387634, |
|
"eval_runtime": 7.4651, |
|
"eval_samples_per_second": 66.979, |
|
"eval_steps_per_second": 8.439, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3376, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3047, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7891450980392157, |
|
"eval_loss": 0.39341700077056885, |
|
"eval_runtime": 7.1191, |
|
"eval_samples_per_second": 70.233, |
|
"eval_steps_per_second": 8.849, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2848, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2469, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7846274509803921, |
|
"eval_loss": 0.42259183526039124, |
|
"eval_runtime": 6.9941, |
|
"eval_samples_per_second": 71.489, |
|
"eval_steps_per_second": 9.008, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2215, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2022, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7803058823529412, |
|
"eval_loss": 0.4660645127296448, |
|
"eval_runtime": 6.6511, |
|
"eval_samples_per_second": 75.175, |
|
"eval_steps_per_second": 9.472, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1783, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1681, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7761333333333333, |
|
"eval_loss": 0.5122596621513367, |
|
"eval_runtime": 7.6517, |
|
"eval_samples_per_second": 65.345, |
|
"eval_steps_per_second": 8.233, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1437, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1404, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7720627450980392, |
|
"eval_loss": 0.5730750560760498, |
|
"eval_runtime": 7.0342, |
|
"eval_samples_per_second": 71.081, |
|
"eval_steps_per_second": 8.956, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1179, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1197, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.770078431372549, |
|
"eval_loss": 0.6074877977371216, |
|
"eval_runtime": 6.9934, |
|
"eval_samples_per_second": 71.496, |
|
"eval_steps_per_second": 9.008, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7688470588235294, |
|
"eval_loss": 0.631741464138031, |
|
"eval_runtime": 7.3864, |
|
"eval_samples_per_second": 67.692, |
|
"eval_steps_per_second": 8.529, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1027, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.089, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7663764705882353, |
|
"eval_loss": 0.6717921495437622, |
|
"eval_runtime": 6.6214, |
|
"eval_samples_per_second": 75.513, |
|
"eval_steps_per_second": 9.515, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0911, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0837, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7653333333333333, |
|
"eval_loss": 0.6921772956848145, |
|
"eval_runtime": 6.6402, |
|
"eval_samples_per_second": 75.299, |
|
"eval_steps_per_second": 9.488, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0823, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0788, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7631764705882353, |
|
"eval_loss": 0.7253576517105103, |
|
"eval_runtime": 7.6814, |
|
"eval_samples_per_second": 65.092, |
|
"eval_steps_per_second": 8.202, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.078, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0761, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7628627450980392, |
|
"eval_loss": 0.7256088256835938, |
|
"eval_runtime": 7.6806, |
|
"eval_samples_per_second": 65.099, |
|
"eval_steps_per_second": 8.202, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0743, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0749, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7620627450980392, |
|
"eval_loss": 0.7533740401268005, |
|
"eval_runtime": 7.644, |
|
"eval_samples_per_second": 65.41, |
|
"eval_steps_per_second": 8.242, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0719, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0741, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7620078431372549, |
|
"eval_loss": 0.7529163360595703, |
|
"eval_runtime": 7.4208, |
|
"eval_samples_per_second": 67.378, |
|
"eval_steps_per_second": 8.49, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0704, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0726, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7610745098039216, |
|
"eval_loss": 0.7678206562995911, |
|
"eval_runtime": 6.6228, |
|
"eval_samples_per_second": 75.497, |
|
"eval_steps_per_second": 9.513, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0687, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7610274509803922, |
|
"eval_loss": 0.7728469371795654, |
|
"eval_runtime": 7.7738, |
|
"eval_samples_per_second": 64.319, |
|
"eval_steps_per_second": 8.104, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0706, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0682, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7603058823529412, |
|
"eval_loss": 0.7807328104972839, |
|
"eval_runtime": 7.6425, |
|
"eval_samples_per_second": 65.424, |
|
"eval_steps_per_second": 8.243, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.069, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0682, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7609882352941176, |
|
"eval_loss": 0.7872016429901123, |
|
"eval_runtime": 7.0778, |
|
"eval_samples_per_second": 70.644, |
|
"eval_steps_per_second": 8.901, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0682, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.73, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0682, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"eval_accuracy": 0.7597490196078431, |
|
"eval_loss": 0.8054783940315247, |
|
"eval_runtime": 7.0912, |
|
"eval_samples_per_second": 70.51, |
|
"eval_steps_per_second": 8.884, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"step": 3740, |
|
"total_flos": 8.667972359988183e+17, |
|
"train_loss": 0.1472176224152672, |
|
"train_runtime": 2965.7973, |
|
"train_samples_per_second": 40.461, |
|
"train_steps_per_second": 1.261 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3740, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8.667972359988183e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|