|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0003, |
|
"loss": 1.9296, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0003, |
|
"loss": 1.7554, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6093164556962025, |
|
"eval_loss": 1.7939746379852295, |
|
"eval_runtime": 4.7481, |
|
"eval_samples_per_second": 105.304, |
|
"eval_steps_per_second": 13.268, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003, |
|
"loss": 1.6394, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0003, |
|
"loss": 1.5315, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0003, |
|
"loss": 1.5248, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6085063291139241, |
|
"eval_loss": 1.8273799419403076, |
|
"eval_runtime": 4.904, |
|
"eval_samples_per_second": 101.957, |
|
"eval_steps_per_second": 12.847, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0003, |
|
"loss": 1.1692, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0003, |
|
"loss": 1.2054, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6027088607594937, |
|
"eval_loss": 1.9717934131622314, |
|
"eval_runtime": 4.6675, |
|
"eval_samples_per_second": 107.124, |
|
"eval_steps_per_second": 13.498, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0003, |
|
"loss": 1.0252, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8608, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8989, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5986835443037974, |
|
"eval_loss": 2.151914596557617, |
|
"eval_runtime": 5.4432, |
|
"eval_samples_per_second": 91.857, |
|
"eval_steps_per_second": 11.574, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5842, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6306, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5960506329113924, |
|
"eval_loss": 2.329284191131592, |
|
"eval_runtime": 4.4146, |
|
"eval_samples_per_second": 113.261, |
|
"eval_steps_per_second": 14.271, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5368, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.4495, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.4712, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5935696202531645, |
|
"eval_loss": 2.5598793029785156, |
|
"eval_runtime": 4.5669, |
|
"eval_samples_per_second": 109.484, |
|
"eval_steps_per_second": 13.795, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3625, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3797, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5935696202531645, |
|
"eval_loss": 2.732860803604126, |
|
"eval_runtime": 4.5141, |
|
"eval_samples_per_second": 110.763, |
|
"eval_steps_per_second": 13.956, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3582, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3399, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3527, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5912911392405064, |
|
"eval_loss": 2.8185083866119385, |
|
"eval_runtime": 4.7158, |
|
"eval_samples_per_second": 106.026, |
|
"eval_steps_per_second": 13.359, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3202, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3314, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.592, |
|
"eval_loss": 2.824962854385376, |
|
"eval_runtime": 4.8288, |
|
"eval_samples_per_second": 103.546, |
|
"eval_steps_per_second": 13.047, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3174, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3157, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3265, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5911392405063292, |
|
"eval_loss": 2.9242382049560547, |
|
"eval_runtime": 4.7138, |
|
"eval_samples_per_second": 106.072, |
|
"eval_steps_per_second": 13.365, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2989, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3148, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.591240506329114, |
|
"eval_loss": 3.0012593269348145, |
|
"eval_runtime": 5.1318, |
|
"eval_samples_per_second": 97.431, |
|
"eval_steps_per_second": 12.276, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3028, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3047, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3184, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.590632911392405, |
|
"eval_loss": 2.931525707244873, |
|
"eval_runtime": 4.7893, |
|
"eval_samples_per_second": 104.399, |
|
"eval_steps_per_second": 13.154, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3101, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5896962025316456, |
|
"eval_loss": 2.9116382598876953, |
|
"eval_runtime": 4.5748, |
|
"eval_samples_per_second": 109.295, |
|
"eval_steps_per_second": 13.771, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3063, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3041, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3164, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5902025316455696, |
|
"eval_loss": 2.920793056488037, |
|
"eval_runtime": 4.718, |
|
"eval_samples_per_second": 105.977, |
|
"eval_steps_per_second": 13.353, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2957, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3074, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5908607594936709, |
|
"eval_loss": 2.9385440349578857, |
|
"eval_runtime": 5.108, |
|
"eval_samples_per_second": 97.887, |
|
"eval_steps_per_second": 12.334, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3013, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3002, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3107, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5891645569620253, |
|
"eval_loss": 2.9519243240356445, |
|
"eval_runtime": 4.8208, |
|
"eval_samples_per_second": 103.717, |
|
"eval_steps_per_second": 13.068, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2892, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3054, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5898227848101266, |
|
"eval_loss": 3.010847568511963, |
|
"eval_runtime": 5.1511, |
|
"eval_samples_per_second": 97.066, |
|
"eval_steps_per_second": 12.23, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2959, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.297, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.309, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5903544303797469, |
|
"eval_loss": 3.003683567047119, |
|
"eval_runtime": 4.8904, |
|
"eval_samples_per_second": 102.241, |
|
"eval_steps_per_second": 12.882, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2883, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3005, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5898481012658228, |
|
"eval_loss": 3.0279438495635986, |
|
"eval_runtime": 5.1333, |
|
"eval_samples_per_second": 97.403, |
|
"eval_steps_per_second": 12.273, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2959, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2911, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3127, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5883291139240506, |
|
"eval_loss": 2.9650285243988037, |
|
"eval_runtime": 4.8904, |
|
"eval_samples_per_second": 102.242, |
|
"eval_steps_per_second": 12.882, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5000, |
|
"total_flos": 3.1967425075347456e+17, |
|
"train_loss": 0.5438678237915039, |
|
"train_runtime": 3497.065, |
|
"train_samples_per_second": 45.753, |
|
"train_steps_per_second": 1.43 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5000, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 3.1967425075347456e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|