{
  "best_metric": 0.27745380997657776,
  "best_model_checkpoint": "uk-mt5-base-gec/checkpoint-1350",
  "epoch": 3.0177409816676524,
  "global_step": 2550,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "learning_rate": 0.000970414201183432,
      "loss": 2.8559,
      "step": 150
    },
    {
      "epoch": 0.18,
      "eval_google_bleu": 0.4161949252310698,
      "eval_loss": 0.3385647237300873,
      "eval_runtime": 574.9325,
      "eval_samples_per_second": 5.227,
      "eval_steps_per_second": 0.327,
      "step": 150
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.000940828402366864,
      "loss": 0.353,
      "step": 300
    },
    {
      "epoch": 0.35,
      "eval_google_bleu": 0.4197968462318859,
      "eval_loss": 0.30946752429008484,
      "eval_runtime": 574.235,
      "eval_samples_per_second": 5.233,
      "eval_steps_per_second": 0.327,
      "step": 300
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0009112426035502958,
      "loss": 0.3433,
      "step": 450
    },
    {
      "epoch": 0.53,
      "eval_google_bleu": 0.42082026039416087,
      "eval_loss": 0.30230990052223206,
      "eval_runtime": 573.7714,
      "eval_samples_per_second": 5.237,
      "eval_steps_per_second": 0.328,
      "step": 450
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0008816568047337278,
      "loss": 0.3248,
      "step": 600
    },
    {
      "epoch": 0.71,
      "eval_google_bleu": 0.4194576852971206,
      "eval_loss": 0.2984682619571686,
      "eval_runtime": 574.3927,
      "eval_samples_per_second": 5.232,
      "eval_steps_per_second": 0.327,
      "step": 600
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0008520710059171598,
      "loss": 0.3046,
      "step": 750
    },
    {
      "epoch": 0.89,
      "eval_google_bleu": 0.4217920913982863,
      "eval_loss": 0.28489378094673157,
      "eval_runtime": 574.7617,
      "eval_samples_per_second": 5.228,
      "eval_steps_per_second": 0.327,
      "step": 750
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0008224852071005917,
      "loss": 0.2625,
      "step": 900
    },
    {
      "epoch": 1.07,
      "eval_google_bleu": 0.4213197969543147,
      "eval_loss": 0.29553136229515076,
      "eval_runtime": 573.965,
      "eval_samples_per_second": 5.236,
      "eval_steps_per_second": 0.328,
      "step": 900
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0007928994082840238,
      "loss": 0.2127,
      "step": 1050
    },
    {
      "epoch": 1.24,
      "eval_google_bleu": 0.4211682670038433,
      "eval_loss": 0.30292925238609314,
      "eval_runtime": 574.6793,
      "eval_samples_per_second": 5.229,
      "eval_steps_per_second": 0.327,
      "step": 1050
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.0007633136094674556,
      "loss": 0.224,
      "step": 1200
    },
    {
      "epoch": 1.42,
      "eval_google_bleu": 0.4224696723929531,
      "eval_loss": 0.3068563640117645,
      "eval_runtime": 574.5278,
      "eval_samples_per_second": 5.23,
      "eval_steps_per_second": 0.327,
      "step": 1200
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0007337278106508876,
      "loss": 0.2332,
      "step": 1350
    },
    {
      "epoch": 1.6,
      "eval_google_bleu": 0.41897146578336036,
      "eval_loss": 0.27745380997657776,
      "eval_runtime": 575.5901,
      "eval_samples_per_second": 5.221,
      "eval_steps_per_second": 0.327,
      "step": 1350
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.0007041420118343196,
      "loss": 0.238,
      "step": 1500
    },
    {
      "epoch": 1.77,
      "eval_google_bleu": 0.4164072628882445,
      "eval_loss": 0.2903579771518707,
      "eval_runtime": 573.985,
      "eval_samples_per_second": 5.235,
      "eval_steps_per_second": 0.328,
      "step": 1500
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.0006745562130177515,
      "loss": 0.2297,
      "step": 1650
    },
    {
      "epoch": 1.95,
      "eval_google_bleu": 0.41891172732452214,
      "eval_loss": 0.2825988233089447,
      "eval_runtime": 574.7646,
      "eval_samples_per_second": 5.228,
      "eval_steps_per_second": 0.327,
      "step": 1650
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.0006449704142011834,
      "loss": 0.1649,
      "step": 1800
    },
    {
      "epoch": 2.13,
      "eval_google_bleu": 0.41988701131139533,
      "eval_loss": 0.304867148399353,
      "eval_runtime": 575.4878,
      "eval_samples_per_second": 5.222,
      "eval_steps_per_second": 0.327,
      "step": 1800
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.0006153846153846154,
      "loss": 0.1458,
      "step": 1950
    },
    {
      "epoch": 2.31,
      "eval_google_bleu": 0.4201902185823143,
      "eval_loss": 0.3138478994369507,
      "eval_runtime": 575.2695,
      "eval_samples_per_second": 5.224,
      "eval_steps_per_second": 0.327,
      "step": 1950
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0005857988165680473,
      "loss": 0.1564,
      "step": 2100
    },
    {
      "epoch": 2.48,
      "eval_google_bleu": 0.415792735992587,
      "eval_loss": 0.3027360141277313,
      "eval_runtime": 566.4822,
      "eval_samples_per_second": 5.305,
      "eval_steps_per_second": 0.332,
      "step": 2100
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.0005562130177514793,
      "loss": 0.1572,
      "step": 2250
    },
    {
      "epoch": 2.66,
      "eval_google_bleu": 0.4212085345156907,
      "eval_loss": 0.3020596504211426,
      "eval_runtime": 574.5482,
      "eval_samples_per_second": 5.23,
      "eval_steps_per_second": 0.327,
      "step": 2250
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0005266272189349113,
      "loss": 0.159,
      "step": 2400
    },
    {
      "epoch": 2.84,
      "eval_google_bleu": 0.41866493031642477,
      "eval_loss": 0.29025933146476746,
      "eval_runtime": 574.442,
      "eval_samples_per_second": 5.231,
      "eval_steps_per_second": 0.327,
      "step": 2400
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.0004970414201183431,
      "loss": 0.159,
      "step": 2550
    },
    {
      "epoch": 3.02,
      "eval_google_bleu": 0.4191764521684672,
      "eval_loss": 0.3248673677444458,
      "eval_runtime": 573.9254,
      "eval_samples_per_second": 5.236,
      "eval_steps_per_second": 0.328,
      "step": 2550
    }
  ],
  "max_steps": 5070,
  "num_train_epochs": 6,
  "total_flos": 2.984077609323725e+16,
  "trial_name": null,
  "trial_params": null
}