nllb-200-distilled-600M-th / trainer_state.json
Xmm's picture
Upload 13 files
05f8090
raw
history blame
1.62 kB
{
"best_metric": 0.3143588602542877,
"best_model_checkpoint": "./checkpoint-th/checkpoint-1500",
"epoch": 0.2763894327106894,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"learning_rate": 1.963147226828819e-05,
"loss": 4.6049,
"step": 500
},
{
"epoch": 0.09,
"eval_bleu": 24.7133,
"eval_gen_len": 33.9437,
"eval_loss": 1.1478698253631592,
"eval_runtime": 214.6895,
"eval_samples_per_second": 4.714,
"eval_steps_per_second": 1.178,
"step": 500
},
{
"epoch": 0.18,
"learning_rate": 1.9262944536576377e-05,
"loss": 0.3574,
"step": 1000
},
{
"epoch": 0.18,
"eval_bleu": 25.1845,
"eval_gen_len": 34.0662,
"eval_loss": 0.31802815198898315,
"eval_runtime": 214.1787,
"eval_samples_per_second": 4.725,
"eval_steps_per_second": 1.181,
"step": 1000
},
{
"epoch": 0.28,
"learning_rate": 1.8894416804864568e-05,
"loss": 0.1925,
"step": 1500
},
{
"epoch": 0.28,
"eval_bleu": 25.5018,
"eval_gen_len": 33.7075,
"eval_loss": 0.3143588602542877,
"eval_runtime": 211.869,
"eval_samples_per_second": 4.777,
"eval_steps_per_second": 1.194,
"step": 1500
}
],
"logging_steps": 500,
"max_steps": 27135,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 1.04021020901376e+17,
"trial_name": null,
"trial_params": null
}