deberta-v3-large-cola / trainer_state.json
yevheniimaslov's picture
fine tuned model, 0.27 loss, 2 epochs, lr 8e-6
74381b3
{
"best_metric": 0.27896466851234436,
"best_model_checkpoint": "out/checkpoint-900",
"epoch": 0.8419083255378859,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"eval_loss": 0.3386548161506653,
"eval_runtime": 3.4462,
"eval_samples_per_second": 152.924,
"eval_steps_per_second": 19.152,
"step": 100
},
{
"epoch": 0.19,
"eval_loss": 0.32248201966285706,
"eval_runtime": 3.2636,
"eval_samples_per_second": 161.477,
"eval_steps_per_second": 20.223,
"step": 200
},
{
"epoch": 0.28,
"eval_loss": 0.3344081938266754,
"eval_runtime": 3.3846,
"eval_samples_per_second": 155.706,
"eval_steps_per_second": 19.5,
"step": 300
},
{
"epoch": 0.37,
"eval_loss": 0.37741541862487793,
"eval_runtime": 3.4216,
"eval_samples_per_second": 154.02,
"eval_steps_per_second": 19.289,
"step": 400
},
{
"epoch": 0.47,
"learning_rate": 6.9799095266062985e-06,
"loss": 0.3934,
"step": 500
},
{
"epoch": 0.47,
"eval_loss": 0.41931670904159546,
"eval_runtime": 3.2614,
"eval_samples_per_second": 161.586,
"eval_steps_per_second": 20.237,
"step": 500
},
{
"epoch": 0.56,
"eval_loss": 0.3693557381629944,
"eval_runtime": 3.2679,
"eval_samples_per_second": 161.266,
"eval_steps_per_second": 20.196,
"step": 600
},
{
"epoch": 0.65,
"eval_loss": 0.34475621581077576,
"eval_runtime": 3.2639,
"eval_samples_per_second": 161.465,
"eval_steps_per_second": 20.221,
"step": 700
},
{
"epoch": 0.75,
"eval_loss": 0.34064173698425293,
"eval_runtime": 3.2698,
"eval_samples_per_second": 161.174,
"eval_steps_per_second": 20.185,
"step": 800
},
{
"epoch": 0.84,
"eval_loss": 0.27896466851234436,
"eval_runtime": 3.2598,
"eval_samples_per_second": 161.668,
"eval_steps_per_second": 20.247,
"step": 900
}
],
"max_steps": 2138,
"num_train_epochs": 2,
"total_flos": 148483879153632.0,
"trial_name": null,
"trial_params": null
}