vietcuna-3b_1024 / checkpoint-600 /trainer_state.json
duyvt6663's picture
Training in progress, step 600, checkpoint
7f21ff6
raw
history blame
No virus
4.92 kB
{
"best_metric": 0.665068507194519,
"best_model_checkpoint": "output/checkpoint-50",
"epoch": 2.455242966751918,
"eval_steps": 50,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 7.575757575757576e-07,
"loss": 0.4968,
"step": 1
},
{
"epoch": 0.2,
"learning_rate": 3.484848484848485e-05,
"loss": 0.5254,
"step": 50
},
{
"epoch": 0.2,
"eval_accuracy": 0.6507462686567164,
"eval_loss": 0.665068507194519,
"eval_runtime": 61.5592,
"eval_samples_per_second": 5.442,
"eval_steps_per_second": 1.365,
"step": 50
},
{
"epoch": 0.41,
"learning_rate": 7.272727272727273e-05,
"loss": 0.4971,
"step": 100
},
{
"epoch": 0.41,
"eval_accuracy": 0.6656716417910448,
"eval_loss": 0.8002150058746338,
"eval_runtime": 61.211,
"eval_samples_per_second": 5.473,
"eval_steps_per_second": 1.372,
"step": 100
},
{
"epoch": 0.61,
"learning_rate": 9.783333333333334e-05,
"loss": 0.5039,
"step": 150
},
{
"epoch": 0.61,
"eval_accuracy": 0.6865671641791045,
"eval_loss": 0.7404947280883789,
"eval_runtime": 61.4487,
"eval_samples_per_second": 5.452,
"eval_steps_per_second": 1.367,
"step": 150
},
{
"epoch": 0.82,
"learning_rate": 8.950000000000001e-05,
"loss": 0.4944,
"step": 200
},
{
"epoch": 0.82,
"eval_accuracy": 0.6238805970149254,
"eval_loss": 0.9986834526062012,
"eval_runtime": 60.5135,
"eval_samples_per_second": 5.536,
"eval_steps_per_second": 1.388,
"step": 200
},
{
"epoch": 1.02,
"learning_rate": 8.116666666666667e-05,
"loss": 0.508,
"step": 250
},
{
"epoch": 1.02,
"eval_accuracy": 0.6268656716417911,
"eval_loss": 0.9990125894546509,
"eval_runtime": 56.4381,
"eval_samples_per_second": 5.936,
"eval_steps_per_second": 1.488,
"step": 250
},
{
"epoch": 1.23,
"learning_rate": 7.283333333333335e-05,
"loss": 0.4904,
"step": 300
},
{
"epoch": 1.23,
"eval_accuracy": 0.6119402985074627,
"eval_loss": 1.0483125448226929,
"eval_runtime": 56.4117,
"eval_samples_per_second": 5.938,
"eval_steps_per_second": 1.489,
"step": 300
},
{
"epoch": 1.43,
"learning_rate": 6.450000000000001e-05,
"loss": 0.4694,
"step": 350
},
{
"epoch": 1.43,
"eval_accuracy": 0.6417910447761194,
"eval_loss": 0.9735069274902344,
"eval_runtime": 56.422,
"eval_samples_per_second": 5.937,
"eval_steps_per_second": 1.489,
"step": 350
},
{
"epoch": 1.64,
"learning_rate": 5.6166666666666665e-05,
"loss": 0.4937,
"step": 400
},
{
"epoch": 1.64,
"eval_accuracy": 0.5880597014925373,
"eval_loss": 1.1042370796203613,
"eval_runtime": 56.4248,
"eval_samples_per_second": 5.937,
"eval_steps_per_second": 1.489,
"step": 400
},
{
"epoch": 1.84,
"learning_rate": 4.7833333333333335e-05,
"loss": 0.4877,
"step": 450
},
{
"epoch": 1.84,
"eval_accuracy": 0.6477611940298508,
"eval_loss": 0.9591583609580994,
"eval_runtime": 56.4028,
"eval_samples_per_second": 5.939,
"eval_steps_per_second": 1.489,
"step": 450
},
{
"epoch": 2.05,
"learning_rate": 3.9500000000000005e-05,
"loss": 0.482,
"step": 500
},
{
"epoch": 2.05,
"eval_accuracy": 0.6716417910447762,
"eval_loss": 0.89241623878479,
"eval_runtime": 56.408,
"eval_samples_per_second": 5.939,
"eval_steps_per_second": 1.489,
"step": 500
},
{
"epoch": 2.25,
"learning_rate": 3.116666666666667e-05,
"loss": 0.4688,
"step": 550
},
{
"epoch": 2.25,
"eval_accuracy": 0.6447761194029851,
"eval_loss": 0.9692543148994446,
"eval_runtime": 56.4147,
"eval_samples_per_second": 5.938,
"eval_steps_per_second": 1.489,
"step": 550
},
{
"epoch": 2.46,
"learning_rate": 2.2833333333333334e-05,
"loss": 0.4818,
"step": 600
},
{
"epoch": 2.46,
"eval_accuracy": 0.6417910447761194,
"eval_loss": 0.981917679309845,
"eval_runtime": 56.4148,
"eval_samples_per_second": 5.938,
"eval_steps_per_second": 1.489,
"step": 600
}
],
"logging_steps": 50,
"max_steps": 732,
"num_train_epochs": 3,
"save_steps": 50,
"total_flos": 2.5561235391596544e+17,
"trial_name": null,
"trial_params": null
}