|
{ |
|
"best_metric": 0.665068507194519, |
|
"best_model_checkpoint": "output/checkpoint-50", |
|
"epoch": 2.455242966751918, |
|
"eval_steps": 50, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.575757575757576e-07, |
|
"loss": 0.4968, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.484848484848485e-05, |
|
"loss": 0.5254, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.6507462686567164, |
|
"eval_loss": 0.665068507194519, |
|
"eval_runtime": 61.5592, |
|
"eval_samples_per_second": 5.442, |
|
"eval_steps_per_second": 1.365, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.4971, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.6656716417910448, |
|
"eval_loss": 0.8002150058746338, |
|
"eval_runtime": 61.211, |
|
"eval_samples_per_second": 5.473, |
|
"eval_steps_per_second": 1.372, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.783333333333334e-05, |
|
"loss": 0.5039, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.6865671641791045, |
|
"eval_loss": 0.7404947280883789, |
|
"eval_runtime": 61.4487, |
|
"eval_samples_per_second": 5.452, |
|
"eval_steps_per_second": 1.367, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.950000000000001e-05, |
|
"loss": 0.4944, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.6238805970149254, |
|
"eval_loss": 0.9986834526062012, |
|
"eval_runtime": 60.5135, |
|
"eval_samples_per_second": 5.536, |
|
"eval_steps_per_second": 1.388, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.116666666666667e-05, |
|
"loss": 0.508, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.6268656716417911, |
|
"eval_loss": 0.9990125894546509, |
|
"eval_runtime": 56.4381, |
|
"eval_samples_per_second": 5.936, |
|
"eval_steps_per_second": 1.488, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.283333333333335e-05, |
|
"loss": 0.4904, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_accuracy": 0.6119402985074627, |
|
"eval_loss": 1.0483125448226929, |
|
"eval_runtime": 56.4117, |
|
"eval_samples_per_second": 5.938, |
|
"eval_steps_per_second": 1.489, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 6.450000000000001e-05, |
|
"loss": 0.4694, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.6417910447761194, |
|
"eval_loss": 0.9735069274902344, |
|
"eval_runtime": 56.422, |
|
"eval_samples_per_second": 5.937, |
|
"eval_steps_per_second": 1.489, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 5.6166666666666665e-05, |
|
"loss": 0.4937, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.5880597014925373, |
|
"eval_loss": 1.1042370796203613, |
|
"eval_runtime": 56.4248, |
|
"eval_samples_per_second": 5.937, |
|
"eval_steps_per_second": 1.489, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.7833333333333335e-05, |
|
"loss": 0.4877, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.6477611940298508, |
|
"eval_loss": 0.9591583609580994, |
|
"eval_runtime": 56.4028, |
|
"eval_samples_per_second": 5.939, |
|
"eval_steps_per_second": 1.489, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"loss": 0.482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_accuracy": 0.6716417910447762, |
|
"eval_loss": 0.89241623878479, |
|
"eval_runtime": 56.408, |
|
"eval_samples_per_second": 5.939, |
|
"eval_steps_per_second": 1.489, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.116666666666667e-05, |
|
"loss": 0.4688, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 0.6447761194029851, |
|
"eval_loss": 0.9692543148994446, |
|
"eval_runtime": 56.4147, |
|
"eval_samples_per_second": 5.938, |
|
"eval_steps_per_second": 1.489, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.2833333333333334e-05, |
|
"loss": 0.4818, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_accuracy": 0.6417910447761194, |
|
"eval_loss": 0.981917679309845, |
|
"eval_runtime": 56.4148, |
|
"eval_samples_per_second": 5.938, |
|
"eval_steps_per_second": 1.489, |
|
"step": 600 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 732, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"total_flos": 2.5561235391596544e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|