problem2-test / trainer_state.json
holylovenia's picture
Training in progress, epoch 0
0785668
raw
history blame
2.18 kB
{
"best_metric": 0.5901639344262295,
"best_model_checkpoint": "problem2-test/checkpoint-21",
"epoch": 4.903225806451613,
"global_step": 35,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9,
"eval_accuracy": 0.0,
"eval_loss": 6.274798393249512,
"eval_runtime": 1.2157,
"eval_samples_per_second": 50.175,
"eval_steps_per_second": 6.58,
"step": 7
},
{
"epoch": 1.39,
"learning_rate": 4.032258064516129e-05,
"loss": 7.4671,
"step": 10
},
{
"epoch": 1.9,
"eval_accuracy": 0.5737704918032787,
"eval_loss": 5.177652835845947,
"eval_runtime": 1.19,
"eval_samples_per_second": 51.26,
"eval_steps_per_second": 6.723,
"step": 14
},
{
"epoch": 2.77,
"learning_rate": 2.4193548387096777e-05,
"loss": 5.3033,
"step": 20
},
{
"epoch": 2.9,
"eval_accuracy": 0.5901639344262295,
"eval_loss": 4.461605072021484,
"eval_runtime": 1.0892,
"eval_samples_per_second": 56.006,
"eval_steps_per_second": 7.345,
"step": 21
},
{
"epoch": 3.9,
"eval_accuracy": 0.5245901639344263,
"eval_loss": 5.181081295013428,
"eval_runtime": 1.1367,
"eval_samples_per_second": 53.664,
"eval_steps_per_second": 7.038,
"step": 28
},
{
"epoch": 4.26,
"learning_rate": 8.064516129032258e-06,
"loss": 4.1105,
"step": 30
},
{
"epoch": 4.9,
"eval_accuracy": 0.5573770491803278,
"eval_loss": 4.228121757507324,
"eval_runtime": 1.1738,
"eval_samples_per_second": 51.966,
"eval_steps_per_second": 6.815,
"step": 35
},
{
"epoch": 4.9,
"step": 35,
"total_flos": 2.76987521728512e+16,
"train_loss": 5.3129854474748885,
"train_runtime": 102.3477,
"train_samples_per_second": 11.92,
"train_steps_per_second": 0.342
}
],
"max_steps": 35,
"num_train_epochs": 5,
"total_flos": 2.76987521728512e+16,
"trial_name": null,
"trial_params": null
}