|
{ |
|
"best_metric": 0.6615384615384615, |
|
"best_model_checkpoint": "./results/checkpoint-43", |
|
"epoch": 33.333333333333336, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5076923076923077, |
|
"eval_loss": 1.161078929901123, |
|
"eval_runtime": 0.2665, |
|
"eval_samples_per_second": 243.858, |
|
"eval_steps_per_second": 11.255, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5076923076923077, |
|
"eval_loss": 1.1464765071868896, |
|
"eval_runtime": 0.3457, |
|
"eval_samples_per_second": 188.041, |
|
"eval_steps_per_second": 8.679, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5076923076923077, |
|
"eval_loss": 1.1057497262954712, |
|
"eval_runtime": 0.3211, |
|
"eval_samples_per_second": 202.425, |
|
"eval_steps_per_second": 9.343, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5076923076923077, |
|
"eval_loss": 1.113421082496643, |
|
"eval_runtime": 0.2679, |
|
"eval_samples_per_second": 242.627, |
|
"eval_steps_per_second": 11.198, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5076923076923077, |
|
"eval_loss": 1.0864346027374268, |
|
"eval_runtime": 0.3081, |
|
"eval_samples_per_second": 210.961, |
|
"eval_steps_per_second": 9.737, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5384615384615384, |
|
"eval_loss": 0.9837771058082581, |
|
"eval_runtime": 0.3805, |
|
"eval_samples_per_second": 170.821, |
|
"eval_steps_per_second": 7.884, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.5404, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5538461538461539, |
|
"eval_loss": 0.9654892683029175, |
|
"eval_runtime": 0.2642, |
|
"eval_samples_per_second": 246.029, |
|
"eval_steps_per_second": 11.355, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5692307692307692, |
|
"eval_loss": 0.9630248546600342, |
|
"eval_runtime": 0.2668, |
|
"eval_samples_per_second": 243.599, |
|
"eval_steps_per_second": 11.243, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5538461538461539, |
|
"eval_loss": 0.9631487131118774, |
|
"eval_runtime": 0.4451, |
|
"eval_samples_per_second": 146.049, |
|
"eval_steps_per_second": 6.741, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5384615384615384, |
|
"eval_loss": 1.0176990032196045, |
|
"eval_runtime": 0.373, |
|
"eval_samples_per_second": 174.264, |
|
"eval_steps_per_second": 8.043, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5538461538461539, |
|
"eval_loss": 1.0124413967132568, |
|
"eval_runtime": 0.2736, |
|
"eval_samples_per_second": 237.589, |
|
"eval_steps_per_second": 10.966, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5692307692307692, |
|
"eval_loss": 0.9905163049697876, |
|
"eval_runtime": 0.3001, |
|
"eval_samples_per_second": 216.589, |
|
"eval_steps_per_second": 9.996, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6153846153846154, |
|
"eval_loss": 0.9472815990447998, |
|
"eval_runtime": 0.2929, |
|
"eval_samples_per_second": 221.921, |
|
"eval_steps_per_second": 10.243, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.5207, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.9549360871315002, |
|
"eval_runtime": 0.2676, |
|
"eval_samples_per_second": 242.87, |
|
"eval_steps_per_second": 11.209, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5846153846153846, |
|
"eval_loss": 0.934834897518158, |
|
"eval_runtime": 0.2803, |
|
"eval_samples_per_second": 231.892, |
|
"eval_steps_per_second": 10.703, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5846153846153846, |
|
"eval_loss": 0.9018541574478149, |
|
"eval_runtime": 0.3596, |
|
"eval_samples_per_second": 180.772, |
|
"eval_steps_per_second": 8.343, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5846153846153846, |
|
"eval_loss": 0.8686960339546204, |
|
"eval_runtime": 0.3401, |
|
"eval_samples_per_second": 191.114, |
|
"eval_steps_per_second": 8.821, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5846153846153846, |
|
"eval_loss": 0.846206784248352, |
|
"eval_runtime": 0.2696, |
|
"eval_samples_per_second": 241.07, |
|
"eval_steps_per_second": 11.126, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6153846153846154, |
|
"eval_loss": 0.84183669090271, |
|
"eval_runtime": 0.2747, |
|
"eval_samples_per_second": 236.626, |
|
"eval_steps_per_second": 10.921, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.5146, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.8419151306152344, |
|
"eval_runtime": 0.2979, |
|
"eval_samples_per_second": 218.221, |
|
"eval_steps_per_second": 10.072, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5692307692307692, |
|
"eval_loss": 0.8435397148132324, |
|
"eval_runtime": 0.3514, |
|
"eval_samples_per_second": 184.977, |
|
"eval_steps_per_second": 8.537, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5538461538461539, |
|
"eval_loss": 0.8414743542671204, |
|
"eval_runtime": 0.4837, |
|
"eval_samples_per_second": 134.389, |
|
"eval_steps_per_second": 6.203, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6153846153846154, |
|
"eval_loss": 0.8293341994285583, |
|
"eval_runtime": 0.4826, |
|
"eval_samples_per_second": 134.685, |
|
"eval_steps_per_second": 6.216, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.8253650665283203, |
|
"eval_runtime": 0.2849, |
|
"eval_samples_per_second": 228.165, |
|
"eval_steps_per_second": 10.531, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6153846153846154, |
|
"eval_loss": 0.8218726515769958, |
|
"eval_runtime": 0.2622, |
|
"eval_samples_per_second": 247.867, |
|
"eval_steps_per_second": 11.44, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6461538461538462, |
|
"eval_loss": 0.8194869160652161, |
|
"eval_runtime": 0.389, |
|
"eval_samples_per_second": 167.11, |
|
"eval_steps_per_second": 7.713, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.4352, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6461538461538462, |
|
"eval_loss": 0.8192200660705566, |
|
"eval_runtime": 0.2763, |
|
"eval_samples_per_second": 235.224, |
|
"eval_steps_per_second": 10.857, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6307692307692307, |
|
"eval_loss": 0.8197530508041382, |
|
"eval_runtime": 0.2574, |
|
"eval_samples_per_second": 252.551, |
|
"eval_steps_per_second": 11.656, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6615384615384615, |
|
"eval_loss": 0.8229538202285767, |
|
"eval_runtime": 0.2901, |
|
"eval_samples_per_second": 224.029, |
|
"eval_steps_per_second": 10.34, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6461538461538462, |
|
"eval_loss": 0.8263610005378723, |
|
"eval_runtime": 0.2703, |
|
"eval_samples_per_second": 240.492, |
|
"eval_steps_per_second": 11.1, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6461538461538462, |
|
"eval_loss": 0.8268065452575684, |
|
"eval_runtime": 0.425, |
|
"eval_samples_per_second": 152.94, |
|
"eval_steps_per_second": 7.059, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6461538461538462, |
|
"eval_loss": 0.8266280293464661, |
|
"eval_runtime": 0.2626, |
|
"eval_samples_per_second": 247.53, |
|
"eval_steps_per_second": 11.424, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6461538461538462, |
|
"eval_loss": 0.8262693881988525, |
|
"eval_runtime": 0.2849, |
|
"eval_samples_per_second": 228.131, |
|
"eval_steps_per_second": 10.529, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.0, |
|
"loss": 0.4724, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_accuracy": 0.6461538461538462, |
|
"eval_loss": 0.8263109922409058, |
|
"eval_runtime": 0.3713, |
|
"eval_samples_per_second": 175.053, |
|
"eval_steps_per_second": 8.079, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"step": 50, |
|
"total_flos": 5.411451203278848e+16, |
|
"train_loss": 0.4966608715057373, |
|
"train_runtime": 129.3451, |
|
"train_samples_per_second": 25.127, |
|
"train_steps_per_second": 0.387 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 50, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 5.411451203278848e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|