|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 9360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.51985559566787, |
|
"eval_loss": 0.00757412426173687, |
|
"eval_runtime": 4.16, |
|
"eval_samples_per_second": 66.586, |
|
"eval_steps_per_second": 8.413, |
|
"step": 156 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.51985559566787, |
|
"epoch": 1.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.04179096594452858, |
|
"eval_runtime": 4.2511, |
|
"eval_samples_per_second": 65.159, |
|
"eval_steps_per_second": 8.233, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 2.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5054151624548736, |
|
"eval_loss": 0.004435110837221146, |
|
"eval_runtime": 4.2687, |
|
"eval_samples_per_second": 64.891, |
|
"eval_steps_per_second": 8.199, |
|
"step": 468 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 3.0, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0028397435897435895, |
|
"loss": 0.0669, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4693140794223827, |
|
"eval_loss": 0.01173881534487009, |
|
"eval_runtime": 4.2726, |
|
"eval_samples_per_second": 64.832, |
|
"eval_steps_per_second": 8.192, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 4.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.033335305750370026, |
|
"eval_runtime": 4.2774, |
|
"eval_samples_per_second": 64.758, |
|
"eval_steps_per_second": 8.182, |
|
"step": 780 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 5.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4693140794223827, |
|
"eval_loss": 0.0013900818303227425, |
|
"eval_runtime": 4.2808, |
|
"eval_samples_per_second": 64.708, |
|
"eval_steps_per_second": 8.176, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 6.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00267948717948718, |
|
"loss": 0.0209, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007993517792783678, |
|
"eval_runtime": 4.3048, |
|
"eval_samples_per_second": 64.346, |
|
"eval_steps_per_second": 8.13, |
|
"step": 1092 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 7.0, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0030970817897468805, |
|
"eval_runtime": 4.2737, |
|
"eval_samples_per_second": 64.815, |
|
"eval_steps_per_second": 8.19, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 8.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4981949458483754, |
|
"eval_loss": 0.004927648231387138, |
|
"eval_runtime": 4.2737, |
|
"eval_samples_per_second": 64.815, |
|
"eval_steps_per_second": 8.19, |
|
"step": 1404 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 9.0, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0025192307692307693, |
|
"loss": 0.0144, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006902324385009706, |
|
"eval_runtime": 4.2727, |
|
"eval_samples_per_second": 64.83, |
|
"eval_steps_per_second": 8.192, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 10.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.4693140794223827, |
|
"eval_loss": 0.001355065149255097, |
|
"eval_runtime": 4.3031, |
|
"eval_samples_per_second": 64.372, |
|
"eval_steps_per_second": 8.134, |
|
"step": 1716 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 11.0, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5054151624548736, |
|
"eval_loss": 0.002226953860372305, |
|
"eval_runtime": 4.3156, |
|
"eval_samples_per_second": 64.186, |
|
"eval_steps_per_second": 8.11, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 12.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.002358974358974359, |
|
"loss": 0.0094, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007688578334636986, |
|
"eval_runtime": 4.2786, |
|
"eval_samples_per_second": 64.74, |
|
"eval_steps_per_second": 8.18, |
|
"step": 2028 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 13.0, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0011682923650369048, |
|
"eval_runtime": 4.3112, |
|
"eval_samples_per_second": 64.252, |
|
"eval_steps_per_second": 8.118, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 14.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0018351555336266756, |
|
"eval_runtime": 4.3007, |
|
"eval_samples_per_second": 64.409, |
|
"eval_steps_per_second": 8.138, |
|
"step": 2340 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 15.0, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007866995292715728, |
|
"eval_runtime": 4.2927, |
|
"eval_samples_per_second": 64.529, |
|
"eval_steps_per_second": 8.153, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 16.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0021987179487179486, |
|
"loss": 0.0087, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.001059322850778699, |
|
"eval_runtime": 4.2897, |
|
"eval_samples_per_second": 64.573, |
|
"eval_steps_per_second": 8.159, |
|
"step": 2652 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 17.0, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0008964231819845736, |
|
"eval_runtime": 4.2913, |
|
"eval_samples_per_second": 64.549, |
|
"eval_steps_per_second": 8.156, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 18.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0009917899733409286, |
|
"eval_runtime": 4.2906, |
|
"eval_samples_per_second": 64.56, |
|
"eval_steps_per_second": 8.157, |
|
"step": 2964 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 19.0, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0020384615384615385, |
|
"loss": 0.0091, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4584837545126354, |
|
"eval_loss": 0.002107406733557582, |
|
"eval_runtime": 4.2956, |
|
"eval_samples_per_second": 64.485, |
|
"eval_steps_per_second": 8.148, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 20.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0008094427757896483, |
|
"eval_runtime": 4.3018, |
|
"eval_samples_per_second": 64.392, |
|
"eval_steps_per_second": 8.136, |
|
"step": 3276 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 21.0, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0010112057207152247, |
|
"eval_runtime": 4.2947, |
|
"eval_samples_per_second": 64.498, |
|
"eval_steps_per_second": 8.15, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 22.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0018782051282051281, |
|
"loss": 0.0087, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007026080274954438, |
|
"eval_runtime": 4.2959, |
|
"eval_samples_per_second": 64.48, |
|
"eval_steps_per_second": 8.147, |
|
"step": 3588 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 23.0, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.0012016486143693328, |
|
"eval_runtime": 4.2934, |
|
"eval_samples_per_second": 64.518, |
|
"eval_steps_per_second": 8.152, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 24.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0013125467812642455, |
|
"eval_runtime": 4.2925, |
|
"eval_samples_per_second": 64.531, |
|
"eval_steps_per_second": 8.154, |
|
"step": 3900 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 25.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0017179487179487178, |
|
"loss": 0.0088, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.49097472924187724, |
|
"eval_loss": 0.000969512271694839, |
|
"eval_runtime": 4.2949, |
|
"eval_samples_per_second": 64.495, |
|
"eval_steps_per_second": 8.149, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 26.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.001213469309732318, |
|
"eval_runtime": 4.2909, |
|
"eval_samples_per_second": 64.556, |
|
"eval_steps_per_second": 8.157, |
|
"step": 4212 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 27.0, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0011853750329464674, |
|
"eval_runtime": 4.293, |
|
"eval_samples_per_second": 64.524, |
|
"eval_steps_per_second": 8.153, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 28.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0015576923076923079, |
|
"loss": 0.0087, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.49097472924187724, |
|
"eval_loss": 0.0012697704369202256, |
|
"eval_runtime": 4.2941, |
|
"eval_samples_per_second": 64.507, |
|
"eval_steps_per_second": 8.151, |
|
"step": 4524 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 29.0, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0008673242991790175, |
|
"eval_runtime": 4.2937, |
|
"eval_samples_per_second": 64.513, |
|
"eval_steps_per_second": 8.151, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 30.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.001185604021884501, |
|
"eval_runtime": 4.2957, |
|
"eval_samples_per_second": 64.484, |
|
"eval_steps_per_second": 8.148, |
|
"step": 4836 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 31.0, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006753391353413463, |
|
"eval_runtime": 4.2927, |
|
"eval_samples_per_second": 64.529, |
|
"eval_steps_per_second": 8.153, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 32.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0013974358974358976, |
|
"loss": 0.0089, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0009245880064554513, |
|
"eval_runtime": 4.2912, |
|
"eval_samples_per_second": 64.55, |
|
"eval_steps_per_second": 8.156, |
|
"step": 5148 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 33.0, |
|
"step": 5148 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0008024222333915532, |
|
"eval_runtime": 4.294, |
|
"eval_samples_per_second": 64.509, |
|
"eval_steps_per_second": 8.151, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 34.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007070166175253689, |
|
"eval_runtime": 4.2926, |
|
"eval_samples_per_second": 64.53, |
|
"eval_steps_per_second": 8.154, |
|
"step": 5460 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 35.0, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0012371794871794872, |
|
"loss": 0.0087, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0008649426745250821, |
|
"eval_runtime": 4.2928, |
|
"eval_samples_per_second": 64.527, |
|
"eval_steps_per_second": 8.153, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 36.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.48014440433212996, |
|
"eval_loss": 0.0006594746373593807, |
|
"eval_runtime": 4.2922, |
|
"eval_samples_per_second": 64.536, |
|
"eval_steps_per_second": 8.154, |
|
"step": 5772 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 37.0, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007161492831073701, |
|
"eval_runtime": 4.2938, |
|
"eval_samples_per_second": 64.512, |
|
"eval_steps_per_second": 8.151, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 38.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0010769230769230769, |
|
"loss": 0.0093, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006771078915335238, |
|
"eval_runtime": 4.2959, |
|
"eval_samples_per_second": 64.48, |
|
"eval_steps_per_second": 8.147, |
|
"step": 6084 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 39.0, |
|
"step": 6084 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007806611829437315, |
|
"eval_runtime": 4.2905, |
|
"eval_samples_per_second": 64.561, |
|
"eval_steps_per_second": 8.157, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 40.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0011038266820833087, |
|
"eval_runtime": 4.2933, |
|
"eval_samples_per_second": 64.52, |
|
"eval_steps_per_second": 8.152, |
|
"step": 6396 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 41.0, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0009166666666666668, |
|
"loss": 0.0086, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007803016342222691, |
|
"eval_runtime": 4.2875, |
|
"eval_samples_per_second": 64.607, |
|
"eval_steps_per_second": 8.163, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 42.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.001678149332292378, |
|
"eval_runtime": 4.2895, |
|
"eval_samples_per_second": 64.577, |
|
"eval_steps_per_second": 8.159, |
|
"step": 6708 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 43.0, |
|
"step": 6708 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0008550164639018476, |
|
"eval_runtime": 4.2923, |
|
"eval_samples_per_second": 64.534, |
|
"eval_steps_per_second": 8.154, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 44.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0007564102564102564, |
|
"loss": 0.0085, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006565186777152121, |
|
"eval_runtime": 4.288, |
|
"eval_samples_per_second": 64.598, |
|
"eval_steps_per_second": 8.162, |
|
"step": 7020 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 45.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0022354116663336754, |
|
"eval_runtime": 4.292, |
|
"eval_samples_per_second": 64.539, |
|
"eval_steps_per_second": 8.155, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 46.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.000903658801689744, |
|
"eval_runtime": 4.2892, |
|
"eval_samples_per_second": 64.581, |
|
"eval_steps_per_second": 8.16, |
|
"step": 7332 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 47.0, |
|
"step": 7332 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007766391499899328, |
|
"eval_runtime": 4.2894, |
|
"eval_samples_per_second": 64.577, |
|
"eval_steps_per_second": 8.16, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 48.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0005961538461538461, |
|
"loss": 0.0087, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007370923412963748, |
|
"eval_runtime": 4.2874, |
|
"eval_samples_per_second": 64.608, |
|
"eval_steps_per_second": 8.163, |
|
"step": 7644 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 49.0, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.000995746231637895, |
|
"eval_runtime": 4.2916, |
|
"eval_samples_per_second": 64.544, |
|
"eval_steps_per_second": 8.155, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 50.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006885773618705571, |
|
"eval_runtime": 4.2918, |
|
"eval_samples_per_second": 64.542, |
|
"eval_steps_per_second": 8.155, |
|
"step": 7956 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 51.0, |
|
"step": 7956 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00043589743589743596, |
|
"loss": 0.0084, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0012769113527610898, |
|
"eval_runtime": 4.2932, |
|
"eval_samples_per_second": 64.521, |
|
"eval_steps_per_second": 8.152, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 52.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0010381898609921336, |
|
"eval_runtime": 4.293, |
|
"eval_samples_per_second": 64.524, |
|
"eval_steps_per_second": 8.153, |
|
"step": 8268 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 53.0, |
|
"step": 8268 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0010403883643448353, |
|
"eval_runtime": 4.2921, |
|
"eval_samples_per_second": 64.537, |
|
"eval_steps_per_second": 8.155, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 54.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.0002756410256410257, |
|
"loss": 0.0083, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006681613740511239, |
|
"eval_runtime": 4.2933, |
|
"eval_samples_per_second": 64.519, |
|
"eval_steps_per_second": 8.152, |
|
"step": 8580 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 55.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006536916480399668, |
|
"eval_runtime": 4.2936, |
|
"eval_samples_per_second": 64.514, |
|
"eval_steps_per_second": 8.152, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 56.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007098207133822143, |
|
"eval_runtime": 4.2932, |
|
"eval_samples_per_second": 64.521, |
|
"eval_steps_per_second": 8.152, |
|
"step": 8892 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 57.0, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0001153846153846154, |
|
"loss": 0.0082, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006599544431082904, |
|
"eval_runtime": 4.2926, |
|
"eval_samples_per_second": 64.529, |
|
"eval_steps_per_second": 8.154, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 58.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0006803370197303593, |
|
"eval_runtime": 4.2908, |
|
"eval_samples_per_second": 64.557, |
|
"eval_steps_per_second": 8.157, |
|
"step": 9204 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 59.0, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.0007155144703574479, |
|
"eval_runtime": 4.292, |
|
"eval_samples_per_second": 64.538, |
|
"eval_steps_per_second": 8.155, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 60.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 9360, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.012766934969486334, |
|
"train_runtime": 3693.8313, |
|
"train_samples_per_second": 40.446, |
|
"train_steps_per_second": 2.534 |
|
} |
|
], |
|
"max_steps": 9360, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|