{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 35.0,
  "global_step": 33600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.535132032339264e-05,
      "loss": 1.7652,
      "step": 960
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.66545764302116,
      "eval_loss": 1.7168171405792236,
      "eval_runtime": 51.9578,
      "eval_samples_per_second": 146.35,
      "eval_steps_per_second": 0.924,
      "step": 960
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.6900880215595094e-05,
      "loss": 1.5201,
      "step": 1920
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6816024828889072,
      "eval_loss": 1.6037945747375488,
      "eval_runtime": 51.6258,
      "eval_samples_per_second": 147.291,
      "eval_steps_per_second": 0.93,
      "step": 1920
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.7807314645155048e-05,
      "loss": 1.4366,
      "step": 2880
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6859236059235067,
      "eval_loss": 1.5763635635375977,
      "eval_runtime": 52.5239,
      "eval_samples_per_second": 144.772,
      "eval_steps_per_second": 0.914,
      "step": 2880
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.8450440107797548e-05,
      "loss": 1.3831,
      "step": 3840
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6914059105006866,
      "eval_loss": 1.5295616388320923,
      "eval_runtime": 51.6883,
      "eval_samples_per_second": 147.113,
      "eval_steps_per_second": 0.929,
      "step": 3840
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.894928697180815e-05,
      "loss": 1.3447,
      "step": 4800
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6934798357287497,
      "eval_loss": 1.5127382278442383,
      "eval_runtime": 51.7137,
      "eval_samples_per_second": 147.04,
      "eval_steps_per_second": 0.928,
      "step": 4800
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.93568745373575e-05,
      "loss": 1.314,
      "step": 5760
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6982260680110423,
      "eval_loss": 1.4813944101333618,
      "eval_runtime": 51.6818,
      "eval_samples_per_second": 147.131,
      "eval_steps_per_second": 0.929,
      "step": 5760
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.9701484913790247e-05,
      "loss": 1.29,
      "step": 6720
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.6991250528199647,
      "eval_loss": 1.4718950986862183,
      "eval_runtime": 52.9344,
      "eval_samples_per_second": 143.649,
      "eval_steps_per_second": 0.907,
      "step": 6720
    },
    {
      "epoch": 8.0,
      "learning_rate": 2e-05,
      "loss": 1.2669,
      "step": 7680
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7032825019384854,
      "eval_loss": 1.449450135231018,
      "eval_runtime": 50.9613,
      "eval_samples_per_second": 149.211,
      "eval_steps_per_second": 0.942,
      "step": 7680
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 1.2461,
      "step": 8640
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7032601118345916,
      "eval_loss": 1.4466781616210938,
      "eval_runtime": 50.8922,
      "eval_samples_per_second": 149.414,
      "eval_steps_per_second": 0.943,
      "step": 8640
    },
    {
      "epoch": 10.0,
      "learning_rate": 2e-05,
      "loss": 1.2323,
      "step": 9600
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7045017515092793,
      "eval_loss": 1.4342981576919556,
      "eval_runtime": 51.2867,
      "eval_samples_per_second": 148.264,
      "eval_steps_per_second": 0.936,
      "step": 9600
    },
    {
      "epoch": 11.0,
      "learning_rate": 2e-05,
      "loss": 1.2191,
      "step": 10560
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7057310891893632,
      "eval_loss": 1.4175918102264404,
      "eval_runtime": 51.4742,
      "eval_samples_per_second": 147.725,
      "eval_steps_per_second": 0.933,
      "step": 10560
    },
    {
      "epoch": 12.0,
      "learning_rate": 2e-05,
      "loss": 1.2046,
      "step": 11520
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7111463874637353,
      "eval_loss": 1.3918827772140503,
      "eval_runtime": 51.2424,
      "eval_samples_per_second": 148.393,
      "eval_steps_per_second": 0.937,
      "step": 11520
    },
    {
      "epoch": 13.0,
      "learning_rate": 2e-05,
      "loss": 1.1944,
      "step": 12480
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7096209003462108,
      "eval_loss": 1.397229790687561,
      "eval_runtime": 51.3927,
      "eval_samples_per_second": 147.959,
      "eval_steps_per_second": 0.934,
      "step": 12480
    },
    {
      "epoch": 14.0,
      "learning_rate": 2e-05,
      "loss": 1.1821,
      "step": 13440
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7115298351243047,
      "eval_loss": 1.386526107788086,
      "eval_runtime": 51.5062,
      "eval_samples_per_second": 147.633,
      "eval_steps_per_second": 0.932,
      "step": 13440
    },
    {
      "epoch": 15.0,
      "learning_rate": 2e-05,
      "loss": 1.1716,
      "step": 14400
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7124512834797834,
      "eval_loss": 1.383684515953064,
      "eval_runtime": 51.5334,
      "eval_samples_per_second": 147.555,
      "eval_steps_per_second": 0.931,
      "step": 14400
    },
    {
      "epoch": 16.0,
      "learning_rate": 2e-05,
      "loss": 1.1627,
      "step": 15360
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7130167108614363,
      "eval_loss": 1.3739854097366333,
      "eval_runtime": 51.1674,
      "eval_samples_per_second": 148.61,
      "eval_steps_per_second": 0.938,
      "step": 15360
    },
    {
      "epoch": 17.0,
      "learning_rate": 2e-05,
      "loss": 1.1535,
      "step": 16320
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7154258294693137,
      "eval_loss": 1.3581925630569458,
      "eval_runtime": 50.9905,
      "eval_samples_per_second": 149.126,
      "eval_steps_per_second": 0.941,
      "step": 16320
    },
    {
      "epoch": 18.0,
      "learning_rate": 2e-05,
      "loss": 1.1459,
      "step": 17280
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7157034442498541,
      "eval_loss": 1.3612279891967773,
      "eval_runtime": 51.0142,
      "eval_samples_per_second": 149.057,
      "eval_steps_per_second": 0.941,
      "step": 17280
    },
    {
      "epoch": 19.0,
      "learning_rate": 2e-05,
      "loss": 1.1381,
      "step": 18240
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7173944953215458,
      "eval_loss": 1.3571031093597412,
      "eval_runtime": 51.233,
      "eval_samples_per_second": 148.42,
      "eval_steps_per_second": 0.937,
      "step": 18240
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 1.1314,
      "step": 19200
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7164889608500034,
      "eval_loss": 1.3498369455337524,
      "eval_runtime": 51.475,
      "eval_samples_per_second": 147.722,
      "eval_steps_per_second": 0.932,
      "step": 19200
    },
    {
      "epoch": 21.0,
      "learning_rate": 2e-05,
      "loss": 1.1216,
      "step": 20160
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7190562640400042,
      "eval_loss": 1.3336502313613892,
      "eval_runtime": 51.2931,
      "eval_samples_per_second": 148.246,
      "eval_steps_per_second": 0.936,
      "step": 20160
    },
    {
      "epoch": 22.0,
      "learning_rate": 2e-05,
      "loss": 1.1187,
      "step": 21120
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7189180295993838,
      "eval_loss": 1.3376628160476685,
      "eval_runtime": 51.22,
      "eval_samples_per_second": 148.458,
      "eval_steps_per_second": 0.937,
      "step": 21120
    },
    {
      "epoch": 23.0,
      "learning_rate": 2e-05,
      "loss": 1.1128,
      "step": 22080
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7185502647071909,
      "eval_loss": 1.3377180099487305,
      "eval_runtime": 50.9033,
      "eval_samples_per_second": 149.381,
      "eval_steps_per_second": 0.943,
      "step": 22080
    },
    {
      "epoch": 24.0,
      "learning_rate": 2e-05,
      "loss": 1.1052,
      "step": 23040
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7208811149658572,
      "eval_loss": 1.3223472833633423,
      "eval_runtime": 51.1878,
      "eval_samples_per_second": 148.551,
      "eval_steps_per_second": 0.938,
      "step": 23040
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 1.0996,
      "step": 24000
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7216902223097578,
      "eval_loss": 1.3264613151550293,
      "eval_runtime": 50.8905,
      "eval_samples_per_second": 149.419,
      "eval_steps_per_second": 0.943,
      "step": 24000
    },
    {
      "epoch": 26.0,
      "learning_rate": 2e-05,
      "loss": 1.0961,
      "step": 24960
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7211948506436695,
      "eval_loss": 1.320527195930481,
      "eval_runtime": 50.9638,
      "eval_samples_per_second": 149.204,
      "eval_steps_per_second": 0.942,
      "step": 24960
    },
    {
      "epoch": 27.0,
      "learning_rate": 2e-05,
      "loss": 1.0902,
      "step": 25920
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7213838036019521,
      "eval_loss": 1.321337342262268,
      "eval_runtime": 52.1531,
      "eval_samples_per_second": 145.802,
      "eval_steps_per_second": 0.92,
      "step": 25920
    },
    {
      "epoch": 28.0,
      "learning_rate": 2e-05,
      "loss": 1.0835,
      "step": 26880
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7239870388684853,
      "eval_loss": 1.3021934032440186,
      "eval_runtime": 51.6703,
      "eval_samples_per_second": 147.164,
      "eval_steps_per_second": 0.929,
      "step": 26880
    },
    {
      "epoch": 29.0,
      "learning_rate": 2e-05,
      "loss": 1.0796,
      "step": 27840
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7225473777214267,
      "eval_loss": 1.3106894493103027,
      "eval_runtime": 51.4692,
      "eval_samples_per_second": 147.739,
      "eval_steps_per_second": 0.933,
      "step": 27840
    },
    {
      "epoch": 30.0,
      "learning_rate": 2e-05,
      "loss": 1.076,
      "step": 28800
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7246294369637408,
      "eval_loss": 1.3005998134613037,
      "eval_runtime": 51.1849,
      "eval_samples_per_second": 148.56,
      "eval_steps_per_second": 0.938,
      "step": 28800
    },
    {
      "epoch": 31.0,
      "learning_rate": 2e-05,
      "loss": 1.0713,
      "step": 29760
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.723824419552894,
      "eval_loss": 1.3021259307861328,
      "eval_runtime": 51.1204,
      "eval_samples_per_second": 148.747,
      "eval_steps_per_second": 0.939,
      "step": 29760
    },
    {
      "epoch": 32.0,
      "learning_rate": 2e-05,
      "loss": 1.0679,
      "step": 30720
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7239979316606835,
      "eval_loss": 1.3063867092132568,
      "eval_runtime": 50.892,
      "eval_samples_per_second": 149.415,
      "eval_steps_per_second": 0.943,
      "step": 30720
    },
    {
      "epoch": 33.0,
      "learning_rate": 2e-05,
      "loss": 1.0638,
      "step": 31680
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7261307848236347,
      "eval_loss": 1.2864927053451538,
      "eval_runtime": 50.9275,
      "eval_samples_per_second": 149.31,
      "eval_steps_per_second": 0.943,
      "step": 31680
    },
    {
      "epoch": 34.0,
      "learning_rate": 2e-05,
      "loss": 1.058,
      "step": 32640
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7236361200229268,
      "eval_loss": 1.3007187843322754,
      "eval_runtime": 51.9944,
      "eval_samples_per_second": 146.246,
      "eval_steps_per_second": 0.923,
      "step": 32640
    },
    {
      "epoch": 35.0,
      "learning_rate": 2e-05,
      "loss": 1.0548,
      "step": 33600
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.7257316118449666,
      "eval_loss": 1.2933671474456787,
      "eval_runtime": 51.2859,
      "eval_samples_per_second": 148.267,
      "eval_steps_per_second": 0.936,
      "step": 33600
    }
  ],
  "max_steps": 38400,
  "num_train_epochs": 40,
  "total_flos": 2041797358387200.0,
  "trial_name": null,
  "trial_params": null
}