20230830190813 / trainer_state.json
dkqjrm's picture
End of training
4205463
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 80.0,
"global_step": 27200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5203761755485894,
"eval_loss": 0.7313023209571838,
"eval_runtime": 10.3291,
"eval_samples_per_second": 61.767,
"eval_steps_per_second": 7.745,
"step": 340
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.5203761755485894,
"epoch": 1.0,
"step": 340
},
{
"epoch": 1.47,
"learning_rate": 0.000294485294117647,
"loss": 0.7523,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7285043001174927,
"eval_runtime": 10.3914,
"eval_samples_per_second": 61.397,
"eval_steps_per_second": 7.699,
"step": 680
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.5203761755485894,
"epoch": 2.0,
"step": 680
},
{
"epoch": 2.94,
"learning_rate": 0.00028897058823529407,
"loss": 0.7461,
"step": 1000
},
{
"epoch": 3.0,
"eval_accuracy": 0.5062695924764891,
"eval_loss": 0.7228550314903259,
"eval_runtime": 10.386,
"eval_samples_per_second": 61.429,
"eval_steps_per_second": 7.703,
"step": 1020
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.5203761755485894,
"epoch": 3.0,
"step": 1020
},
{
"epoch": 4.0,
"eval_accuracy": 0.5783699059561128,
"eval_loss": 0.706242561340332,
"eval_runtime": 10.3842,
"eval_samples_per_second": 61.44,
"eval_steps_per_second": 7.704,
"step": 1360
},
{
"best_epoch": 3,
"best_eval_accuracy": 0.5783699059561128,
"epoch": 4.0,
"step": 1360
},
{
"epoch": 4.41,
"learning_rate": 0.00028345588235294115,
"loss": 0.7318,
"step": 1500
},
{
"epoch": 5.0,
"eval_accuracy": 0.603448275862069,
"eval_loss": 0.779592752456665,
"eval_runtime": 10.4186,
"eval_samples_per_second": 61.237,
"eval_steps_per_second": 7.679,
"step": 1700
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 5.0,
"step": 1700
},
{
"epoch": 5.88,
"learning_rate": 0.0002779411764705882,
"loss": 0.7057,
"step": 2000
},
{
"epoch": 6.0,
"eval_accuracy": 0.5830721003134797,
"eval_loss": 0.819421648979187,
"eval_runtime": 10.3891,
"eval_samples_per_second": 61.41,
"eval_steps_per_second": 7.7,
"step": 2040
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 6.0,
"step": 2040
},
{
"epoch": 7.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7296625971794128,
"eval_runtime": 10.3494,
"eval_samples_per_second": 61.646,
"eval_steps_per_second": 7.73,
"step": 2380
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 7.0,
"step": 2380
},
{
"epoch": 7.35,
"learning_rate": 0.00027242647058823525,
"loss": 0.7178,
"step": 2500
},
{
"epoch": 8.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7423174381256104,
"eval_runtime": 10.3479,
"eval_samples_per_second": 61.655,
"eval_steps_per_second": 7.731,
"step": 2720
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 8.0,
"step": 2720
},
{
"epoch": 8.82,
"learning_rate": 0.0002669117647058823,
"loss": 0.7417,
"step": 3000
},
{
"epoch": 9.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7279828786849976,
"eval_runtime": 10.3545,
"eval_samples_per_second": 61.615,
"eval_steps_per_second": 7.726,
"step": 3060
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 9.0,
"step": 3060
},
{
"epoch": 10.0,
"eval_accuracy": 0.5015673981191222,
"eval_loss": 0.7606477737426758,
"eval_runtime": 10.346,
"eval_samples_per_second": 61.666,
"eval_steps_per_second": 7.732,
"step": 3400
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 10.0,
"step": 3400
},
{
"epoch": 10.29,
"learning_rate": 0.0002613970588235294,
"loss": 0.7399,
"step": 3500
},
{
"epoch": 11.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.7346045970916748,
"eval_runtime": 10.3459,
"eval_samples_per_second": 61.667,
"eval_steps_per_second": 7.733,
"step": 3740
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 11.0,
"step": 3740
},
{
"epoch": 11.76,
"learning_rate": 0.0002558823529411764,
"loss": 0.7334,
"step": 4000
},
{
"epoch": 12.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7410521507263184,
"eval_runtime": 10.3526,
"eval_samples_per_second": 61.627,
"eval_steps_per_second": 7.728,
"step": 4080
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 12.0,
"step": 4080
},
{
"epoch": 13.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7588055729866028,
"eval_runtime": 10.341,
"eval_samples_per_second": 61.696,
"eval_steps_per_second": 7.736,
"step": 4420
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 13.0,
"step": 4420
},
{
"epoch": 13.24,
"learning_rate": 0.0002503676470588235,
"loss": 0.7332,
"step": 4500
},
{
"epoch": 14.0,
"eval_accuracy": 0.4717868338557994,
"eval_loss": 0.7427398562431335,
"eval_runtime": 10.356,
"eval_samples_per_second": 61.607,
"eval_steps_per_second": 7.725,
"step": 4760
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 14.0,
"step": 4760
},
{
"epoch": 14.71,
"learning_rate": 0.0002448529411764706,
"loss": 0.7345,
"step": 5000
},
{
"epoch": 15.0,
"eval_accuracy": 0.5047021943573667,
"eval_loss": 0.7316854596138,
"eval_runtime": 10.3543,
"eval_samples_per_second": 61.617,
"eval_steps_per_second": 7.726,
"step": 5100
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 15.0,
"step": 5100
},
{
"epoch": 16.0,
"eval_accuracy": 0.5031347962382445,
"eval_loss": 0.7394306063652039,
"eval_runtime": 10.3304,
"eval_samples_per_second": 61.759,
"eval_steps_per_second": 7.744,
"step": 5440
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 16.0,
"step": 5440
},
{
"epoch": 16.18,
"learning_rate": 0.00023933823529411765,
"loss": 0.7308,
"step": 5500
},
{
"epoch": 17.0,
"eval_accuracy": 0.5,
"eval_loss": 0.744519054889679,
"eval_runtime": 10.3255,
"eval_samples_per_second": 61.789,
"eval_steps_per_second": 7.748,
"step": 5780
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 17.0,
"step": 5780
},
{
"epoch": 17.65,
"learning_rate": 0.0002338235294117647,
"loss": 0.7295,
"step": 6000
},
{
"epoch": 18.0,
"eval_accuracy": 0.4717868338557994,
"eval_loss": 0.7516658902168274,
"eval_runtime": 10.338,
"eval_samples_per_second": 61.714,
"eval_steps_per_second": 7.738,
"step": 6120
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 18.0,
"step": 6120
},
{
"epoch": 19.0,
"eval_accuracy": 0.5015673981191222,
"eval_loss": 0.7323266267776489,
"eval_runtime": 10.3388,
"eval_samples_per_second": 61.709,
"eval_steps_per_second": 7.738,
"step": 6460
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 19.0,
"step": 6460
},
{
"epoch": 19.12,
"learning_rate": 0.00022830882352941172,
"loss": 0.728,
"step": 6500
},
{
"epoch": 20.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7320307493209839,
"eval_runtime": 10.3718,
"eval_samples_per_second": 61.513,
"eval_steps_per_second": 7.713,
"step": 6800
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 20.0,
"step": 6800
},
{
"epoch": 20.59,
"learning_rate": 0.00022279411764705882,
"loss": 0.73,
"step": 7000
},
{
"epoch": 21.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.730900228023529,
"eval_runtime": 10.3696,
"eval_samples_per_second": 61.526,
"eval_steps_per_second": 7.715,
"step": 7140
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 21.0,
"step": 7140
},
{
"epoch": 22.0,
"eval_accuracy": 0.49843260188087773,
"eval_loss": 0.7434073686599731,
"eval_runtime": 10.3764,
"eval_samples_per_second": 61.486,
"eval_steps_per_second": 7.71,
"step": 7480
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 22.0,
"step": 7480
},
{
"epoch": 22.06,
"learning_rate": 0.00021727941176470585,
"loss": 0.7304,
"step": 7500
},
{
"epoch": 23.0,
"eval_accuracy": 0.5094043887147336,
"eval_loss": 0.7365709543228149,
"eval_runtime": 10.371,
"eval_samples_per_second": 61.518,
"eval_steps_per_second": 7.714,
"step": 7820
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 23.0,
"step": 7820
},
{
"epoch": 23.53,
"learning_rate": 0.00021176470588235295,
"loss": 0.7298,
"step": 8000
},
{
"epoch": 24.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7333634495735168,
"eval_runtime": 10.3649,
"eval_samples_per_second": 61.554,
"eval_steps_per_second": 7.718,
"step": 8160
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 24.0,
"step": 8160
},
{
"epoch": 25.0,
"learning_rate": 0.00020624999999999997,
"loss": 0.7283,
"step": 8500
},
{
"epoch": 25.0,
"eval_accuracy": 0.512539184952978,
"eval_loss": 0.7341791391372681,
"eval_runtime": 10.3671,
"eval_samples_per_second": 61.541,
"eval_steps_per_second": 7.717,
"step": 8500
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 25.0,
"step": 8500
},
{
"epoch": 26.0,
"eval_accuracy": 0.5047021943573667,
"eval_loss": 0.7310704588890076,
"eval_runtime": 10.3754,
"eval_samples_per_second": 61.491,
"eval_steps_per_second": 7.711,
"step": 8840
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 26.0,
"step": 8840
},
{
"epoch": 26.47,
"learning_rate": 0.00020073529411764702,
"loss": 0.7291,
"step": 9000
},
{
"epoch": 27.0,
"eval_accuracy": 0.4702194357366771,
"eval_loss": 0.7565436363220215,
"eval_runtime": 10.384,
"eval_samples_per_second": 61.441,
"eval_steps_per_second": 7.704,
"step": 9180
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 27.0,
"step": 9180
},
{
"epoch": 27.94,
"learning_rate": 0.0001952205882352941,
"loss": 0.7292,
"step": 9500
},
{
"epoch": 28.0,
"eval_accuracy": 0.5031347962382445,
"eval_loss": 0.7282286286354065,
"eval_runtime": 10.3869,
"eval_samples_per_second": 61.424,
"eval_steps_per_second": 7.702,
"step": 9520
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 28.0,
"step": 9520
},
{
"epoch": 29.0,
"eval_accuracy": 0.5015673981191222,
"eval_loss": 0.7332981824874878,
"eval_runtime": 10.3776,
"eval_samples_per_second": 61.478,
"eval_steps_per_second": 7.709,
"step": 9860
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 29.0,
"step": 9860
},
{
"epoch": 29.41,
"learning_rate": 0.00018970588235294115,
"loss": 0.7261,
"step": 10000
},
{
"epoch": 30.0,
"eval_accuracy": 0.512539184952978,
"eval_loss": 0.7328195571899414,
"eval_runtime": 10.3898,
"eval_samples_per_second": 61.406,
"eval_steps_per_second": 7.7,
"step": 10200
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 30.0,
"step": 10200
},
{
"epoch": 30.88,
"learning_rate": 0.00018419117647058822,
"loss": 0.7279,
"step": 10500
},
{
"epoch": 31.0,
"eval_accuracy": 0.512539184952978,
"eval_loss": 0.7348790764808655,
"eval_runtime": 10.3887,
"eval_samples_per_second": 61.413,
"eval_steps_per_second": 7.701,
"step": 10540
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 31.0,
"step": 10540
},
{
"epoch": 32.0,
"eval_accuracy": 0.4702194357366771,
"eval_loss": 0.7592222690582275,
"eval_runtime": 10.3874,
"eval_samples_per_second": 61.421,
"eval_steps_per_second": 7.702,
"step": 10880
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 32.0,
"step": 10880
},
{
"epoch": 32.35,
"learning_rate": 0.00017867647058823527,
"loss": 0.7252,
"step": 11000
},
{
"epoch": 33.0,
"eval_accuracy": 0.5094043887147336,
"eval_loss": 0.7393137812614441,
"eval_runtime": 10.3855,
"eval_samples_per_second": 61.432,
"eval_steps_per_second": 7.703,
"step": 11220
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 33.0,
"step": 11220
},
{
"epoch": 33.82,
"learning_rate": 0.00017316176470588232,
"loss": 0.7263,
"step": 11500
},
{
"epoch": 34.0,
"eval_accuracy": 0.5047021943573667,
"eval_loss": 0.7394311428070068,
"eval_runtime": 10.3899,
"eval_samples_per_second": 61.406,
"eval_steps_per_second": 7.7,
"step": 11560
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 34.0,
"step": 11560
},
{
"epoch": 35.0,
"eval_accuracy": 0.5015673981191222,
"eval_loss": 0.7465404272079468,
"eval_runtime": 10.3834,
"eval_samples_per_second": 61.444,
"eval_steps_per_second": 7.705,
"step": 11900
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 35.0,
"step": 11900
},
{
"epoch": 35.29,
"learning_rate": 0.0001676470588235294,
"loss": 0.7269,
"step": 12000
},
{
"epoch": 36.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7349045872688293,
"eval_runtime": 10.4039,
"eval_samples_per_second": 61.323,
"eval_steps_per_second": 7.689,
"step": 12240
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 36.0,
"step": 12240
},
{
"epoch": 36.76,
"learning_rate": 0.00016213235294117645,
"loss": 0.7263,
"step": 12500
},
{
"epoch": 37.0,
"eval_accuracy": 0.5047021943573667,
"eval_loss": 0.7295121550559998,
"eval_runtime": 10.3882,
"eval_samples_per_second": 61.416,
"eval_steps_per_second": 7.701,
"step": 12580
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 37.0,
"step": 12580
},
{
"epoch": 38.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.7328969836235046,
"eval_runtime": 10.391,
"eval_samples_per_second": 61.399,
"eval_steps_per_second": 7.699,
"step": 12920
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 38.0,
"step": 12920
},
{
"epoch": 38.24,
"learning_rate": 0.00015661764705882352,
"loss": 0.728,
"step": 13000
},
{
"epoch": 39.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7401482462882996,
"eval_runtime": 10.401,
"eval_samples_per_second": 61.34,
"eval_steps_per_second": 7.692,
"step": 13260
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 39.0,
"step": 13260
},
{
"epoch": 39.71,
"learning_rate": 0.00015110294117647057,
"loss": 0.7254,
"step": 13500
},
{
"epoch": 40.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7330672144889832,
"eval_runtime": 10.4053,
"eval_samples_per_second": 61.315,
"eval_steps_per_second": 7.688,
"step": 13600
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 40.0,
"step": 13600
},
{
"epoch": 41.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.7308171391487122,
"eval_runtime": 10.3936,
"eval_samples_per_second": 61.384,
"eval_steps_per_second": 7.697,
"step": 13940
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 41.0,
"step": 13940
},
{
"epoch": 41.18,
"learning_rate": 0.00014558823529411762,
"loss": 0.7265,
"step": 14000
},
{
"epoch": 42.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.7311594486236572,
"eval_runtime": 10.4029,
"eval_samples_per_second": 61.329,
"eval_steps_per_second": 7.69,
"step": 14280
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 42.0,
"step": 14280
},
{
"epoch": 42.65,
"learning_rate": 0.0001400735294117647,
"loss": 0.7234,
"step": 14500
},
{
"epoch": 43.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7393350005149841,
"eval_runtime": 10.3921,
"eval_samples_per_second": 61.393,
"eval_steps_per_second": 7.698,
"step": 14620
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 43.0,
"step": 14620
},
{
"epoch": 44.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7391661405563354,
"eval_runtime": 10.415,
"eval_samples_per_second": 61.258,
"eval_steps_per_second": 7.681,
"step": 14960
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 44.0,
"step": 14960
},
{
"epoch": 44.12,
"learning_rate": 0.00013455882352941175,
"loss": 0.7254,
"step": 15000
},
{
"epoch": 45.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7389068007469177,
"eval_runtime": 10.4123,
"eval_samples_per_second": 61.274,
"eval_steps_per_second": 7.683,
"step": 15300
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 45.0,
"step": 15300
},
{
"epoch": 45.59,
"learning_rate": 0.00012904411764705882,
"loss": 0.7225,
"step": 15500
},
{
"epoch": 46.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.731235921382904,
"eval_runtime": 10.4254,
"eval_samples_per_second": 61.197,
"eval_steps_per_second": 7.674,
"step": 15640
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 46.0,
"step": 15640
},
{
"epoch": 47.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7334668636322021,
"eval_runtime": 10.3994,
"eval_samples_per_second": 61.35,
"eval_steps_per_second": 7.693,
"step": 15980
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 47.0,
"step": 15980
},
{
"epoch": 47.06,
"learning_rate": 0.00012352941176470587,
"loss": 0.7268,
"step": 16000
},
{
"epoch": 48.0,
"eval_accuracy": 0.5015673981191222,
"eval_loss": 0.7363240122795105,
"eval_runtime": 10.4221,
"eval_samples_per_second": 61.216,
"eval_steps_per_second": 7.676,
"step": 16320
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 48.0,
"step": 16320
},
{
"epoch": 48.53,
"learning_rate": 0.00011801470588235293,
"loss": 0.7258,
"step": 16500
},
{
"epoch": 49.0,
"eval_accuracy": 0.5031347962382445,
"eval_loss": 0.7393150329589844,
"eval_runtime": 10.4224,
"eval_samples_per_second": 61.214,
"eval_steps_per_second": 7.676,
"step": 16660
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 49.0,
"step": 16660
},
{
"epoch": 50.0,
"learning_rate": 0.0001125,
"loss": 0.7253,
"step": 17000
},
{
"epoch": 50.0,
"eval_accuracy": 0.5047021943573667,
"eval_loss": 0.7305631041526794,
"eval_runtime": 10.4154,
"eval_samples_per_second": 61.256,
"eval_steps_per_second": 7.681,
"step": 17000
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 50.0,
"step": 17000
},
{
"epoch": 51.0,
"eval_accuracy": 0.5094043887147336,
"eval_loss": 0.7371691465377808,
"eval_runtime": 10.429,
"eval_samples_per_second": 61.175,
"eval_steps_per_second": 7.671,
"step": 17340
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 51.0,
"step": 17340
},
{
"epoch": 51.47,
"learning_rate": 0.00010698529411764705,
"loss": 0.7247,
"step": 17500
},
{
"epoch": 52.0,
"eval_accuracy": 0.5,
"eval_loss": 0.7402310371398926,
"eval_runtime": 10.4194,
"eval_samples_per_second": 61.232,
"eval_steps_per_second": 7.678,
"step": 17680
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 52.0,
"step": 17680
},
{
"epoch": 52.94,
"learning_rate": 0.00010147058823529411,
"loss": 0.7248,
"step": 18000
},
{
"epoch": 53.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7354699969291687,
"eval_runtime": 10.4291,
"eval_samples_per_second": 61.175,
"eval_steps_per_second": 7.671,
"step": 18020
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 53.0,
"step": 18020
},
{
"epoch": 54.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7369323372840881,
"eval_runtime": 10.4219,
"eval_samples_per_second": 61.217,
"eval_steps_per_second": 7.676,
"step": 18360
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 54.0,
"step": 18360
},
{
"epoch": 54.41,
"learning_rate": 9.595588235294116e-05,
"loss": 0.7237,
"step": 18500
},
{
"epoch": 55.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7319989800453186,
"eval_runtime": 10.4292,
"eval_samples_per_second": 61.174,
"eval_steps_per_second": 7.671,
"step": 18700
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 55.0,
"step": 18700
},
{
"epoch": 55.88,
"learning_rate": 9.044117647058822e-05,
"loss": 0.7226,
"step": 19000
},
{
"epoch": 56.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.7365678548812866,
"eval_runtime": 10.4179,
"eval_samples_per_second": 61.241,
"eval_steps_per_second": 7.679,
"step": 19040
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 56.0,
"step": 19040
},
{
"epoch": 57.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.7315055131912231,
"eval_runtime": 10.4146,
"eval_samples_per_second": 61.26,
"eval_steps_per_second": 7.682,
"step": 19380
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 57.0,
"step": 19380
},
{
"epoch": 57.35,
"learning_rate": 8.492647058823528e-05,
"loss": 0.7238,
"step": 19500
},
{
"epoch": 58.0,
"eval_accuracy": 0.5015673981191222,
"eval_loss": 0.73880934715271,
"eval_runtime": 10.4223,
"eval_samples_per_second": 61.215,
"eval_steps_per_second": 7.676,
"step": 19720
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 58.0,
"step": 19720
},
{
"epoch": 58.82,
"learning_rate": 7.941176470588235e-05,
"loss": 0.7228,
"step": 20000
},
{
"epoch": 59.0,
"eval_accuracy": 0.5047021943573667,
"eval_loss": 0.7347163558006287,
"eval_runtime": 10.4154,
"eval_samples_per_second": 61.256,
"eval_steps_per_second": 7.681,
"step": 20060
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 59.0,
"step": 20060
},
{
"epoch": 60.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7313491106033325,
"eval_runtime": 10.4505,
"eval_samples_per_second": 61.05,
"eval_steps_per_second": 7.655,
"step": 20400
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 60.0,
"step": 20400
},
{
"epoch": 60.29,
"learning_rate": 7.389705882352941e-05,
"loss": 0.7245,
"step": 20500
},
{
"epoch": 61.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7329691052436829,
"eval_runtime": 10.4244,
"eval_samples_per_second": 61.203,
"eval_steps_per_second": 7.674,
"step": 20740
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 61.0,
"step": 20740
},
{
"epoch": 61.76,
"learning_rate": 6.838235294117646e-05,
"loss": 0.7222,
"step": 21000
},
{
"epoch": 62.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7349640130996704,
"eval_runtime": 10.4114,
"eval_samples_per_second": 61.279,
"eval_steps_per_second": 7.684,
"step": 21080
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 62.0,
"step": 21080
},
{
"epoch": 63.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7314162254333496,
"eval_runtime": 10.4097,
"eval_samples_per_second": 61.289,
"eval_steps_per_second": 7.685,
"step": 21420
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 63.0,
"step": 21420
},
{
"epoch": 63.24,
"learning_rate": 6.286764705882352e-05,
"loss": 0.724,
"step": 21500
},
{
"epoch": 64.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7327254414558411,
"eval_runtime": 10.4103,
"eval_samples_per_second": 61.285,
"eval_steps_per_second": 7.685,
"step": 21760
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 64.0,
"step": 21760
},
{
"epoch": 64.71,
"learning_rate": 5.7352941176470576e-05,
"loss": 0.7236,
"step": 22000
},
{
"epoch": 65.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.7306498289108276,
"eval_runtime": 10.404,
"eval_samples_per_second": 61.323,
"eval_steps_per_second": 7.689,
"step": 22100
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 65.0,
"step": 22100
},
{
"epoch": 66.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7351139783859253,
"eval_runtime": 10.4141,
"eval_samples_per_second": 61.263,
"eval_steps_per_second": 7.682,
"step": 22440
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 66.0,
"step": 22440
},
{
"epoch": 66.18,
"learning_rate": 5.183823529411764e-05,
"loss": 0.7205,
"step": 22500
},
{
"epoch": 67.0,
"eval_accuracy": 0.512539184952978,
"eval_loss": 0.7343327403068542,
"eval_runtime": 10.4158,
"eval_samples_per_second": 61.253,
"eval_steps_per_second": 7.681,
"step": 22780
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 67.0,
"step": 22780
},
{
"epoch": 67.65,
"learning_rate": 4.63235294117647e-05,
"loss": 0.7236,
"step": 23000
},
{
"epoch": 68.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7313010096549988,
"eval_runtime": 10.4192,
"eval_samples_per_second": 61.233,
"eval_steps_per_second": 7.678,
"step": 23120
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 68.0,
"step": 23120
},
{
"epoch": 69.0,
"eval_accuracy": 0.5172413793103449,
"eval_loss": 0.7338398694992065,
"eval_runtime": 10.4173,
"eval_samples_per_second": 61.244,
"eval_steps_per_second": 7.68,
"step": 23460
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 69.0,
"step": 23460
},
{
"epoch": 69.12,
"learning_rate": 4.080882352941176e-05,
"loss": 0.7221,
"step": 23500
},
{
"epoch": 70.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7317362427711487,
"eval_runtime": 10.4144,
"eval_samples_per_second": 61.262,
"eval_steps_per_second": 7.682,
"step": 23800
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 70.0,
"step": 23800
},
{
"epoch": 70.59,
"learning_rate": 3.529411764705882e-05,
"loss": 0.7226,
"step": 24000
},
{
"epoch": 71.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7343974113464355,
"eval_runtime": 10.418,
"eval_samples_per_second": 61.24,
"eval_steps_per_second": 7.679,
"step": 24140
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 71.0,
"step": 24140
},
{
"epoch": 72.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7342385649681091,
"eval_runtime": 10.4188,
"eval_samples_per_second": 61.235,
"eval_steps_per_second": 7.678,
"step": 24480
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 72.0,
"step": 24480
},
{
"epoch": 72.06,
"learning_rate": 2.9779411764705876e-05,
"loss": 0.7209,
"step": 24500
},
{
"epoch": 73.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7333251237869263,
"eval_runtime": 10.4209,
"eval_samples_per_second": 61.223,
"eval_steps_per_second": 7.677,
"step": 24820
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 73.0,
"step": 24820
},
{
"epoch": 73.53,
"learning_rate": 2.426470588235294e-05,
"loss": 0.7229,
"step": 25000
},
{
"epoch": 74.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7358043789863586,
"eval_runtime": 10.4307,
"eval_samples_per_second": 61.166,
"eval_steps_per_second": 7.67,
"step": 25160
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 74.0,
"step": 25160
},
{
"epoch": 75.0,
"learning_rate": 1.875e-05,
"loss": 0.7204,
"step": 25500
},
{
"epoch": 75.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7342051863670349,
"eval_runtime": 10.4227,
"eval_samples_per_second": 61.212,
"eval_steps_per_second": 7.676,
"step": 25500
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 75.0,
"step": 25500
},
{
"epoch": 76.0,
"eval_accuracy": 0.5156739811912225,
"eval_loss": 0.7328677773475647,
"eval_runtime": 10.4246,
"eval_samples_per_second": 61.201,
"eval_steps_per_second": 7.674,
"step": 25840
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 76.0,
"step": 25840
},
{
"epoch": 76.47,
"learning_rate": 1.323529411764706e-05,
"loss": 0.7213,
"step": 26000
},
{
"epoch": 77.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7333688139915466,
"eval_runtime": 10.429,
"eval_samples_per_second": 61.176,
"eval_steps_per_second": 7.671,
"step": 26180
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 77.0,
"step": 26180
},
{
"epoch": 77.94,
"learning_rate": 7.720588235294117e-06,
"loss": 0.7208,
"step": 26500
},
{
"epoch": 78.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7335054278373718,
"eval_runtime": 10.4223,
"eval_samples_per_second": 61.215,
"eval_steps_per_second": 7.676,
"step": 26520
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 78.0,
"step": 26520
},
{
"epoch": 79.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.7329541444778442,
"eval_runtime": 10.431,
"eval_samples_per_second": 61.164,
"eval_steps_per_second": 7.669,
"step": 26860
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 79.0,
"step": 26860
},
{
"epoch": 79.41,
"learning_rate": 2.2058823529411763e-06,
"loss": 0.7203,
"step": 27000
},
{
"epoch": 80.0,
"eval_accuracy": 0.5141065830721003,
"eval_loss": 0.73334801197052,
"eval_runtime": 10.4229,
"eval_samples_per_second": 61.212,
"eval_steps_per_second": 7.675,
"step": 27200
},
{
"best_epoch": 4,
"best_eval_accuracy": 0.603448275862069,
"epoch": 80.0,
"step": 27200
},
{
"epoch": 80.0,
"step": 27200,
"total_flos": 2.19509178753024e+17,
"train_loss": 0.726637606901281,
"train_runtime": 10977.5099,
"train_samples_per_second": 39.557,
"train_steps_per_second": 2.478
}
],
"max_steps": 27200,
"num_train_epochs": 80,
"total_flos": 2.19509178753024e+17,
"trial_name": null,
"trial_params": null
}