tyzhu's picture
Training in progress, epoch 19
96db0c6 verified
raw
history blame contribute delete
No virus
16.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 10580,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"learning_rate": 0.0001,
"loss": 0.3787,
"step": 100
},
{
"epoch": 0.38,
"learning_rate": 0.0001,
"loss": 0.315,
"step": 200
},
{
"epoch": 0.57,
"learning_rate": 0.0001,
"loss": 0.3191,
"step": 300
},
{
"epoch": 0.76,
"learning_rate": 0.0001,
"loss": 0.3082,
"step": 400
},
{
"epoch": 0.95,
"learning_rate": 0.0001,
"loss": 0.2952,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.7924862745098039,
"eval_loss": 0.37208321690559387,
"eval_runtime": 7.4879,
"eval_samples_per_second": 66.774,
"eval_steps_per_second": 8.414,
"step": 529
},
{
"epoch": 1.13,
"learning_rate": 0.0001,
"loss": 0.2733,
"step": 600
},
{
"epoch": 1.32,
"learning_rate": 0.0001,
"loss": 0.2549,
"step": 700
},
{
"epoch": 1.51,
"learning_rate": 0.0001,
"loss": 0.2483,
"step": 800
},
{
"epoch": 1.7,
"learning_rate": 0.0001,
"loss": 0.2473,
"step": 900
},
{
"epoch": 1.89,
"learning_rate": 0.0001,
"loss": 0.2481,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.8003137254901961,
"eval_loss": 0.3231143653392792,
"eval_runtime": 7.0792,
"eval_samples_per_second": 70.629,
"eval_steps_per_second": 8.899,
"step": 1058
},
{
"epoch": 2.08,
"learning_rate": 0.0001,
"loss": 0.2244,
"step": 1100
},
{
"epoch": 2.27,
"learning_rate": 0.0001,
"loss": 0.1981,
"step": 1200
},
{
"epoch": 2.46,
"learning_rate": 0.0001,
"loss": 0.1946,
"step": 1300
},
{
"epoch": 2.65,
"learning_rate": 0.0001,
"loss": 0.1978,
"step": 1400
},
{
"epoch": 2.84,
"learning_rate": 0.0001,
"loss": 0.1935,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.8044156862745098,
"eval_loss": 0.29632872343063354,
"eval_runtime": 7.6622,
"eval_samples_per_second": 65.255,
"eval_steps_per_second": 8.222,
"step": 1587
},
{
"epoch": 3.02,
"learning_rate": 0.0001,
"loss": 0.1865,
"step": 1600
},
{
"epoch": 3.21,
"learning_rate": 0.0001,
"loss": 0.1586,
"step": 1700
},
{
"epoch": 3.4,
"learning_rate": 0.0001,
"loss": 0.1598,
"step": 1800
},
{
"epoch": 3.59,
"learning_rate": 0.0001,
"loss": 0.1596,
"step": 1900
},
{
"epoch": 3.78,
"learning_rate": 0.0001,
"loss": 0.1603,
"step": 2000
},
{
"epoch": 3.97,
"learning_rate": 0.0001,
"loss": 0.1593,
"step": 2100
},
{
"epoch": 4.0,
"eval_accuracy": 0.806235294117647,
"eval_loss": 0.28722652792930603,
"eval_runtime": 6.677,
"eval_samples_per_second": 74.884,
"eval_steps_per_second": 9.435,
"step": 2116
},
{
"epoch": 4.16,
"learning_rate": 0.0001,
"loss": 0.1369,
"step": 2200
},
{
"epoch": 4.35,
"learning_rate": 0.0001,
"loss": 0.1371,
"step": 2300
},
{
"epoch": 4.54,
"learning_rate": 0.0001,
"loss": 0.136,
"step": 2400
},
{
"epoch": 4.73,
"learning_rate": 0.0001,
"loss": 0.1379,
"step": 2500
},
{
"epoch": 4.91,
"learning_rate": 0.0001,
"loss": 0.1405,
"step": 2600
},
{
"epoch": 5.0,
"eval_accuracy": 0.8067450980392157,
"eval_loss": 0.29079169034957886,
"eval_runtime": 7.7882,
"eval_samples_per_second": 64.2,
"eval_steps_per_second": 8.089,
"step": 2645
},
{
"epoch": 5.1,
"learning_rate": 0.0001,
"loss": 0.1278,
"step": 2700
},
{
"epoch": 5.29,
"learning_rate": 0.0001,
"loss": 0.1208,
"step": 2800
},
{
"epoch": 5.48,
"learning_rate": 0.0001,
"loss": 0.1205,
"step": 2900
},
{
"epoch": 5.67,
"learning_rate": 0.0001,
"loss": 0.1239,
"step": 3000
},
{
"epoch": 5.86,
"learning_rate": 0.0001,
"loss": 0.1235,
"step": 3100
},
{
"epoch": 6.0,
"eval_accuracy": 0.8071529411764706,
"eval_loss": 0.29287615418434143,
"eval_runtime": 7.1498,
"eval_samples_per_second": 69.932,
"eval_steps_per_second": 8.811,
"step": 3174
},
{
"epoch": 6.05,
"learning_rate": 0.0001,
"loss": 0.1205,
"step": 3200
},
{
"epoch": 6.24,
"learning_rate": 0.0001,
"loss": 0.1096,
"step": 3300
},
{
"epoch": 6.43,
"learning_rate": 0.0001,
"loss": 0.1103,
"step": 3400
},
{
"epoch": 6.62,
"learning_rate": 0.0001,
"loss": 0.1127,
"step": 3500
},
{
"epoch": 6.81,
"learning_rate": 0.0001,
"loss": 0.1136,
"step": 3600
},
{
"epoch": 6.99,
"learning_rate": 0.0001,
"loss": 0.1154,
"step": 3700
},
{
"epoch": 7.0,
"eval_accuracy": 0.8070666666666667,
"eval_loss": 0.31088685989379883,
"eval_runtime": 7.7231,
"eval_samples_per_second": 64.741,
"eval_steps_per_second": 8.157,
"step": 3703
},
{
"epoch": 7.18,
"learning_rate": 0.0001,
"loss": 0.1004,
"step": 3800
},
{
"epoch": 7.37,
"learning_rate": 0.0001,
"loss": 0.1024,
"step": 3900
},
{
"epoch": 7.56,
"learning_rate": 0.0001,
"loss": 0.104,
"step": 4000
},
{
"epoch": 7.75,
"learning_rate": 0.0001,
"loss": 0.1046,
"step": 4100
},
{
"epoch": 7.94,
"learning_rate": 0.0001,
"loss": 0.106,
"step": 4200
},
{
"epoch": 8.0,
"eval_accuracy": 0.8069019607843138,
"eval_loss": 0.3178957402706146,
"eval_runtime": 7.5236,
"eval_samples_per_second": 66.458,
"eval_steps_per_second": 8.374,
"step": 4232
},
{
"epoch": 8.13,
"learning_rate": 0.0001,
"loss": 0.0979,
"step": 4300
},
{
"epoch": 8.32,
"learning_rate": 0.0001,
"loss": 0.095,
"step": 4400
},
{
"epoch": 8.51,
"learning_rate": 0.0001,
"loss": 0.0974,
"step": 4500
},
{
"epoch": 8.7,
"learning_rate": 0.0001,
"loss": 0.0979,
"step": 4600
},
{
"epoch": 8.88,
"learning_rate": 0.0001,
"loss": 0.0997,
"step": 4700
},
{
"epoch": 9.0,
"eval_accuracy": 0.8070901960784314,
"eval_loss": 0.33391040563583374,
"eval_runtime": 7.6758,
"eval_samples_per_second": 65.14,
"eval_steps_per_second": 8.208,
"step": 4761
},
{
"epoch": 9.07,
"learning_rate": 0.0001,
"loss": 0.0968,
"step": 4800
},
{
"epoch": 9.26,
"learning_rate": 0.0001,
"loss": 0.0901,
"step": 4900
},
{
"epoch": 9.45,
"learning_rate": 0.0001,
"loss": 0.0917,
"step": 5000
},
{
"epoch": 9.64,
"learning_rate": 0.0001,
"loss": 0.0936,
"step": 5100
},
{
"epoch": 9.83,
"learning_rate": 0.0001,
"loss": 0.095,
"step": 5200
},
{
"epoch": 10.0,
"eval_accuracy": 0.8067294117647059,
"eval_loss": 0.3424333930015564,
"eval_runtime": 6.6593,
"eval_samples_per_second": 75.083,
"eval_steps_per_second": 9.46,
"step": 5290
},
{
"epoch": 10.02,
"learning_rate": 0.0001,
"loss": 0.0952,
"step": 5300
},
{
"epoch": 10.21,
"learning_rate": 0.0001,
"loss": 0.0867,
"step": 5400
},
{
"epoch": 10.4,
"learning_rate": 0.0001,
"loss": 0.0902,
"step": 5500
},
{
"epoch": 10.59,
"learning_rate": 0.0001,
"loss": 0.0915,
"step": 5600
},
{
"epoch": 10.78,
"learning_rate": 0.0001,
"loss": 0.0919,
"step": 5700
},
{
"epoch": 10.96,
"learning_rate": 0.0001,
"loss": 0.0922,
"step": 5800
},
{
"epoch": 11.0,
"eval_accuracy": 0.8065882352941176,
"eval_loss": 0.35157716274261475,
"eval_runtime": 7.0864,
"eval_samples_per_second": 70.558,
"eval_steps_per_second": 8.89,
"step": 5819
},
{
"epoch": 11.15,
"learning_rate": 0.0001,
"loss": 0.0855,
"step": 5900
},
{
"epoch": 11.34,
"learning_rate": 0.0001,
"loss": 0.0863,
"step": 6000
},
{
"epoch": 11.53,
"learning_rate": 0.0001,
"loss": 0.0865,
"step": 6100
},
{
"epoch": 11.72,
"learning_rate": 0.0001,
"loss": 0.0876,
"step": 6200
},
{
"epoch": 11.91,
"learning_rate": 0.0001,
"loss": 0.089,
"step": 6300
},
{
"epoch": 12.0,
"eval_accuracy": 0.806321568627451,
"eval_loss": 0.37196654081344604,
"eval_runtime": 7.1828,
"eval_samples_per_second": 69.611,
"eval_steps_per_second": 8.771,
"step": 6348
},
{
"epoch": 12.1,
"learning_rate": 0.0001,
"loss": 0.0857,
"step": 6400
},
{
"epoch": 12.29,
"learning_rate": 0.0001,
"loss": 0.0833,
"step": 6500
},
{
"epoch": 12.48,
"learning_rate": 0.0001,
"loss": 0.0851,
"step": 6600
},
{
"epoch": 12.67,
"learning_rate": 0.0001,
"loss": 0.0857,
"step": 6700
},
{
"epoch": 12.85,
"learning_rate": 0.0001,
"loss": 0.0862,
"step": 6800
},
{
"epoch": 13.0,
"eval_accuracy": 0.8065254901960784,
"eval_loss": 0.37400877475738525,
"eval_runtime": 7.7823,
"eval_samples_per_second": 64.248,
"eval_steps_per_second": 8.095,
"step": 6877
},
{
"epoch": 13.04,
"learning_rate": 0.0001,
"loss": 0.0864,
"step": 6900
},
{
"epoch": 13.23,
"learning_rate": 0.0001,
"loss": 0.0819,
"step": 7000
},
{
"epoch": 13.42,
"learning_rate": 0.0001,
"loss": 0.0835,
"step": 7100
},
{
"epoch": 13.61,
"learning_rate": 0.0001,
"loss": 0.0837,
"step": 7200
},
{
"epoch": 13.8,
"learning_rate": 0.0001,
"loss": 0.085,
"step": 7300
},
{
"epoch": 13.99,
"learning_rate": 0.0001,
"loss": 0.0862,
"step": 7400
},
{
"epoch": 14.0,
"eval_accuracy": 0.8070117647058823,
"eval_loss": 0.3680531084537506,
"eval_runtime": 7.8108,
"eval_samples_per_second": 64.014,
"eval_steps_per_second": 8.066,
"step": 7406
},
{
"epoch": 14.18,
"learning_rate": 0.0001,
"loss": 0.0796,
"step": 7500
},
{
"epoch": 14.37,
"learning_rate": 0.0001,
"loss": 0.0816,
"step": 7600
},
{
"epoch": 14.56,
"learning_rate": 0.0001,
"loss": 0.0824,
"step": 7700
},
{
"epoch": 14.74,
"learning_rate": 0.0001,
"loss": 0.0838,
"step": 7800
},
{
"epoch": 14.93,
"learning_rate": 0.0001,
"loss": 0.0852,
"step": 7900
},
{
"epoch": 15.0,
"eval_accuracy": 0.8066588235294118,
"eval_loss": 0.37707823514938354,
"eval_runtime": 7.5371,
"eval_samples_per_second": 66.339,
"eval_steps_per_second": 8.359,
"step": 7935
},
{
"epoch": 15.12,
"learning_rate": 0.0001,
"loss": 0.0817,
"step": 8000
},
{
"epoch": 15.31,
"learning_rate": 0.0001,
"loss": 0.0804,
"step": 8100
},
{
"epoch": 15.5,
"learning_rate": 0.0001,
"loss": 0.0823,
"step": 8200
},
{
"epoch": 15.69,
"learning_rate": 0.0001,
"loss": 0.0828,
"step": 8300
},
{
"epoch": 15.88,
"learning_rate": 0.0001,
"loss": 0.0849,
"step": 8400
},
{
"epoch": 16.0,
"eval_accuracy": 0.8066274509803921,
"eval_loss": 0.3813852071762085,
"eval_runtime": 7.4973,
"eval_samples_per_second": 66.691,
"eval_steps_per_second": 8.403,
"step": 8464
},
{
"epoch": 16.07,
"learning_rate": 0.0001,
"loss": 0.0813,
"step": 8500
},
{
"epoch": 16.26,
"learning_rate": 0.0001,
"loss": 0.0794,
"step": 8600
},
{
"epoch": 16.45,
"learning_rate": 0.0001,
"loss": 0.0811,
"step": 8700
},
{
"epoch": 16.64,
"learning_rate": 0.0001,
"loss": 0.0825,
"step": 8800
},
{
"epoch": 16.82,
"learning_rate": 0.0001,
"loss": 0.083,
"step": 8900
},
{
"epoch": 17.0,
"eval_accuracy": 0.806478431372549,
"eval_loss": 0.37993746995925903,
"eval_runtime": 6.6523,
"eval_samples_per_second": 75.162,
"eval_steps_per_second": 9.47,
"step": 8993
},
{
"epoch": 17.01,
"learning_rate": 0.0001,
"loss": 0.0842,
"step": 9000
},
{
"epoch": 17.2,
"learning_rate": 0.0001,
"loss": 0.0797,
"step": 9100
},
{
"epoch": 17.39,
"learning_rate": 0.0001,
"loss": 0.082,
"step": 9200
},
{
"epoch": 17.58,
"learning_rate": 0.0001,
"loss": 0.0821,
"step": 9300
},
{
"epoch": 17.77,
"learning_rate": 0.0001,
"loss": 0.0827,
"step": 9400
},
{
"epoch": 17.96,
"learning_rate": 0.0001,
"loss": 0.0838,
"step": 9500
},
{
"epoch": 18.0,
"eval_accuracy": 0.8068156862745098,
"eval_loss": 0.3886854946613312,
"eval_runtime": 7.5079,
"eval_samples_per_second": 66.596,
"eval_steps_per_second": 8.391,
"step": 9522
},
{
"epoch": 18.15,
"learning_rate": 0.0001,
"loss": 0.0795,
"step": 9600
},
{
"epoch": 18.34,
"learning_rate": 0.0001,
"loss": 0.0794,
"step": 9700
},
{
"epoch": 18.53,
"learning_rate": 0.0001,
"loss": 0.0828,
"step": 9800
},
{
"epoch": 18.71,
"learning_rate": 0.0001,
"loss": 0.0824,
"step": 9900
},
{
"epoch": 18.9,
"learning_rate": 0.0001,
"loss": 0.0834,
"step": 10000
},
{
"epoch": 19.0,
"eval_accuracy": 0.8067294117647059,
"eval_loss": 0.39091432094573975,
"eval_runtime": 7.0941,
"eval_samples_per_second": 70.481,
"eval_steps_per_second": 8.881,
"step": 10051
},
{
"epoch": 19.09,
"learning_rate": 0.0001,
"loss": 0.0809,
"step": 10100
},
{
"epoch": 19.28,
"learning_rate": 0.0001,
"loss": 0.0797,
"step": 10200
},
{
"epoch": 19.47,
"learning_rate": 0.0001,
"loss": 0.0797,
"step": 10300
},
{
"epoch": 19.66,
"learning_rate": 0.0001,
"loss": 0.0814,
"step": 10400
},
{
"epoch": 19.85,
"learning_rate": 0.0001,
"loss": 0.0818,
"step": 10500
},
{
"epoch": 20.0,
"eval_accuracy": 0.8067607843137254,
"eval_loss": 0.39837226271629333,
"eval_runtime": 7.7346,
"eval_samples_per_second": 64.645,
"eval_steps_per_second": 8.145,
"step": 10580
},
{
"epoch": 20.0,
"step": 10580,
"total_flos": 2.2427623579965194e+18,
"train_loss": 0.12152552902135146,
"train_runtime": 7955.4081,
"train_samples_per_second": 42.55,
"train_steps_per_second": 1.33
}
],
"logging_steps": 100,
"max_steps": 10580,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 2.2427623579965194e+18,
"trial_name": null,
"trial_params": null
}