20230826051154 / trainer_state.json
dkqjrm's picture
End of training
f22914a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 80.0,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.57,
"eval_loss": 0.4804803133010864,
"eval_runtime": 2.9697,
"eval_samples_per_second": 33.673,
"eval_steps_per_second": 4.377,
"step": 25
},
{
"best_epoch": 0,
"best_eval_accuracy": 0.57,
"epoch": 1.0,
"step": 25
},
{
"epoch": 2.0,
"eval_accuracy": 0.59,
"eval_loss": 0.31524839997291565,
"eval_runtime": 3.0246,
"eval_samples_per_second": 33.062,
"eval_steps_per_second": 4.298,
"step": 50
},
{
"best_epoch": 1,
"best_eval_accuracy": 0.59,
"epoch": 2.0,
"step": 50
},
{
"epoch": 3.0,
"eval_accuracy": 0.62,
"eval_loss": 0.30196473002433777,
"eval_runtime": 3.0719,
"eval_samples_per_second": 32.553,
"eval_steps_per_second": 4.232,
"step": 75
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.62,
"epoch": 3.0,
"step": 75
},
{
"epoch": 4.0,
"eval_accuracy": 0.59,
"eval_loss": 0.2893486022949219,
"eval_runtime": 3.1068,
"eval_samples_per_second": 32.188,
"eval_steps_per_second": 4.184,
"step": 100
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.62,
"epoch": 4.0,
"step": 100
},
{
"epoch": 5.0,
"eval_accuracy": 0.4,
"eval_loss": 0.2987529933452606,
"eval_runtime": 3.1313,
"eval_samples_per_second": 31.936,
"eval_steps_per_second": 4.152,
"step": 125
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.62,
"epoch": 5.0,
"step": 125
},
{
"epoch": 6.0,
"eval_accuracy": 0.57,
"eval_loss": 0.2916392982006073,
"eval_runtime": 3.1469,
"eval_samples_per_second": 31.777,
"eval_steps_per_second": 4.131,
"step": 150
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.62,
"epoch": 6.0,
"step": 150
},
{
"epoch": 7.0,
"eval_accuracy": 0.62,
"eval_loss": 0.29468733072280884,
"eval_runtime": 3.1542,
"eval_samples_per_second": 31.704,
"eval_steps_per_second": 4.121,
"step": 175
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.62,
"epoch": 7.0,
"step": 175
},
{
"epoch": 8.0,
"eval_accuracy": 0.61,
"eval_loss": 0.2887653708457947,
"eval_runtime": 3.1629,
"eval_samples_per_second": 31.617,
"eval_steps_per_second": 4.11,
"step": 200
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.62,
"epoch": 8.0,
"step": 200
},
{
"epoch": 9.0,
"eval_accuracy": 0.53,
"eval_loss": 0.2915412187576294,
"eval_runtime": 3.1715,
"eval_samples_per_second": 31.531,
"eval_steps_per_second": 4.099,
"step": 225
},
{
"best_epoch": 2,
"best_eval_accuracy": 0.62,
"epoch": 9.0,
"step": 225
},
{
"epoch": 10.0,
"eval_accuracy": 0.63,
"eval_loss": 0.29383793473243713,
"eval_runtime": 3.1658,
"eval_samples_per_second": 31.587,
"eval_steps_per_second": 4.106,
"step": 250
},
{
"best_epoch": 9,
"best_eval_accuracy": 0.63,
"epoch": 10.0,
"step": 250
},
{
"epoch": 11.0,
"eval_accuracy": 0.36,
"eval_loss": 0.29850146174430847,
"eval_runtime": 3.1673,
"eval_samples_per_second": 31.572,
"eval_steps_per_second": 4.104,
"step": 275
},
{
"best_epoch": 9,
"best_eval_accuracy": 0.63,
"epoch": 11.0,
"step": 275
},
{
"epoch": 12.0,
"eval_accuracy": 0.65,
"eval_loss": 0.28536227345466614,
"eval_runtime": 3.1687,
"eval_samples_per_second": 31.558,
"eval_steps_per_second": 4.103,
"step": 300
},
{
"best_epoch": 11,
"best_eval_accuracy": 0.65,
"epoch": 12.0,
"step": 300
},
{
"epoch": 13.0,
"eval_accuracy": 0.49,
"eval_loss": 0.2870297431945801,
"eval_runtime": 3.1692,
"eval_samples_per_second": 31.554,
"eval_steps_per_second": 4.102,
"step": 325
},
{
"best_epoch": 11,
"best_eval_accuracy": 0.65,
"epoch": 13.0,
"step": 325
},
{
"epoch": 14.0,
"eval_accuracy": 0.64,
"eval_loss": 0.28021568059921265,
"eval_runtime": 3.1713,
"eval_samples_per_second": 31.533,
"eval_steps_per_second": 4.099,
"step": 350
},
{
"best_epoch": 11,
"best_eval_accuracy": 0.65,
"epoch": 14.0,
"step": 350
},
{
"epoch": 15.0,
"eval_accuracy": 0.61,
"eval_loss": 0.2801496684551239,
"eval_runtime": 3.1719,
"eval_samples_per_second": 31.527,
"eval_steps_per_second": 4.098,
"step": 375
},
{
"best_epoch": 11,
"best_eval_accuracy": 0.65,
"epoch": 15.0,
"step": 375
},
{
"epoch": 16.0,
"eval_accuracy": 0.63,
"eval_loss": 0.2805960774421692,
"eval_runtime": 3.1705,
"eval_samples_per_second": 31.541,
"eval_steps_per_second": 4.1,
"step": 400
},
{
"best_epoch": 11,
"best_eval_accuracy": 0.65,
"epoch": 16.0,
"step": 400
},
{
"epoch": 17.0,
"eval_accuracy": 0.6,
"eval_loss": 0.28096577525138855,
"eval_runtime": 3.1719,
"eval_samples_per_second": 31.527,
"eval_steps_per_second": 4.098,
"step": 425
},
{
"best_epoch": 11,
"best_eval_accuracy": 0.65,
"epoch": 17.0,
"step": 425
},
{
"epoch": 18.0,
"eval_accuracy": 0.66,
"eval_loss": 0.2888098657131195,
"eval_runtime": 3.1688,
"eval_samples_per_second": 31.558,
"eval_steps_per_second": 4.102,
"step": 450
},
{
"best_epoch": 17,
"best_eval_accuracy": 0.66,
"epoch": 18.0,
"step": 450
},
{
"epoch": 19.0,
"eval_accuracy": 0.63,
"eval_loss": 0.278043657541275,
"eval_runtime": 3.1698,
"eval_samples_per_second": 31.548,
"eval_steps_per_second": 4.101,
"step": 475
},
{
"best_epoch": 17,
"best_eval_accuracy": 0.66,
"epoch": 19.0,
"step": 475
},
{
"epoch": 20.0,
"learning_rate": 0.015,
"loss": 0.6923,
"step": 500
},
{
"epoch": 20.0,
"eval_accuracy": 0.6,
"eval_loss": 0.28032350540161133,
"eval_runtime": 3.1696,
"eval_samples_per_second": 31.549,
"eval_steps_per_second": 4.101,
"step": 500
},
{
"best_epoch": 17,
"best_eval_accuracy": 0.66,
"epoch": 20.0,
"step": 500
},
{
"epoch": 21.0,
"eval_accuracy": 0.65,
"eval_loss": 0.2768295407295227,
"eval_runtime": 3.1686,
"eval_samples_per_second": 31.559,
"eval_steps_per_second": 4.103,
"step": 525
},
{
"best_epoch": 17,
"best_eval_accuracy": 0.66,
"epoch": 21.0,
"step": 525
},
{
"epoch": 22.0,
"eval_accuracy": 0.65,
"eval_loss": 0.2743799388408661,
"eval_runtime": 3.1675,
"eval_samples_per_second": 31.57,
"eval_steps_per_second": 4.104,
"step": 550
},
{
"best_epoch": 17,
"best_eval_accuracy": 0.66,
"epoch": 22.0,
"step": 550
},
{
"epoch": 23.0,
"eval_accuracy": 0.66,
"eval_loss": 0.2831212282180786,
"eval_runtime": 3.1751,
"eval_samples_per_second": 31.495,
"eval_steps_per_second": 4.094,
"step": 575
},
{
"best_epoch": 17,
"best_eval_accuracy": 0.66,
"epoch": 23.0,
"step": 575
},
{
"epoch": 24.0,
"eval_accuracy": 0.67,
"eval_loss": 0.27428343892097473,
"eval_runtime": 3.174,
"eval_samples_per_second": 31.506,
"eval_steps_per_second": 4.096,
"step": 600
},
{
"best_epoch": 23,
"best_eval_accuracy": 0.67,
"epoch": 24.0,
"step": 600
},
{
"epoch": 25.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2847433388233185,
"eval_runtime": 3.1729,
"eval_samples_per_second": 31.517,
"eval_steps_per_second": 4.097,
"step": 625
},
{
"best_epoch": 24,
"best_eval_accuracy": 0.69,
"epoch": 25.0,
"step": 625
},
{
"epoch": 26.0,
"eval_accuracy": 0.71,
"eval_loss": 0.273659884929657,
"eval_runtime": 3.1699,
"eval_samples_per_second": 31.547,
"eval_steps_per_second": 4.101,
"step": 650
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 26.0,
"step": 650
},
{
"epoch": 27.0,
"eval_accuracy": 0.65,
"eval_loss": 0.28170689940452576,
"eval_runtime": 3.1699,
"eval_samples_per_second": 31.547,
"eval_steps_per_second": 4.101,
"step": 675
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 27.0,
"step": 675
},
{
"epoch": 28.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2770252525806427,
"eval_runtime": 3.1658,
"eval_samples_per_second": 31.587,
"eval_steps_per_second": 4.106,
"step": 700
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 28.0,
"step": 700
},
{
"epoch": 29.0,
"eval_accuracy": 0.67,
"eval_loss": 0.28874772787094116,
"eval_runtime": 3.1645,
"eval_samples_per_second": 31.6,
"eval_steps_per_second": 4.108,
"step": 725
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 29.0,
"step": 725
},
{
"epoch": 30.0,
"eval_accuracy": 0.64,
"eval_loss": 0.27803295850753784,
"eval_runtime": 3.1654,
"eval_samples_per_second": 31.592,
"eval_steps_per_second": 4.107,
"step": 750
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 30.0,
"step": 750
},
{
"epoch": 31.0,
"eval_accuracy": 0.66,
"eval_loss": 0.270718514919281,
"eval_runtime": 3.1669,
"eval_samples_per_second": 31.577,
"eval_steps_per_second": 4.105,
"step": 775
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 31.0,
"step": 775
},
{
"epoch": 32.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2889265716075897,
"eval_runtime": 3.1651,
"eval_samples_per_second": 31.595,
"eval_steps_per_second": 4.107,
"step": 800
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 32.0,
"step": 800
},
{
"epoch": 33.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2821086645126343,
"eval_runtime": 3.1646,
"eval_samples_per_second": 31.6,
"eval_steps_per_second": 4.108,
"step": 825
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 33.0,
"step": 825
},
{
"epoch": 34.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2735106647014618,
"eval_runtime": 3.1678,
"eval_samples_per_second": 31.568,
"eval_steps_per_second": 4.104,
"step": 850
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 34.0,
"step": 850
},
{
"epoch": 35.0,
"eval_accuracy": 0.66,
"eval_loss": 0.27724650502204895,
"eval_runtime": 3.1727,
"eval_samples_per_second": 31.519,
"eval_steps_per_second": 4.097,
"step": 875
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 35.0,
"step": 875
},
{
"epoch": 36.0,
"eval_accuracy": 0.67,
"eval_loss": 0.27664193511009216,
"eval_runtime": 3.1673,
"eval_samples_per_second": 31.573,
"eval_steps_per_second": 4.104,
"step": 900
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 36.0,
"step": 900
},
{
"epoch": 37.0,
"eval_accuracy": 0.68,
"eval_loss": 0.28623029589653015,
"eval_runtime": 3.1643,
"eval_samples_per_second": 31.602,
"eval_steps_per_second": 4.108,
"step": 925
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 37.0,
"step": 925
},
{
"epoch": 38.0,
"eval_accuracy": 0.65,
"eval_loss": 0.27454671263694763,
"eval_runtime": 3.1647,
"eval_samples_per_second": 31.599,
"eval_steps_per_second": 4.108,
"step": 950
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 38.0,
"step": 950
},
{
"epoch": 39.0,
"eval_accuracy": 0.66,
"eval_loss": 0.28275245428085327,
"eval_runtime": 3.1658,
"eval_samples_per_second": 31.587,
"eval_steps_per_second": 4.106,
"step": 975
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 39.0,
"step": 975
},
{
"epoch": 40.0,
"learning_rate": 0.01,
"loss": 0.5864,
"step": 1000
},
{
"epoch": 40.0,
"eval_accuracy": 0.68,
"eval_loss": 0.3264109790325165,
"eval_runtime": 3.1694,
"eval_samples_per_second": 31.552,
"eval_steps_per_second": 4.102,
"step": 1000
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 40.0,
"step": 1000
},
{
"epoch": 41.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2749994993209839,
"eval_runtime": 3.1811,
"eval_samples_per_second": 31.436,
"eval_steps_per_second": 4.087,
"step": 1025
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 41.0,
"step": 1025
},
{
"epoch": 42.0,
"eval_accuracy": 0.67,
"eval_loss": 0.28311243653297424,
"eval_runtime": 3.1697,
"eval_samples_per_second": 31.549,
"eval_steps_per_second": 4.101,
"step": 1050
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 42.0,
"step": 1050
},
{
"epoch": 43.0,
"eval_accuracy": 0.67,
"eval_loss": 0.2724871039390564,
"eval_runtime": 3.1778,
"eval_samples_per_second": 31.468,
"eval_steps_per_second": 4.091,
"step": 1075
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 43.0,
"step": 1075
},
{
"epoch": 44.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2908870577812195,
"eval_runtime": 3.1727,
"eval_samples_per_second": 31.519,
"eval_steps_per_second": 4.098,
"step": 1100
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 44.0,
"step": 1100
},
{
"epoch": 45.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2840518653392792,
"eval_runtime": 3.1714,
"eval_samples_per_second": 31.532,
"eval_steps_per_second": 4.099,
"step": 1125
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 45.0,
"step": 1125
},
{
"epoch": 46.0,
"eval_accuracy": 0.69,
"eval_loss": 0.31259122490882874,
"eval_runtime": 3.1745,
"eval_samples_per_second": 31.501,
"eval_steps_per_second": 4.095,
"step": 1150
},
{
"best_epoch": 25,
"best_eval_accuracy": 0.71,
"epoch": 46.0,
"step": 1150
},
{
"epoch": 47.0,
"eval_accuracy": 0.72,
"eval_loss": 0.28924334049224854,
"eval_runtime": 3.1749,
"eval_samples_per_second": 31.497,
"eval_steps_per_second": 4.095,
"step": 1175
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 47.0,
"step": 1175
},
{
"epoch": 48.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2886520326137543,
"eval_runtime": 3.177,
"eval_samples_per_second": 31.476,
"eval_steps_per_second": 4.092,
"step": 1200
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 48.0,
"step": 1200
},
{
"epoch": 49.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2833870053291321,
"eval_runtime": 3.176,
"eval_samples_per_second": 31.486,
"eval_steps_per_second": 4.093,
"step": 1225
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 49.0,
"step": 1225
},
{
"epoch": 50.0,
"eval_accuracy": 0.66,
"eval_loss": 0.2731325626373291,
"eval_runtime": 3.1813,
"eval_samples_per_second": 31.433,
"eval_steps_per_second": 4.086,
"step": 1250
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 50.0,
"step": 1250
},
{
"epoch": 51.0,
"eval_accuracy": 0.68,
"eval_loss": 0.28878146409988403,
"eval_runtime": 3.1811,
"eval_samples_per_second": 31.436,
"eval_steps_per_second": 4.087,
"step": 1275
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 51.0,
"step": 1275
},
{
"epoch": 52.0,
"eval_accuracy": 0.67,
"eval_loss": 0.3080463707447052,
"eval_runtime": 3.1823,
"eval_samples_per_second": 31.423,
"eval_steps_per_second": 4.085,
"step": 1300
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 52.0,
"step": 1300
},
{
"epoch": 53.0,
"eval_accuracy": 0.67,
"eval_loss": 0.28622153401374817,
"eval_runtime": 3.1939,
"eval_samples_per_second": 31.309,
"eval_steps_per_second": 4.07,
"step": 1325
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 53.0,
"step": 1325
},
{
"epoch": 54.0,
"eval_accuracy": 0.67,
"eval_loss": 0.27717000246047974,
"eval_runtime": 3.1836,
"eval_samples_per_second": 31.411,
"eval_steps_per_second": 4.083,
"step": 1350
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 54.0,
"step": 1350
},
{
"epoch": 55.0,
"eval_accuracy": 0.67,
"eval_loss": 0.2790798544883728,
"eval_runtime": 3.186,
"eval_samples_per_second": 31.387,
"eval_steps_per_second": 4.08,
"step": 1375
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 55.0,
"step": 1375
},
{
"epoch": 56.0,
"eval_accuracy": 0.68,
"eval_loss": 0.29302406311035156,
"eval_runtime": 3.1924,
"eval_samples_per_second": 31.325,
"eval_steps_per_second": 4.072,
"step": 1400
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 56.0,
"step": 1400
},
{
"epoch": 57.0,
"eval_accuracy": 0.66,
"eval_loss": 0.27831196784973145,
"eval_runtime": 3.1927,
"eval_samples_per_second": 31.321,
"eval_steps_per_second": 4.072,
"step": 1425
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 57.0,
"step": 1425
},
{
"epoch": 58.0,
"eval_accuracy": 0.67,
"eval_loss": 0.28552454710006714,
"eval_runtime": 3.1926,
"eval_samples_per_second": 31.322,
"eval_steps_per_second": 4.072,
"step": 1450
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 58.0,
"step": 1450
},
{
"epoch": 59.0,
"eval_accuracy": 0.69,
"eval_loss": 0.28503143787384033,
"eval_runtime": 3.19,
"eval_samples_per_second": 31.348,
"eval_steps_per_second": 4.075,
"step": 1475
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 59.0,
"step": 1475
},
{
"epoch": 60.0,
"learning_rate": 0.005,
"loss": 0.4926,
"step": 1500
},
{
"epoch": 60.0,
"eval_accuracy": 0.69,
"eval_loss": 0.28993481397628784,
"eval_runtime": 3.1899,
"eval_samples_per_second": 31.348,
"eval_steps_per_second": 4.075,
"step": 1500
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 60.0,
"step": 1500
},
{
"epoch": 61.0,
"eval_accuracy": 0.67,
"eval_loss": 0.27968910336494446,
"eval_runtime": 3.1909,
"eval_samples_per_second": 31.339,
"eval_steps_per_second": 4.074,
"step": 1525
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 61.0,
"step": 1525
},
{
"epoch": 62.0,
"eval_accuracy": 0.69,
"eval_loss": 0.33221057057380676,
"eval_runtime": 3.1856,
"eval_samples_per_second": 31.391,
"eval_steps_per_second": 4.081,
"step": 1550
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 62.0,
"step": 1550
},
{
"epoch": 63.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2761762738227844,
"eval_runtime": 3.1864,
"eval_samples_per_second": 31.384,
"eval_steps_per_second": 4.08,
"step": 1575
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 63.0,
"step": 1575
},
{
"epoch": 64.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2816024720668793,
"eval_runtime": 3.1868,
"eval_samples_per_second": 31.38,
"eval_steps_per_second": 4.079,
"step": 1600
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 64.0,
"step": 1600
},
{
"epoch": 65.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2951596677303314,
"eval_runtime": 3.1855,
"eval_samples_per_second": 31.393,
"eval_steps_per_second": 4.081,
"step": 1625
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 65.0,
"step": 1625
},
{
"epoch": 66.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2794454097747803,
"eval_runtime": 3.1836,
"eval_samples_per_second": 31.411,
"eval_steps_per_second": 4.083,
"step": 1650
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 66.0,
"step": 1650
},
{
"epoch": 67.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2873387932777405,
"eval_runtime": 3.1872,
"eval_samples_per_second": 31.375,
"eval_steps_per_second": 4.079,
"step": 1675
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 67.0,
"step": 1675
},
{
"epoch": 68.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2834796607494354,
"eval_runtime": 3.1878,
"eval_samples_per_second": 31.37,
"eval_steps_per_second": 4.078,
"step": 1700
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 68.0,
"step": 1700
},
{
"epoch": 69.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2908068597316742,
"eval_runtime": 3.2004,
"eval_samples_per_second": 31.246,
"eval_steps_per_second": 4.062,
"step": 1725
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 69.0,
"step": 1725
},
{
"epoch": 70.0,
"eval_accuracy": 0.68,
"eval_loss": 0.300818532705307,
"eval_runtime": 3.1872,
"eval_samples_per_second": 31.376,
"eval_steps_per_second": 4.079,
"step": 1750
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 70.0,
"step": 1750
},
{
"epoch": 71.0,
"eval_accuracy": 0.68,
"eval_loss": 0.2892753481864929,
"eval_runtime": 3.1915,
"eval_samples_per_second": 31.333,
"eval_steps_per_second": 4.073,
"step": 1775
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 71.0,
"step": 1775
},
{
"epoch": 72.0,
"eval_accuracy": 0.68,
"eval_loss": 0.28255218267440796,
"eval_runtime": 3.1936,
"eval_samples_per_second": 31.313,
"eval_steps_per_second": 4.071,
"step": 1800
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 72.0,
"step": 1800
},
{
"epoch": 73.0,
"eval_accuracy": 0.68,
"eval_loss": 0.29189300537109375,
"eval_runtime": 3.189,
"eval_samples_per_second": 31.358,
"eval_steps_per_second": 4.077,
"step": 1825
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 73.0,
"step": 1825
},
{
"epoch": 74.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2832392454147339,
"eval_runtime": 3.1867,
"eval_samples_per_second": 31.381,
"eval_steps_per_second": 4.079,
"step": 1850
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 74.0,
"step": 1850
},
{
"epoch": 75.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2830210328102112,
"eval_runtime": 3.1895,
"eval_samples_per_second": 31.353,
"eval_steps_per_second": 4.076,
"step": 1875
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 75.0,
"step": 1875
},
{
"epoch": 76.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2808878719806671,
"eval_runtime": 3.1857,
"eval_samples_per_second": 31.39,
"eval_steps_per_second": 4.081,
"step": 1900
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 76.0,
"step": 1900
},
{
"epoch": 77.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2822246849536896,
"eval_runtime": 3.1912,
"eval_samples_per_second": 31.336,
"eval_steps_per_second": 4.074,
"step": 1925
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 77.0,
"step": 1925
},
{
"epoch": 78.0,
"eval_accuracy": 0.69,
"eval_loss": 0.2884049117565155,
"eval_runtime": 3.1866,
"eval_samples_per_second": 31.382,
"eval_steps_per_second": 4.08,
"step": 1950
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 78.0,
"step": 1950
},
{
"epoch": 79.0,
"eval_accuracy": 0.7,
"eval_loss": 0.290973961353302,
"eval_runtime": 3.191,
"eval_samples_per_second": 31.339,
"eval_steps_per_second": 4.074,
"step": 1975
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 79.0,
"step": 1975
},
{
"epoch": 80.0,
"learning_rate": 0.0,
"loss": 0.4369,
"step": 2000
},
{
"epoch": 80.0,
"eval_accuracy": 0.7,
"eval_loss": 0.2896697521209717,
"eval_runtime": 3.1895,
"eval_samples_per_second": 31.353,
"eval_steps_per_second": 4.076,
"step": 2000
},
{
"best_epoch": 46,
"best_eval_accuracy": 0.72,
"epoch": 80.0,
"step": 2000
},
{
"epoch": 80.0,
"step": 2000,
"total_flos": 2.9821702864896e+16,
"train_loss": 0.5520365753173828,
"train_runtime": 1695.1694,
"train_samples_per_second": 18.877,
"train_steps_per_second": 1.18
}
],
"max_steps": 2000,
"num_train_epochs": 80,
"total_flos": 2.9821702864896e+16,
"trial_name": null,
"trial_params": null
}