BilalMuftuoglu's picture
End of training
0dc9497 verified
raw
history blame contribute delete
No virus
28.6 kB
{
"best_metric": 0.9069767441860465,
"best_model_checkpoint": "deit-base-distilled-patch16-224-75-fold1/checkpoint-88",
"epoch": 100.0,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.6511627906976745,
"eval_loss": 0.6476810574531555,
"eval_runtime": 0.6318,
"eval_samples_per_second": 68.06,
"eval_steps_per_second": 3.166,
"step": 2
},
{
"epoch": 2.0,
"eval_accuracy": 0.6976744186046512,
"eval_loss": 0.6527612805366516,
"eval_runtime": 0.6173,
"eval_samples_per_second": 69.663,
"eval_steps_per_second": 3.24,
"step": 4
},
{
"epoch": 3.0,
"eval_accuracy": 0.6976744186046512,
"eval_loss": 0.8095551133155823,
"eval_runtime": 0.6043,
"eval_samples_per_second": 71.154,
"eval_steps_per_second": 3.309,
"step": 6
},
{
"epoch": 4.0,
"eval_accuracy": 0.6976744186046512,
"eval_loss": 0.7679464817047119,
"eval_runtime": 0.6207,
"eval_samples_per_second": 69.279,
"eval_steps_per_second": 3.222,
"step": 8
},
{
"epoch": 5.0,
"grad_norm": 2.9488413333892822,
"learning_rate": 2.5e-05,
"loss": 0.5994,
"step": 10
},
{
"epoch": 5.0,
"eval_accuracy": 0.6976744186046512,
"eval_loss": 0.5481935739517212,
"eval_runtime": 0.6133,
"eval_samples_per_second": 70.117,
"eval_steps_per_second": 3.261,
"step": 10
},
{
"epoch": 6.0,
"eval_accuracy": 0.7441860465116279,
"eval_loss": 0.4983576536178589,
"eval_runtime": 0.6175,
"eval_samples_per_second": 69.631,
"eval_steps_per_second": 3.239,
"step": 12
},
{
"epoch": 7.0,
"eval_accuracy": 0.6976744186046512,
"eval_loss": 0.6156066656112671,
"eval_runtime": 0.6217,
"eval_samples_per_second": 69.167,
"eval_steps_per_second": 3.217,
"step": 14
},
{
"epoch": 8.0,
"eval_accuracy": 0.7674418604651163,
"eval_loss": 0.530674397945404,
"eval_runtime": 0.6286,
"eval_samples_per_second": 68.405,
"eval_steps_per_second": 3.182,
"step": 16
},
{
"epoch": 9.0,
"eval_accuracy": 0.7674418604651163,
"eval_loss": 0.40364784002304077,
"eval_runtime": 0.6233,
"eval_samples_per_second": 68.986,
"eval_steps_per_second": 3.209,
"step": 18
},
{
"epoch": 10.0,
"grad_norm": 4.075669288635254,
"learning_rate": 5e-05,
"loss": 0.3806,
"step": 20
},
{
"epoch": 10.0,
"eval_accuracy": 0.7906976744186046,
"eval_loss": 0.42405834794044495,
"eval_runtime": 0.6323,
"eval_samples_per_second": 68.001,
"eval_steps_per_second": 3.163,
"step": 20
},
{
"epoch": 11.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.4263027012348175,
"eval_runtime": 0.6181,
"eval_samples_per_second": 69.564,
"eval_steps_per_second": 3.236,
"step": 22
},
{
"epoch": 12.0,
"eval_accuracy": 0.7441860465116279,
"eval_loss": 0.6778327226638794,
"eval_runtime": 0.642,
"eval_samples_per_second": 66.973,
"eval_steps_per_second": 3.115,
"step": 24
},
{
"epoch": 13.0,
"eval_accuracy": 0.7674418604651163,
"eval_loss": 0.5884802937507629,
"eval_runtime": 0.6192,
"eval_samples_per_second": 69.444,
"eval_steps_per_second": 3.23,
"step": 26
},
{
"epoch": 14.0,
"eval_accuracy": 0.7906976744186046,
"eval_loss": 0.6048120260238647,
"eval_runtime": 0.6378,
"eval_samples_per_second": 67.423,
"eval_steps_per_second": 3.136,
"step": 28
},
{
"epoch": 15.0,
"grad_norm": 3.8426947593688965,
"learning_rate": 4.722222222222222e-05,
"loss": 0.273,
"step": 30
},
{
"epoch": 15.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.5110020041465759,
"eval_runtime": 0.6259,
"eval_samples_per_second": 68.697,
"eval_steps_per_second": 3.195,
"step": 30
},
{
"epoch": 16.0,
"eval_accuracy": 0.7441860465116279,
"eval_loss": 0.3793295919895172,
"eval_runtime": 0.6397,
"eval_samples_per_second": 67.215,
"eval_steps_per_second": 3.126,
"step": 32
},
{
"epoch": 17.0,
"eval_accuracy": 0.7906976744186046,
"eval_loss": 0.3635205626487732,
"eval_runtime": 0.63,
"eval_samples_per_second": 68.259,
"eval_steps_per_second": 3.175,
"step": 34
},
{
"epoch": 18.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.3863283097743988,
"eval_runtime": 0.6683,
"eval_samples_per_second": 64.347,
"eval_steps_per_second": 2.993,
"step": 36
},
{
"epoch": 19.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.37879544496536255,
"eval_runtime": 0.621,
"eval_samples_per_second": 69.245,
"eval_steps_per_second": 3.221,
"step": 38
},
{
"epoch": 20.0,
"grad_norm": 4.38327693939209,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.2388,
"step": 40
},
{
"epoch": 20.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.33901217579841614,
"eval_runtime": 0.6226,
"eval_samples_per_second": 69.07,
"eval_steps_per_second": 3.213,
"step": 40
},
{
"epoch": 21.0,
"eval_accuracy": 0.7906976744186046,
"eval_loss": 0.45933058857917786,
"eval_runtime": 0.6161,
"eval_samples_per_second": 69.798,
"eval_steps_per_second": 3.246,
"step": 42
},
{
"epoch": 22.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.344134122133255,
"eval_runtime": 0.6226,
"eval_samples_per_second": 69.067,
"eval_steps_per_second": 3.212,
"step": 44
},
{
"epoch": 23.0,
"eval_accuracy": 0.7906976744186046,
"eval_loss": 0.5482814908027649,
"eval_runtime": 0.6286,
"eval_samples_per_second": 68.407,
"eval_steps_per_second": 3.182,
"step": 46
},
{
"epoch": 24.0,
"eval_accuracy": 0.7906976744186046,
"eval_loss": 0.6398680210113525,
"eval_runtime": 0.6347,
"eval_samples_per_second": 67.748,
"eval_steps_per_second": 3.151,
"step": 48
},
{
"epoch": 25.0,
"grad_norm": 1.9829111099243164,
"learning_rate": 4.166666666666667e-05,
"loss": 0.189,
"step": 50
},
{
"epoch": 25.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.3333226442337036,
"eval_runtime": 0.6246,
"eval_samples_per_second": 68.841,
"eval_steps_per_second": 3.202,
"step": 50
},
{
"epoch": 26.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.3325919210910797,
"eval_runtime": 0.6227,
"eval_samples_per_second": 69.059,
"eval_steps_per_second": 3.212,
"step": 52
},
{
"epoch": 27.0,
"eval_accuracy": 0.7906976744186046,
"eval_loss": 0.41504570841789246,
"eval_runtime": 0.6243,
"eval_samples_per_second": 68.877,
"eval_steps_per_second": 3.204,
"step": 54
},
{
"epoch": 28.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.34200432896614075,
"eval_runtime": 0.6237,
"eval_samples_per_second": 68.94,
"eval_steps_per_second": 3.207,
"step": 56
},
{
"epoch": 29.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.36485448479652405,
"eval_runtime": 0.6179,
"eval_samples_per_second": 69.595,
"eval_steps_per_second": 3.237,
"step": 58
},
{
"epoch": 30.0,
"grad_norm": 2.870300531387329,
"learning_rate": 3.888888888888889e-05,
"loss": 0.1718,
"step": 60
},
{
"epoch": 30.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.36506161093711853,
"eval_runtime": 0.6332,
"eval_samples_per_second": 67.905,
"eval_steps_per_second": 3.158,
"step": 60
},
{
"epoch": 31.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.46762949228286743,
"eval_runtime": 0.6159,
"eval_samples_per_second": 69.813,
"eval_steps_per_second": 3.247,
"step": 62
},
{
"epoch": 32.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.3543034791946411,
"eval_runtime": 0.6304,
"eval_samples_per_second": 68.211,
"eval_steps_per_second": 3.173,
"step": 64
},
{
"epoch": 33.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.3209027349948883,
"eval_runtime": 0.6291,
"eval_samples_per_second": 68.348,
"eval_steps_per_second": 3.179,
"step": 66
},
{
"epoch": 34.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.34199726581573486,
"eval_runtime": 0.6272,
"eval_samples_per_second": 68.564,
"eval_steps_per_second": 3.189,
"step": 68
},
{
"epoch": 35.0,
"grad_norm": 3.6730294227600098,
"learning_rate": 3.611111111111111e-05,
"loss": 0.1466,
"step": 70
},
{
"epoch": 35.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.37998369336128235,
"eval_runtime": 0.6314,
"eval_samples_per_second": 68.108,
"eval_steps_per_second": 3.168,
"step": 70
},
{
"epoch": 36.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.6547455191612244,
"eval_runtime": 0.6789,
"eval_samples_per_second": 63.339,
"eval_steps_per_second": 2.946,
"step": 72
},
{
"epoch": 37.0,
"eval_accuracy": 0.7674418604651163,
"eval_loss": 0.9743425250053406,
"eval_runtime": 0.6571,
"eval_samples_per_second": 65.436,
"eval_steps_per_second": 3.044,
"step": 74
},
{
"epoch": 38.0,
"eval_accuracy": 0.7906976744186046,
"eval_loss": 0.6677446961402893,
"eval_runtime": 0.6757,
"eval_samples_per_second": 63.638,
"eval_steps_per_second": 2.96,
"step": 76
},
{
"epoch": 39.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.5691245794296265,
"eval_runtime": 0.6355,
"eval_samples_per_second": 67.666,
"eval_steps_per_second": 3.147,
"step": 78
},
{
"epoch": 40.0,
"grad_norm": 1.461414098739624,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.119,
"step": 80
},
{
"epoch": 40.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.4796452224254608,
"eval_runtime": 0.6302,
"eval_samples_per_second": 68.228,
"eval_steps_per_second": 3.173,
"step": 80
},
{
"epoch": 41.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.32433825731277466,
"eval_runtime": 0.6208,
"eval_samples_per_second": 69.265,
"eval_steps_per_second": 3.222,
"step": 82
},
{
"epoch": 42.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.29689204692840576,
"eval_runtime": 0.6213,
"eval_samples_per_second": 69.211,
"eval_steps_per_second": 3.219,
"step": 84
},
{
"epoch": 43.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.3637494146823883,
"eval_runtime": 0.6187,
"eval_samples_per_second": 69.505,
"eval_steps_per_second": 3.233,
"step": 86
},
{
"epoch": 44.0,
"eval_accuracy": 0.9069767441860465,
"eval_loss": 0.30976200103759766,
"eval_runtime": 0.6227,
"eval_samples_per_second": 69.059,
"eval_steps_per_second": 3.212,
"step": 88
},
{
"epoch": 45.0,
"grad_norm": 3.725754737854004,
"learning_rate": 3.055555555555556e-05,
"loss": 0.1123,
"step": 90
},
{
"epoch": 45.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.3953772783279419,
"eval_runtime": 0.6186,
"eval_samples_per_second": 69.516,
"eval_steps_per_second": 3.233,
"step": 90
},
{
"epoch": 46.0,
"eval_accuracy": 0.9069767441860465,
"eval_loss": 0.3197100758552551,
"eval_runtime": 0.631,
"eval_samples_per_second": 68.144,
"eval_steps_per_second": 3.169,
"step": 92
},
{
"epoch": 47.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.318818062543869,
"eval_runtime": 0.6195,
"eval_samples_per_second": 69.405,
"eval_steps_per_second": 3.228,
"step": 94
},
{
"epoch": 48.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.30999791622161865,
"eval_runtime": 0.6294,
"eval_samples_per_second": 68.321,
"eval_steps_per_second": 3.178,
"step": 96
},
{
"epoch": 49.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.3653392791748047,
"eval_runtime": 0.6265,
"eval_samples_per_second": 68.633,
"eval_steps_per_second": 3.192,
"step": 98
},
{
"epoch": 50.0,
"grad_norm": 3.50057315826416,
"learning_rate": 2.777777777777778e-05,
"loss": 0.1136,
"step": 100
},
{
"epoch": 50.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.35266682505607605,
"eval_runtime": 0.6288,
"eval_samples_per_second": 68.382,
"eval_steps_per_second": 3.181,
"step": 100
},
{
"epoch": 51.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.3152053952217102,
"eval_runtime": 0.6272,
"eval_samples_per_second": 68.564,
"eval_steps_per_second": 3.189,
"step": 102
},
{
"epoch": 52.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.32772013545036316,
"eval_runtime": 0.6243,
"eval_samples_per_second": 68.875,
"eval_steps_per_second": 3.203,
"step": 104
},
{
"epoch": 53.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.32207396626472473,
"eval_runtime": 0.6252,
"eval_samples_per_second": 68.781,
"eval_steps_per_second": 3.199,
"step": 106
},
{
"epoch": 54.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.34381797909736633,
"eval_runtime": 0.6342,
"eval_samples_per_second": 67.803,
"eval_steps_per_second": 3.154,
"step": 108
},
{
"epoch": 55.0,
"grad_norm": 1.3971636295318604,
"learning_rate": 2.5e-05,
"loss": 0.0858,
"step": 110
},
{
"epoch": 55.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.4682579040527344,
"eval_runtime": 0.6268,
"eval_samples_per_second": 68.598,
"eval_steps_per_second": 3.191,
"step": 110
},
{
"epoch": 56.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.4511352479457855,
"eval_runtime": 0.636,
"eval_samples_per_second": 67.609,
"eval_steps_per_second": 3.145,
"step": 112
},
{
"epoch": 57.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.3486385643482208,
"eval_runtime": 0.636,
"eval_samples_per_second": 67.605,
"eval_steps_per_second": 3.144,
"step": 114
},
{
"epoch": 58.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.35942766070365906,
"eval_runtime": 0.6431,
"eval_samples_per_second": 66.86,
"eval_steps_per_second": 3.11,
"step": 116
},
{
"epoch": 59.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.39140263199806213,
"eval_runtime": 0.6182,
"eval_samples_per_second": 69.554,
"eval_steps_per_second": 3.235,
"step": 118
},
{
"epoch": 60.0,
"grad_norm": 3.632699728012085,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.084,
"step": 120
},
{
"epoch": 60.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.4256921410560608,
"eval_runtime": 0.6209,
"eval_samples_per_second": 69.256,
"eval_steps_per_second": 3.221,
"step": 120
},
{
"epoch": 61.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.4505208134651184,
"eval_runtime": 0.6326,
"eval_samples_per_second": 67.972,
"eval_steps_per_second": 3.162,
"step": 122
},
{
"epoch": 62.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.4038391411304474,
"eval_runtime": 0.6342,
"eval_samples_per_second": 67.797,
"eval_steps_per_second": 3.153,
"step": 124
},
{
"epoch": 63.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.3745191693305969,
"eval_runtime": 0.6342,
"eval_samples_per_second": 67.8,
"eval_steps_per_second": 3.153,
"step": 126
},
{
"epoch": 64.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.3773575723171234,
"eval_runtime": 0.6457,
"eval_samples_per_second": 66.599,
"eval_steps_per_second": 3.098,
"step": 128
},
{
"epoch": 65.0,
"grad_norm": 1.8791605234146118,
"learning_rate": 1.9444444444444445e-05,
"loss": 0.0938,
"step": 130
},
{
"epoch": 65.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.37120649218559265,
"eval_runtime": 0.6273,
"eval_samples_per_second": 68.551,
"eval_steps_per_second": 3.188,
"step": 130
},
{
"epoch": 66.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.3736191987991333,
"eval_runtime": 0.6296,
"eval_samples_per_second": 68.293,
"eval_steps_per_second": 3.176,
"step": 132
},
{
"epoch": 67.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.3839561641216278,
"eval_runtime": 0.6195,
"eval_samples_per_second": 69.415,
"eval_steps_per_second": 3.229,
"step": 134
},
{
"epoch": 68.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.39015671610832214,
"eval_runtime": 0.623,
"eval_samples_per_second": 69.018,
"eval_steps_per_second": 3.21,
"step": 136
},
{
"epoch": 69.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.41052982211112976,
"eval_runtime": 0.6203,
"eval_samples_per_second": 69.32,
"eval_steps_per_second": 3.224,
"step": 138
},
{
"epoch": 70.0,
"grad_norm": 1.6623305082321167,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.055,
"step": 140
},
{
"epoch": 70.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.44984933733940125,
"eval_runtime": 0.6218,
"eval_samples_per_second": 69.156,
"eval_steps_per_second": 3.217,
"step": 140
},
{
"epoch": 71.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.4954254627227783,
"eval_runtime": 0.6202,
"eval_samples_per_second": 69.328,
"eval_steps_per_second": 3.225,
"step": 142
},
{
"epoch": 72.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.6397253274917603,
"eval_runtime": 0.6291,
"eval_samples_per_second": 68.351,
"eval_steps_per_second": 3.179,
"step": 144
},
{
"epoch": 73.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.6271260976791382,
"eval_runtime": 0.6211,
"eval_samples_per_second": 69.235,
"eval_steps_per_second": 3.22,
"step": 146
},
{
"epoch": 74.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.4821247160434723,
"eval_runtime": 0.6297,
"eval_samples_per_second": 68.291,
"eval_steps_per_second": 3.176,
"step": 148
},
{
"epoch": 75.0,
"grad_norm": 2.1010677814483643,
"learning_rate": 1.388888888888889e-05,
"loss": 0.0755,
"step": 150
},
{
"epoch": 75.0,
"eval_accuracy": 0.9069767441860465,
"eval_loss": 0.36991921067237854,
"eval_runtime": 0.6162,
"eval_samples_per_second": 69.782,
"eval_steps_per_second": 3.246,
"step": 150
},
{
"epoch": 76.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.33030539751052856,
"eval_runtime": 0.6223,
"eval_samples_per_second": 69.094,
"eval_steps_per_second": 3.214,
"step": 152
},
{
"epoch": 77.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.328202486038208,
"eval_runtime": 0.6165,
"eval_samples_per_second": 69.745,
"eval_steps_per_second": 3.244,
"step": 154
},
{
"epoch": 78.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.31808754801750183,
"eval_runtime": 0.6336,
"eval_samples_per_second": 67.862,
"eval_steps_per_second": 3.156,
"step": 156
},
{
"epoch": 79.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.3082694709300995,
"eval_runtime": 0.6161,
"eval_samples_per_second": 69.798,
"eval_steps_per_second": 3.246,
"step": 158
},
{
"epoch": 80.0,
"grad_norm": 1.6037031412124634,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.0603,
"step": 160
},
{
"epoch": 80.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.3170202970504761,
"eval_runtime": 0.615,
"eval_samples_per_second": 69.918,
"eval_steps_per_second": 3.252,
"step": 160
},
{
"epoch": 81.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.33968400955200195,
"eval_runtime": 0.6268,
"eval_samples_per_second": 68.606,
"eval_steps_per_second": 3.191,
"step": 162
},
{
"epoch": 82.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.3537546992301941,
"eval_runtime": 0.6325,
"eval_samples_per_second": 67.982,
"eval_steps_per_second": 3.162,
"step": 164
},
{
"epoch": 83.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.3461407721042633,
"eval_runtime": 0.6296,
"eval_samples_per_second": 68.3,
"eval_steps_per_second": 3.177,
"step": 166
},
{
"epoch": 84.0,
"eval_accuracy": 0.813953488372093,
"eval_loss": 0.33368217945098877,
"eval_runtime": 0.6261,
"eval_samples_per_second": 68.682,
"eval_steps_per_second": 3.195,
"step": 168
},
{
"epoch": 85.0,
"grad_norm": 2.1904349327087402,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0653,
"step": 170
},
{
"epoch": 85.0,
"eval_accuracy": 0.8372093023255814,
"eval_loss": 0.3330100178718567,
"eval_runtime": 0.6375,
"eval_samples_per_second": 67.448,
"eval_steps_per_second": 3.137,
"step": 170
},
{
"epoch": 86.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.345066636800766,
"eval_runtime": 0.6175,
"eval_samples_per_second": 69.64,
"eval_steps_per_second": 3.239,
"step": 172
},
{
"epoch": 87.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.3611831068992615,
"eval_runtime": 0.6169,
"eval_samples_per_second": 69.709,
"eval_steps_per_second": 3.242,
"step": 174
},
{
"epoch": 88.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.382154643535614,
"eval_runtime": 0.6328,
"eval_samples_per_second": 67.955,
"eval_steps_per_second": 3.161,
"step": 176
},
{
"epoch": 89.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.38750723004341125,
"eval_runtime": 0.6278,
"eval_samples_per_second": 68.495,
"eval_steps_per_second": 3.186,
"step": 178
},
{
"epoch": 90.0,
"grad_norm": 1.9912004470825195,
"learning_rate": 5.555555555555556e-06,
"loss": 0.0571,
"step": 180
},
{
"epoch": 90.0,
"eval_accuracy": 0.8604651162790697,
"eval_loss": 0.38448840379714966,
"eval_runtime": 0.6272,
"eval_samples_per_second": 68.554,
"eval_steps_per_second": 3.189,
"step": 180
},
{
"epoch": 91.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.3641882836818695,
"eval_runtime": 0.625,
"eval_samples_per_second": 68.795,
"eval_steps_per_second": 3.2,
"step": 182
},
{
"epoch": 92.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.3528924584388733,
"eval_runtime": 0.631,
"eval_samples_per_second": 68.15,
"eval_steps_per_second": 3.17,
"step": 184
},
{
"epoch": 93.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.347101092338562,
"eval_runtime": 0.6246,
"eval_samples_per_second": 68.848,
"eval_steps_per_second": 3.202,
"step": 186
},
{
"epoch": 94.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.35397639870643616,
"eval_runtime": 0.6263,
"eval_samples_per_second": 68.662,
"eval_steps_per_second": 3.194,
"step": 188
},
{
"epoch": 95.0,
"grad_norm": 1.0477629899978638,
"learning_rate": 2.777777777777778e-06,
"loss": 0.069,
"step": 190
},
{
"epoch": 95.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.36087337136268616,
"eval_runtime": 0.6222,
"eval_samples_per_second": 69.109,
"eval_steps_per_second": 3.214,
"step": 190
},
{
"epoch": 96.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.3609488904476166,
"eval_runtime": 0.6221,
"eval_samples_per_second": 69.119,
"eval_steps_per_second": 3.215,
"step": 192
},
{
"epoch": 97.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.3633655905723572,
"eval_runtime": 0.6181,
"eval_samples_per_second": 69.562,
"eval_steps_per_second": 3.235,
"step": 194
},
{
"epoch": 98.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.3627144694328308,
"eval_runtime": 0.6291,
"eval_samples_per_second": 68.35,
"eval_steps_per_second": 3.179,
"step": 196
},
{
"epoch": 99.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.36094027757644653,
"eval_runtime": 0.6307,
"eval_samples_per_second": 68.181,
"eval_steps_per_second": 3.171,
"step": 198
},
{
"epoch": 100.0,
"grad_norm": 1.145646572113037,
"learning_rate": 0.0,
"loss": 0.0667,
"step": 200
},
{
"epoch": 100.0,
"eval_accuracy": 0.8837209302325582,
"eval_loss": 0.36037373542785645,
"eval_runtime": 0.6299,
"eval_samples_per_second": 68.267,
"eval_steps_per_second": 3.175,
"step": 200
},
{
"epoch": 100.0,
"step": 200,
"total_flos": 1.867590382823424e+18,
"train_loss": 0.15281517714262008,
"train_runtime": 1123.7579,
"train_samples_per_second": 21.446,
"train_steps_per_second": 0.178
},
{
"epoch": 100.0,
"eval_accuracy": 0.9069767441860465,
"eval_loss": 0.30976200103759766,
"eval_runtime": 0.6605,
"eval_samples_per_second": 65.101,
"eval_steps_per_second": 3.028,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.867590382823424e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}