pszemraj's picture
load model from drive and convert
9232bf9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.15840619002650258,
"global_step": 325,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.7142857142857135e-05,
"loss": 1.9649,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 0.00011428571428571427,
"loss": 2.0221,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 0.0001714285714285714,
"loss": 1.9956,
"step": 6
},
{
"epoch": 0.0,
"learning_rate": 0.00022857142857142854,
"loss": 1.9373,
"step": 8
},
{
"epoch": 0.0,
"learning_rate": 0.0002857142857142857,
"loss": 2.0042,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 0.0003428571428571428,
"loss": 1.9593,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 0.00039999999999999996,
"loss": 2.031,
"step": 14
},
{
"epoch": 0.01,
"learning_rate": 0.0004571428571428571,
"loss": 1.9644,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 0.0005142857142857142,
"loss": 1.9968,
"step": 18
},
{
"epoch": 0.01,
"learning_rate": 0.0005714285714285714,
"loss": 1.9694,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.0005999996407482917,
"loss": 1.9885,
"step": 22
},
{
"epoch": 0.01,
"learning_rate": 0.0005999967667397879,
"loss": 1.9295,
"step": 24
},
{
"epoch": 0.01,
"learning_rate": 0.0005999910187503132,
"loss": 1.9646,
"step": 26
},
{
"epoch": 0.01,
"learning_rate": 0.0005999823968349338,
"loss": 1.9615,
"step": 28
},
{
"epoch": 0.01,
"learning_rate": 0.000599970901076248,
"loss": 1.9456,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 0.0005999565315843857,
"loss": 2.014,
"step": 32
},
{
"epoch": 0.02,
"learning_rate": 0.0005999392884970068,
"loss": 1.9118,
"step": 34
},
{
"epoch": 0.02,
"learning_rate": 0.0005999191719793011,
"loss": 1.944,
"step": 36
},
{
"epoch": 0.02,
"learning_rate": 0.0005998961822239856,
"loss": 1.9475,
"step": 38
},
{
"epoch": 0.02,
"learning_rate": 0.000599870319451303,
"loss": 2.038,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 0.0005998415839090198,
"loss": 1.9438,
"step": 42
},
{
"epoch": 0.02,
"learning_rate": 0.0005998099758724235,
"loss": 2.0804,
"step": 44
},
{
"epoch": 0.02,
"learning_rate": 0.0005997754956443205,
"loss": 1.9767,
"step": 46
},
{
"epoch": 0.02,
"learning_rate": 0.0005997381435550326,
"loss": 1.9322,
"step": 48
},
{
"epoch": 0.02,
"learning_rate": 0.0005996979199623944,
"loss": 1.9276,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 0.0005996548252517495,
"loss": 1.9933,
"step": 52
},
{
"epoch": 0.03,
"learning_rate": 0.0005996088598359469,
"loss": 1.8901,
"step": 54
},
{
"epoch": 0.03,
"learning_rate": 0.0005995600241553371,
"loss": 1.9472,
"step": 56
},
{
"epoch": 0.03,
"learning_rate": 0.000599508318677768,
"loss": 1.9855,
"step": 58
},
{
"epoch": 0.03,
"learning_rate": 0.00059945374389858,
"loss": 1.9887,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 0.0005993963003406018,
"loss": 1.9798,
"step": 62
},
{
"epoch": 0.03,
"learning_rate": 0.0005993359885541448,
"loss": 1.9956,
"step": 64
},
{
"epoch": 0.03,
"learning_rate": 0.0005992728091169984,
"loss": 1.9411,
"step": 66
},
{
"epoch": 0.03,
"learning_rate": 0.0005992067626344242,
"loss": 1.9722,
"step": 68
},
{
"epoch": 0.03,
"learning_rate": 0.00059913784973915,
"loss": 1.97,
"step": 70
},
{
"epoch": 0.04,
"learning_rate": 0.0005990660710913641,
"loss": 1.9612,
"step": 72
},
{
"epoch": 0.04,
"learning_rate": 0.0005989914273787089,
"loss": 1.8503,
"step": 74
},
{
"epoch": 0.04,
"learning_rate": 0.0005989139193162741,
"loss": 1.992,
"step": 76
},
{
"epoch": 0.04,
"learning_rate": 0.00059883354764659,
"loss": 1.9675,
"step": 78
},
{
"epoch": 0.04,
"learning_rate": 0.0005987503131396204,
"loss": 1.9609,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 0.0005986642165927551,
"loss": 2.0349,
"step": 82
},
{
"epoch": 0.04,
"learning_rate": 0.0005985752588308026,
"loss": 1.9824,
"step": 84
},
{
"epoch": 0.04,
"learning_rate": 0.0005984834407059817,
"loss": 1.9017,
"step": 86
},
{
"epoch": 0.04,
"learning_rate": 0.0005983887630979137,
"loss": 1.903,
"step": 88
},
{
"epoch": 0.04,
"learning_rate": 0.000598291226913614,
"loss": 1.9067,
"step": 90
},
{
"epoch": 0.04,
"learning_rate": 0.000598190833087483,
"loss": 1.941,
"step": 92
},
{
"epoch": 0.05,
"learning_rate": 0.0005980875825812974,
"loss": 1.9856,
"step": 94
},
{
"epoch": 0.05,
"learning_rate": 0.0005979814763842014,
"loss": 1.9555,
"step": 96
},
{
"epoch": 0.05,
"learning_rate": 0.0005978725155126967,
"loss": 1.9408,
"step": 98
},
{
"epoch": 0.05,
"learning_rate": 0.0005977607010106324,
"loss": 2.0131,
"step": 100
},
{
"epoch": 0.05,
"learning_rate": 0.0005976460339491963,
"loss": 1.9499,
"step": 102
},
{
"epoch": 0.05,
"learning_rate": 0.000597528515426903,
"loss": 1.9381,
"step": 104
},
{
"epoch": 0.05,
"learning_rate": 0.0005974081465695849,
"loss": 1.9805,
"step": 106
},
{
"epoch": 0.05,
"learning_rate": 0.0005972849285303804,
"loss": 1.8787,
"step": 108
},
{
"epoch": 0.05,
"learning_rate": 0.0005971588624897232,
"loss": 1.8912,
"step": 110
},
{
"epoch": 0.05,
"learning_rate": 0.0005970299496553309,
"loss": 1.9536,
"step": 112
},
{
"epoch": 0.06,
"learning_rate": 0.0005968981912621937,
"loss": 1.9388,
"step": 114
},
{
"epoch": 0.06,
"learning_rate": 0.0005967635885725623,
"loss": 2.0041,
"step": 116
},
{
"epoch": 0.06,
"learning_rate": 0.0005966261428759357,
"loss": 1.9447,
"step": 118
},
{
"epoch": 0.06,
"learning_rate": 0.0005964858554890492,
"loss": 2.0031,
"step": 120
},
{
"epoch": 0.06,
"learning_rate": 0.0005963427277558616,
"loss": 1.9063,
"step": 122
},
{
"epoch": 0.06,
"learning_rate": 0.0005961967610475422,
"loss": 1.9492,
"step": 124
},
{
"epoch": 0.06,
"learning_rate": 0.0005960479567624578,
"loss": 1.9956,
"step": 126
},
{
"epoch": 0.06,
"learning_rate": 0.0005958963163261595,
"loss": 1.9329,
"step": 128
},
{
"epoch": 0.06,
"learning_rate": 0.0005957418411913688,
"loss": 1.9424,
"step": 130
},
{
"epoch": 0.06,
"learning_rate": 0.0005955845328379636,
"loss": 1.9105,
"step": 132
},
{
"epoch": 0.07,
"learning_rate": 0.000595424392772964,
"loss": 1.9439,
"step": 134
},
{
"epoch": 0.07,
"learning_rate": 0.0005952614225305184,
"loss": 1.9586,
"step": 136
},
{
"epoch": 0.07,
"learning_rate": 0.0005950956236718882,
"loss": 1.8851,
"step": 138
},
{
"epoch": 0.07,
"learning_rate": 0.0005949269977854329,
"loss": 1.9031,
"step": 140
},
{
"epoch": 0.07,
"learning_rate": 0.0005947555464865954,
"loss": 1.9294,
"step": 142
},
{
"epoch": 0.07,
"learning_rate": 0.000594581271417886,
"loss": 1.9779,
"step": 144
},
{
"epoch": 0.07,
"learning_rate": 0.0005944041742488665,
"loss": 1.9515,
"step": 146
},
{
"epoch": 0.07,
"learning_rate": 0.0005942242566761351,
"loss": 1.9249,
"step": 148
},
{
"epoch": 0.07,
"learning_rate": 0.0005940415204233092,
"loss": 1.9104,
"step": 150
},
{
"epoch": 0.07,
"learning_rate": 0.0005938559672410093,
"loss": 1.9548,
"step": 152
},
{
"epoch": 0.08,
"learning_rate": 0.0005936675989068425,
"loss": 1.9314,
"step": 154
},
{
"epoch": 0.08,
"learning_rate": 0.0005934764172253849,
"loss": 1.9468,
"step": 156
},
{
"epoch": 0.08,
"learning_rate": 0.0005932824240281645,
"loss": 1.9821,
"step": 158
},
{
"epoch": 0.08,
"learning_rate": 0.0005930856211736438,
"loss": 1.9609,
"step": 160
},
{
"epoch": 0.08,
"learning_rate": 0.0005928860105472022,
"loss": 1.9261,
"step": 162
},
{
"epoch": 0.08,
"learning_rate": 0.0005926835940611172,
"loss": 1.9594,
"step": 164
},
{
"epoch": 0.08,
"learning_rate": 0.000592478373654547,
"loss": 1.8914,
"step": 166
},
{
"epoch": 0.08,
"learning_rate": 0.0005922703512935113,
"loss": 1.9509,
"step": 168
},
{
"epoch": 0.08,
"learning_rate": 0.0005920595289708723,
"loss": 1.9988,
"step": 170
},
{
"epoch": 0.08,
"learning_rate": 0.0005918459087063165,
"loss": 1.9886,
"step": 172
},
{
"epoch": 0.08,
"learning_rate": 0.0005916294925463346,
"loss": 2.0024,
"step": 174
},
{
"epoch": 0.09,
"learning_rate": 0.0005914102825642018,
"loss": 1.859,
"step": 176
},
{
"epoch": 0.09,
"learning_rate": 0.0005911882808599586,
"loss": 1.9439,
"step": 178
},
{
"epoch": 0.09,
"learning_rate": 0.0005909634895603902,
"loss": 1.9823,
"step": 180
},
{
"epoch": 0.09,
"learning_rate": 0.000590735910819006,
"loss": 1.9308,
"step": 182
},
{
"epoch": 0.09,
"learning_rate": 0.0005905055468160197,
"loss": 1.9459,
"step": 184
},
{
"epoch": 0.09,
"learning_rate": 0.0005902723997583274,
"loss": 1.9146,
"step": 186
},
{
"epoch": 0.09,
"learning_rate": 0.0005900364718794873,
"loss": 1.9036,
"step": 188
},
{
"epoch": 0.09,
"learning_rate": 0.0005897977654396977,
"loss": 1.9035,
"step": 190
},
{
"epoch": 0.09,
"learning_rate": 0.000589556282725776,
"loss": 1.9607,
"step": 192
},
{
"epoch": 0.09,
"learning_rate": 0.0005893120260511362,
"loss": 2.0468,
"step": 194
},
{
"epoch": 0.1,
"learning_rate": 0.0005890649977557668,
"loss": 1.9687,
"step": 196
},
{
"epoch": 0.1,
"learning_rate": 0.0005888152002062089,
"loss": 1.9958,
"step": 198
},
{
"epoch": 0.1,
"learning_rate": 0.0005885626357955329,
"loss": 1.9025,
"step": 200
},
{
"epoch": 0.1,
"learning_rate": 0.0005883073069433159,
"loss": 1.9077,
"step": 202
},
{
"epoch": 0.1,
"learning_rate": 0.0005880492160956185,
"loss": 1.9494,
"step": 204
},
{
"epoch": 0.1,
"learning_rate": 0.0005877883657249612,
"loss": 1.8716,
"step": 206
},
{
"epoch": 0.1,
"learning_rate": 0.000587524758330301,
"loss": 1.9075,
"step": 208
},
{
"epoch": 0.1,
"learning_rate": 0.0005872583964370073,
"loss": 1.9406,
"step": 210
},
{
"epoch": 0.1,
"learning_rate": 0.0005869892825968375,
"loss": 1.9179,
"step": 212
},
{
"epoch": 0.1,
"learning_rate": 0.0005867174193879131,
"loss": 1.9702,
"step": 214
},
{
"epoch": 0.11,
"learning_rate": 0.0005864428094146943,
"loss": 1.9297,
"step": 216
},
{
"epoch": 0.11,
"learning_rate": 0.0005861654553079557,
"loss": 1.8467,
"step": 218
},
{
"epoch": 0.11,
"learning_rate": 0.0005858853597247606,
"loss": 1.9145,
"step": 220
},
{
"epoch": 0.11,
"learning_rate": 0.0005856025253484358,
"loss": 1.944,
"step": 222
},
{
"epoch": 0.11,
"learning_rate": 0.0005853169548885461,
"loss": 1.9321,
"step": 224
},
{
"epoch": 0.11,
"learning_rate": 0.0005850286510808675,
"loss": 1.9838,
"step": 226
},
{
"epoch": 0.11,
"learning_rate": 0.0005847376166873624,
"loss": 1.9891,
"step": 228
},
{
"epoch": 0.11,
"learning_rate": 0.0005844438544961515,
"loss": 1.9384,
"step": 230
},
{
"epoch": 0.11,
"learning_rate": 0.0005841473673214886,
"loss": 1.8826,
"step": 232
},
{
"epoch": 0.11,
"learning_rate": 0.0005838481580037324,
"loss": 1.8983,
"step": 234
},
{
"epoch": 0.12,
"learning_rate": 0.0005835462294093202,
"loss": 1.8804,
"step": 236
},
{
"epoch": 0.12,
"learning_rate": 0.00058324158443074,
"loss": 1.8997,
"step": 238
},
{
"epoch": 0.12,
"learning_rate": 0.0005829342259865026,
"loss": 1.9478,
"step": 240
},
{
"epoch": 0.12,
"learning_rate": 0.0005826241570211144,
"loss": 1.9727,
"step": 242
},
{
"epoch": 0.12,
"learning_rate": 0.0005823113805050482,
"loss": 1.9216,
"step": 244
},
{
"epoch": 0.12,
"learning_rate": 0.0005819958994347157,
"loss": 1.9208,
"step": 246
},
{
"epoch": 0.12,
"learning_rate": 0.000581677716832438,
"loss": 1.9201,
"step": 248
},
{
"epoch": 0.12,
"learning_rate": 0.0005813568357464172,
"loss": 1.869,
"step": 250
},
{
"epoch": 0.12,
"learning_rate": 0.0005810332592507066,
"loss": 1.9111,
"step": 252
},
{
"epoch": 0.12,
"learning_rate": 0.0005807069904451822,
"loss": 1.8696,
"step": 254
},
{
"epoch": 0.12,
"learning_rate": 0.0005803780324555121,
"loss": 1.8946,
"step": 256
},
{
"epoch": 0.13,
"learning_rate": 0.0005800463884331269,
"loss": 1.9641,
"step": 258
},
{
"epoch": 0.13,
"learning_rate": 0.0005797120615551896,
"loss": 1.8923,
"step": 260
},
{
"epoch": 0.13,
"learning_rate": 0.0005793750550245648,
"loss": 1.8612,
"step": 262
},
{
"epoch": 0.13,
"learning_rate": 0.0005790353720697887,
"loss": 1.927,
"step": 264
},
{
"epoch": 0.13,
"learning_rate": 0.0005786930159450374,
"loss": 1.9709,
"step": 266
},
{
"epoch": 0.13,
"learning_rate": 0.0005783479899300962,
"loss": 1.9665,
"step": 268
},
{
"epoch": 0.13,
"learning_rate": 0.0005780002973303283,
"loss": 1.8657,
"step": 270
},
{
"epoch": 0.13,
"learning_rate": 0.0005776499414766424,
"loss": 2.0055,
"step": 272
},
{
"epoch": 0.13,
"learning_rate": 0.0005772969257254615,
"loss": 1.9147,
"step": 274
},
{
"epoch": 0.13,
"learning_rate": 0.0005769412534586908,
"loss": 1.9383,
"step": 276
},
{
"epoch": 0.14,
"learning_rate": 0.0005765829280836846,
"loss": 1.9575,
"step": 278
},
{
"epoch": 0.14,
"learning_rate": 0.0005762219530332142,
"loss": 1.9192,
"step": 280
},
{
"epoch": 0.14,
"learning_rate": 0.0005758583317654352,
"loss": 1.8842,
"step": 282
},
{
"epoch": 0.14,
"learning_rate": 0.0005754920677638535,
"loss": 1.9905,
"step": 284
},
{
"epoch": 0.14,
"learning_rate": 0.000575123164537293,
"loss": 1.9686,
"step": 286
},
{
"epoch": 0.14,
"learning_rate": 0.0005747516256198616,
"loss": 2.0003,
"step": 288
},
{
"epoch": 0.14,
"learning_rate": 0.0005743774545709163,
"loss": 1.9195,
"step": 290
},
{
"epoch": 0.14,
"learning_rate": 0.000574000654975031,
"loss": 1.8899,
"step": 292
},
{
"epoch": 0.14,
"learning_rate": 0.0005736212304419609,
"loss": 1.9143,
"step": 294
},
{
"epoch": 0.14,
"learning_rate": 0.000573239184606608,
"loss": 1.8431,
"step": 296
},
{
"epoch": 0.15,
"learning_rate": 0.0005728545211289866,
"loss": 1.8978,
"step": 298
},
{
"epoch": 0.15,
"learning_rate": 0.0005724672436941882,
"loss": 1.9017,
"step": 300
},
{
"epoch": 0.15,
"learning_rate": 0.0005720773560123461,
"loss": 1.8912,
"step": 302
},
{
"epoch": 0.15,
"learning_rate": 0.0005716848618185996,
"loss": 1.9412,
"step": 304
},
{
"epoch": 0.15,
"learning_rate": 0.000571289764873059,
"loss": 1.8843,
"step": 306
},
{
"epoch": 0.15,
"learning_rate": 0.0005708920689607684,
"loss": 1.8971,
"step": 308
},
{
"epoch": 0.15,
"learning_rate": 0.0005704917778916709,
"loss": 1.9243,
"step": 310
},
{
"epoch": 0.15,
"learning_rate": 0.0005700888955005706,
"loss": 1.8342,
"step": 312
},
{
"epoch": 0.15,
"learning_rate": 0.000569683425647097,
"loss": 1.8725,
"step": 314
},
{
"epoch": 0.15,
"learning_rate": 0.0005692753722156673,
"loss": 1.8597,
"step": 316
},
{
"epoch": 0.15,
"learning_rate": 0.0005688647391154496,
"loss": 1.954,
"step": 318
},
{
"epoch": 0.16,
"learning_rate": 0.0005684515302803256,
"loss": 1.9454,
"step": 320
},
{
"epoch": 0.16,
"learning_rate": 0.000568035749668852,
"loss": 1.9336,
"step": 322
},
{
"epoch": 0.16,
"learning_rate": 0.000567617401264224,
"loss": 1.96,
"step": 324
}
],
"max_steps": 2051,
"num_train_epochs": 1,
"total_flos": 4.557933379584e+17,
"trial_name": null,
"trial_params": null
}