llm / checkpoint-495 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 495,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"learning_rate": 2.4e-05,
"loss": 3.6345,
"step": 6
},
{
"epoch": 0.36,
"learning_rate": 4.8e-05,
"loss": 3.6023,
"step": 12
},
{
"epoch": 0.55,
"learning_rate": 7.2e-05,
"loss": 3.5522,
"step": 18
},
{
"epoch": 0.73,
"learning_rate": 9.6e-05,
"loss": 3.558,
"step": 24
},
{
"epoch": 0.91,
"learning_rate": 0.00012,
"loss": 3.5927,
"step": 30
},
{
"epoch": 1.09,
"learning_rate": 0.000144,
"loss": 3.4643,
"step": 36
},
{
"epoch": 1.27,
"learning_rate": 0.000168,
"loss": 3.469,
"step": 42
},
{
"epoch": 1.45,
"learning_rate": 0.000192,
"loss": 3.5263,
"step": 48
},
{
"epoch": 1.64,
"learning_rate": 0.00019820224719101123,
"loss": 3.4205,
"step": 54
},
{
"epoch": 1.82,
"learning_rate": 0.0001955056179775281,
"loss": 3.3894,
"step": 60
},
{
"epoch": 2.0,
"learning_rate": 0.00019280898876404497,
"loss": 3.4589,
"step": 66
},
{
"epoch": 2.18,
"learning_rate": 0.0001901123595505618,
"loss": 3.312,
"step": 72
},
{
"epoch": 2.36,
"learning_rate": 0.00018741573033707868,
"loss": 3.4469,
"step": 78
},
{
"epoch": 2.55,
"learning_rate": 0.00018471910112359552,
"loss": 3.3239,
"step": 84
},
{
"epoch": 2.73,
"learning_rate": 0.00018202247191011236,
"loss": 3.4202,
"step": 90
},
{
"epoch": 2.91,
"learning_rate": 0.00017932584269662923,
"loss": 3.368,
"step": 96
},
{
"epoch": 3.09,
"learning_rate": 0.00017662921348314607,
"loss": 3.2992,
"step": 102
},
{
"epoch": 3.27,
"learning_rate": 0.0001739325842696629,
"loss": 3.2699,
"step": 108
},
{
"epoch": 3.45,
"learning_rate": 0.00017123595505617978,
"loss": 3.3384,
"step": 114
},
{
"epoch": 3.64,
"learning_rate": 0.00016853932584269662,
"loss": 3.3604,
"step": 120
},
{
"epoch": 3.82,
"learning_rate": 0.0001658426966292135,
"loss": 3.2875,
"step": 126
},
{
"epoch": 4.0,
"learning_rate": 0.00016314606741573036,
"loss": 3.3874,
"step": 132
},
{
"epoch": 4.18,
"learning_rate": 0.0001604494382022472,
"loss": 3.1982,
"step": 138
},
{
"epoch": 4.36,
"learning_rate": 0.00015775280898876404,
"loss": 3.323,
"step": 144
},
{
"epoch": 4.55,
"learning_rate": 0.0001550561797752809,
"loss": 3.3645,
"step": 150
},
{
"epoch": 4.73,
"learning_rate": 0.00015235955056179775,
"loss": 3.3209,
"step": 156
},
{
"epoch": 4.91,
"learning_rate": 0.00014966292134831462,
"loss": 3.28,
"step": 162
},
{
"epoch": 5.09,
"learning_rate": 0.00014696629213483146,
"loss": 3.4203,
"step": 168
},
{
"epoch": 5.27,
"learning_rate": 0.0001442696629213483,
"loss": 3.1531,
"step": 174
},
{
"epoch": 5.45,
"learning_rate": 0.00014157303370786517,
"loss": 3.1857,
"step": 180
},
{
"epoch": 5.64,
"learning_rate": 0.00013887640449438204,
"loss": 3.2815,
"step": 186
},
{
"epoch": 5.82,
"learning_rate": 0.00013617977528089889,
"loss": 3.2879,
"step": 192
},
{
"epoch": 6.0,
"learning_rate": 0.00013348314606741575,
"loss": 3.4061,
"step": 198
},
{
"epoch": 6.18,
"learning_rate": 0.0001307865168539326,
"loss": 3.3026,
"step": 204
},
{
"epoch": 6.36,
"learning_rate": 0.00012808988764044944,
"loss": 3.3174,
"step": 210
},
{
"epoch": 6.55,
"learning_rate": 0.0001253932584269663,
"loss": 3.297,
"step": 216
},
{
"epoch": 6.73,
"learning_rate": 0.00012269662921348315,
"loss": 3.2406,
"step": 222
},
{
"epoch": 6.91,
"learning_rate": 0.00012,
"loss": 3.1832,
"step": 228
},
{
"epoch": 7.09,
"learning_rate": 0.00011730337078651686,
"loss": 3.2514,
"step": 234
},
{
"epoch": 7.27,
"learning_rate": 0.0001146067415730337,
"loss": 3.2083,
"step": 240
},
{
"epoch": 7.45,
"learning_rate": 0.00011191011235955056,
"loss": 3.1881,
"step": 246
},
{
"epoch": 7.64,
"learning_rate": 0.00010921348314606742,
"loss": 3.1987,
"step": 252
},
{
"epoch": 7.82,
"learning_rate": 0.00010651685393258428,
"loss": 3.2388,
"step": 258
},
{
"epoch": 8.0,
"learning_rate": 0.00010382022471910113,
"loss": 3.3154,
"step": 264
},
{
"epoch": 8.18,
"learning_rate": 0.00010112359550561799,
"loss": 3.2307,
"step": 270
},
{
"epoch": 8.36,
"learning_rate": 9.842696629213483e-05,
"loss": 3.2064,
"step": 276
},
{
"epoch": 8.55,
"learning_rate": 9.573033707865169e-05,
"loss": 3.2081,
"step": 282
},
{
"epoch": 8.73,
"learning_rate": 9.303370786516854e-05,
"loss": 3.2904,
"step": 288
},
{
"epoch": 8.91,
"learning_rate": 9.03370786516854e-05,
"loss": 3.2104,
"step": 294
},
{
"epoch": 9.09,
"learning_rate": 8.764044943820225e-05,
"loss": 3.3033,
"step": 300
},
{
"epoch": 9.27,
"learning_rate": 8.494382022471911e-05,
"loss": 3.2296,
"step": 306
},
{
"epoch": 9.45,
"learning_rate": 8.224719101123596e-05,
"loss": 3.2077,
"step": 312
},
{
"epoch": 9.64,
"learning_rate": 7.95505617977528e-05,
"loss": 3.2331,
"step": 318
},
{
"epoch": 9.82,
"learning_rate": 7.685393258426966e-05,
"loss": 3.2844,
"step": 324
},
{
"epoch": 10.0,
"learning_rate": 7.415730337078653e-05,
"loss": 3.0974,
"step": 330
},
{
"epoch": 10.18,
"learning_rate": 7.146067415730337e-05,
"loss": 3.2892,
"step": 336
},
{
"epoch": 10.36,
"learning_rate": 6.876404494382023e-05,
"loss": 3.1417,
"step": 342
},
{
"epoch": 10.55,
"learning_rate": 6.606741573033708e-05,
"loss": 3.2408,
"step": 348
},
{
"epoch": 10.73,
"learning_rate": 6.337078651685394e-05,
"loss": 3.3139,
"step": 354
},
{
"epoch": 10.91,
"learning_rate": 6.067415730337079e-05,
"loss": 3.1222,
"step": 360
},
{
"epoch": 11.09,
"learning_rate": 5.7977528089887646e-05,
"loss": 3.2575,
"step": 366
},
{
"epoch": 11.27,
"learning_rate": 5.5280898876404495e-05,
"loss": 3.356,
"step": 372
},
{
"epoch": 11.45,
"learning_rate": 5.258426966292135e-05,
"loss": 3.129,
"step": 378
},
{
"epoch": 11.64,
"learning_rate": 4.9887640449438205e-05,
"loss": 3.1446,
"step": 384
},
{
"epoch": 11.82,
"learning_rate": 4.719101123595506e-05,
"loss": 3.1887,
"step": 390
},
{
"epoch": 12.0,
"learning_rate": 4.4494382022471916e-05,
"loss": 3.1785,
"step": 396
},
{
"epoch": 12.18,
"learning_rate": 4.1797752808988764e-05,
"loss": 3.278,
"step": 402
},
{
"epoch": 12.36,
"learning_rate": 3.910112359550562e-05,
"loss": 3.3521,
"step": 408
},
{
"epoch": 12.55,
"learning_rate": 3.6404494382022475e-05,
"loss": 3.1367,
"step": 414
},
{
"epoch": 12.73,
"learning_rate": 3.370786516853933e-05,
"loss": 3.1938,
"step": 420
},
{
"epoch": 12.91,
"learning_rate": 3.1011235955056185e-05,
"loss": 3.0515,
"step": 426
},
{
"epoch": 13.09,
"learning_rate": 2.8314606741573037e-05,
"loss": 3.187,
"step": 432
},
{
"epoch": 13.27,
"learning_rate": 2.5617977528089885e-05,
"loss": 3.2362,
"step": 438
},
{
"epoch": 13.45,
"learning_rate": 2.292134831460674e-05,
"loss": 3.1142,
"step": 444
},
{
"epoch": 13.64,
"learning_rate": 2.0224719101123596e-05,
"loss": 3.2631,
"step": 450
},
{
"epoch": 13.82,
"learning_rate": 1.752808988764045e-05,
"loss": 3.2107,
"step": 456
},
{
"epoch": 14.0,
"learning_rate": 1.4831460674157305e-05,
"loss": 3.1778,
"step": 462
},
{
"epoch": 14.18,
"learning_rate": 1.2134831460674158e-05,
"loss": 3.1921,
"step": 468
},
{
"epoch": 14.36,
"learning_rate": 9.438202247191012e-06,
"loss": 3.2432,
"step": 474
},
{
"epoch": 14.55,
"learning_rate": 6.741573033707865e-06,
"loss": 3.2269,
"step": 480
},
{
"epoch": 14.73,
"learning_rate": 4.044943820224719e-06,
"loss": 3.2172,
"step": 486
},
{
"epoch": 14.91,
"learning_rate": 1.3483146067415732e-06,
"loss": 3.1409,
"step": 492
}
],
"max_steps": 495,
"num_train_epochs": 15,
"total_flos": 517312924876800.0,
"trial_name": null,
"trial_params": null
}
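
The log_history above records the training loss and learning rate every 6 optimizer steps across 15 epochs (495 steps total). A minimal sketch, assuming the file is saved locally as trainer_state.json and that matplotlib is installed, for loading the state and plotting the logged loss curve and learning-rate schedule (this script is illustrative and not part of the checkpoint):

import json

import matplotlib.pyplot as plt

# Load the Trainer state written alongside the checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry holds epoch, learning_rate, loss, and step.
steps = [entry["step"] for entry in state["log_history"]]
losses = [entry["loss"] for entry in state["log_history"]]
lrs = [entry["learning_rate"] for entry in state["log_history"]]

# Plot loss and learning rate against the global step on shared x-axis.
fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
plt.tight_layout()
plt.show()

Plotted this way, the schedule shows a linear warmup to roughly 2e-4 around step 50 followed by a linear decay toward zero at max_steps = 495, while the loss drifts from about 3.63 down to the 3.1-3.2 range.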