poetry-author / checkpoint-4960 /trainer_state.json
dvs's picture
Upload folder using huggingface_hub
927a7b4 verified
{
"best_metric": 6.5507588386535645,
"best_model_checkpoint": "poetry-author/checkpoint-4960",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 4960,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 2.4193548387096776e-06,
"loss": 7.2029,
"step": 62
},
{
"epoch": 0.1,
"learning_rate": 4.919354838709678e-06,
"loss": 7.1816,
"step": 124
},
{
"epoch": 0.15,
"learning_rate": 7.419354838709678e-06,
"loss": 7.1418,
"step": 186
},
{
"epoch": 0.2,
"learning_rate": 9.919354838709679e-06,
"loss": 7.1678,
"step": 248
},
{
"epoch": 0.25,
"learning_rate": 1.2379032258064517e-05,
"loss": 7.1274,
"step": 310
},
{
"epoch": 0.3,
"learning_rate": 1.4879032258064519e-05,
"loss": 7.1285,
"step": 372
},
{
"epoch": 0.35,
"learning_rate": 1.7379032258064517e-05,
"loss": 7.108,
"step": 434
},
{
"epoch": 0.4,
"learning_rate": 1.9879032258064516e-05,
"loss": 7.0809,
"step": 496
},
{
"epoch": 0.45,
"learning_rate": 2.2379032258064516e-05,
"loss": 7.1026,
"step": 558
},
{
"epoch": 0.5,
"learning_rate": 2.4879032258064516e-05,
"loss": 7.0691,
"step": 620
},
{
"epoch": 0.55,
"learning_rate": 2.737903225806452e-05,
"loss": 7.0714,
"step": 682
},
{
"epoch": 0.6,
"learning_rate": 2.9879032258064516e-05,
"loss": 7.0847,
"step": 744
},
{
"epoch": 0.65,
"learning_rate": 3.2379032258064515e-05,
"loss": 7.0491,
"step": 806
},
{
"epoch": 0.7,
"learning_rate": 3.487903225806452e-05,
"loss": 7.0685,
"step": 868
},
{
"epoch": 0.75,
"learning_rate": 3.7379032258064515e-05,
"loss": 7.038,
"step": 930
},
{
"epoch": 0.8,
"learning_rate": 3.987903225806452e-05,
"loss": 7.0614,
"step": 992
},
{
"epoch": 0.85,
"learning_rate": 4.2379032258064514e-05,
"loss": 7.0234,
"step": 1054
},
{
"epoch": 0.9,
"learning_rate": 4.487903225806452e-05,
"loss": 6.9863,
"step": 1116
},
{
"epoch": 0.95,
"learning_rate": 4.737903225806452e-05,
"loss": 6.9664,
"step": 1178
},
{
"epoch": 1.0,
"learning_rate": 4.987903225806452e-05,
"loss": 6.9782,
"step": 1240
},
{
"epoch": 1.0,
"eval_accuracy": 0.02056451612903226,
"eval_f1_macro": 0.0006143466739070911,
"eval_f1_micro": 0.02056451612903226,
"eval_f1_weighted": 0.004146182182163288,
"eval_loss": 6.874168395996094,
"eval_precision_macro": 0.0003535209813895348,
"eval_precision_micro": 0.02056451612903226,
"eval_precision_weighted": 0.0024119848598185387,
"eval_recall_macro": 0.0037010590753155195,
"eval_recall_micro": 0.02056451612903226,
"eval_recall_weighted": 0.02056451612903226,
"eval_runtime": 5.5652,
"eval_samples_per_second": 445.625,
"eval_steps_per_second": 27.852,
"step": 1240
},
{
"epoch": 1.05,
"learning_rate": 4.9735663082437276e-05,
"loss": 6.9144,
"step": 1302
},
{
"epoch": 1.1,
"learning_rate": 4.94578853046595e-05,
"loss": 6.8315,
"step": 1364
},
{
"epoch": 1.15,
"learning_rate": 4.9180107526881726e-05,
"loss": 6.8663,
"step": 1426
},
{
"epoch": 1.2,
"learning_rate": 4.89068100358423e-05,
"loss": 6.837,
"step": 1488
},
{
"epoch": 1.25,
"learning_rate": 4.862903225806452e-05,
"loss": 6.7745,
"step": 1550
},
{
"epoch": 1.3,
"learning_rate": 4.835125448028674e-05,
"loss": 6.8747,
"step": 1612
},
{
"epoch": 1.35,
"learning_rate": 4.807347670250896e-05,
"loss": 6.8005,
"step": 1674
},
{
"epoch": 1.4,
"learning_rate": 4.7795698924731186e-05,
"loss": 6.8131,
"step": 1736
},
{
"epoch": 1.45,
"learning_rate": 4.751792114695341e-05,
"loss": 6.7554,
"step": 1798
},
{
"epoch": 1.5,
"learning_rate": 4.724014336917563e-05,
"loss": 6.774,
"step": 1860
},
{
"epoch": 1.55,
"learning_rate": 4.696236559139785e-05,
"loss": 6.7964,
"step": 1922
},
{
"epoch": 1.6,
"learning_rate": 4.6684587813620074e-05,
"loss": 6.8653,
"step": 1984
},
{
"epoch": 1.65,
"learning_rate": 4.64068100358423e-05,
"loss": 6.8209,
"step": 2046
},
{
"epoch": 1.7,
"learning_rate": 4.612903225806452e-05,
"loss": 6.7755,
"step": 2108
},
{
"epoch": 1.75,
"learning_rate": 4.585125448028674e-05,
"loss": 6.8384,
"step": 2170
},
{
"epoch": 1.8,
"learning_rate": 4.557347670250896e-05,
"loss": 6.7932,
"step": 2232
},
{
"epoch": 1.85,
"learning_rate": 4.5295698924731187e-05,
"loss": 6.8017,
"step": 2294
},
{
"epoch": 1.9,
"learning_rate": 4.5017921146953405e-05,
"loss": 6.7575,
"step": 2356
},
{
"epoch": 1.95,
"learning_rate": 4.474014336917563e-05,
"loss": 6.7087,
"step": 2418
},
{
"epoch": 2.0,
"learning_rate": 4.4462365591397856e-05,
"loss": 6.8306,
"step": 2480
},
{
"epoch": 2.0,
"eval_accuracy": 0.027822580645161292,
"eval_f1_macro": 0.002895880527713299,
"eval_f1_micro": 0.027822580645161292,
"eval_f1_weighted": 0.011518078961075304,
"eval_loss": 6.696753025054932,
"eval_precision_macro": 0.002399325952306467,
"eval_precision_micro": 0.027822580645161292,
"eval_precision_weighted": 0.008800287744676547,
"eval_recall_macro": 0.005697363214948901,
"eval_recall_micro": 0.027822580645161292,
"eval_recall_weighted": 0.027822580645161292,
"eval_runtime": 5.6109,
"eval_samples_per_second": 441.997,
"eval_steps_per_second": 27.625,
"step": 2480
},
{
"epoch": 2.05,
"learning_rate": 4.4184587813620074e-05,
"loss": 6.5756,
"step": 2542
},
{
"epoch": 2.1,
"learning_rate": 4.390681003584229e-05,
"loss": 6.5611,
"step": 2604
},
{
"epoch": 2.15,
"learning_rate": 4.362903225806452e-05,
"loss": 6.6159,
"step": 2666
},
{
"epoch": 2.2,
"learning_rate": 4.335125448028674e-05,
"loss": 6.5746,
"step": 2728
},
{
"epoch": 2.25,
"learning_rate": 4.307347670250896e-05,
"loss": 6.4498,
"step": 2790
},
{
"epoch": 2.3,
"learning_rate": 4.279569892473119e-05,
"loss": 6.5829,
"step": 2852
},
{
"epoch": 2.35,
"learning_rate": 4.2517921146953405e-05,
"loss": 6.4411,
"step": 2914
},
{
"epoch": 2.4,
"learning_rate": 4.224014336917563e-05,
"loss": 6.5321,
"step": 2976
},
{
"epoch": 2.45,
"learning_rate": 4.196236559139785e-05,
"loss": 6.5179,
"step": 3038
},
{
"epoch": 2.5,
"learning_rate": 4.1684587813620074e-05,
"loss": 6.5047,
"step": 3100
},
{
"epoch": 2.55,
"learning_rate": 4.14068100358423e-05,
"loss": 6.5946,
"step": 3162
},
{
"epoch": 2.6,
"learning_rate": 4.112903225806452e-05,
"loss": 6.519,
"step": 3224
},
{
"epoch": 2.65,
"learning_rate": 4.0851254480286736e-05,
"loss": 6.4516,
"step": 3286
},
{
"epoch": 2.7,
"learning_rate": 4.057347670250896e-05,
"loss": 6.5272,
"step": 3348
},
{
"epoch": 2.75,
"learning_rate": 4.029569892473119e-05,
"loss": 6.5356,
"step": 3410
},
{
"epoch": 2.8,
"learning_rate": 4.0017921146953405e-05,
"loss": 6.5008,
"step": 3472
},
{
"epoch": 2.85,
"learning_rate": 3.974014336917563e-05,
"loss": 6.5357,
"step": 3534
},
{
"epoch": 2.9,
"learning_rate": 3.94668458781362e-05,
"loss": 6.5125,
"step": 3596
},
{
"epoch": 2.95,
"learning_rate": 3.918906810035842e-05,
"loss": 6.5182,
"step": 3658
},
{
"epoch": 3.0,
"learning_rate": 3.891129032258065e-05,
"loss": 6.4911,
"step": 3720
},
{
"epoch": 3.0,
"eval_accuracy": 0.03951612903225806,
"eval_f1_macro": 0.005772741775243502,
"eval_f1_micro": 0.03951612903225806,
"eval_f1_weighted": 0.01871310289711212,
"eval_loss": 6.559847354888916,
"eval_precision_macro": 0.005108167219150119,
"eval_precision_micro": 0.03951612903225806,
"eval_precision_weighted": 0.015116924332361547,
"eval_recall_macro": 0.010902113654450544,
"eval_recall_micro": 0.03951612903225806,
"eval_recall_weighted": 0.03951612903225806,
"eval_runtime": 5.604,
"eval_samples_per_second": 442.538,
"eval_steps_per_second": 27.659,
"step": 3720
},
{
"epoch": 3.05,
"learning_rate": 3.863351254480287e-05,
"loss": 6.111,
"step": 3782
},
{
"epoch": 3.1,
"learning_rate": 3.835573476702509e-05,
"loss": 6.2635,
"step": 3844
},
{
"epoch": 3.15,
"learning_rate": 3.8077956989247316e-05,
"loss": 6.1807,
"step": 3906
},
{
"epoch": 3.2,
"learning_rate": 3.780017921146954e-05,
"loss": 6.2969,
"step": 3968
},
{
"epoch": 3.25,
"learning_rate": 3.752240143369175e-05,
"loss": 6.1509,
"step": 4030
},
{
"epoch": 3.3,
"learning_rate": 3.724462365591398e-05,
"loss": 6.1196,
"step": 4092
},
{
"epoch": 3.35,
"learning_rate": 3.69668458781362e-05,
"loss": 6.1425,
"step": 4154
},
{
"epoch": 3.4,
"learning_rate": 3.668906810035843e-05,
"loss": 6.1487,
"step": 4216
},
{
"epoch": 3.45,
"learning_rate": 3.641129032258065e-05,
"loss": 6.1336,
"step": 4278
},
{
"epoch": 3.5,
"learning_rate": 3.6133512544802866e-05,
"loss": 6.1035,
"step": 4340
},
{
"epoch": 3.55,
"learning_rate": 3.585573476702509e-05,
"loss": 6.168,
"step": 4402
},
{
"epoch": 3.6,
"learning_rate": 3.5577956989247316e-05,
"loss": 6.283,
"step": 4464
},
{
"epoch": 3.65,
"learning_rate": 3.5300179211469535e-05,
"loss": 6.146,
"step": 4526
},
{
"epoch": 3.7,
"learning_rate": 3.502240143369176e-05,
"loss": 6.1099,
"step": 4588
},
{
"epoch": 3.75,
"learning_rate": 3.4744623655913985e-05,
"loss": 6.2278,
"step": 4650
},
{
"epoch": 3.8,
"learning_rate": 3.4466845878136204e-05,
"loss": 6.0957,
"step": 4712
},
{
"epoch": 3.85,
"learning_rate": 3.418906810035842e-05,
"loss": 6.0431,
"step": 4774
},
{
"epoch": 3.9,
"learning_rate": 3.391129032258065e-05,
"loss": 6.0954,
"step": 4836
},
{
"epoch": 3.95,
"learning_rate": 3.363351254480287e-05,
"loss": 6.1678,
"step": 4898
},
{
"epoch": 4.0,
"learning_rate": 3.335573476702509e-05,
"loss": 6.1257,
"step": 4960
},
{
"epoch": 4.0,
"eval_accuracy": 0.04596774193548387,
"eval_f1_macro": 0.007553613837545024,
"eval_f1_micro": 0.04596774193548386,
"eval_f1_weighted": 0.02342410713485415,
"eval_loss": 6.5507588386535645,
"eval_precision_macro": 0.0064405619945614515,
"eval_precision_micro": 0.04596774193548387,
"eval_precision_weighted": 0.020352886732684962,
"eval_recall_macro": 0.017759772448305164,
"eval_recall_micro": 0.04596774193548387,
"eval_recall_weighted": 0.04596774193548387,
"eval_runtime": 5.6061,
"eval_samples_per_second": 442.376,
"eval_steps_per_second": 27.649,
"step": 4960
}
],
"logging_steps": 62,
"max_steps": 12400,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2638150224863232.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}