weapon_1 / checkpoint-190 /trainer_state.json
tosullivan's picture
Upload folder using huggingface_hub
c6e9748 verified
raw
history blame contribute delete
No virus
29.8 kB
{
"best_metric": 0.8072579503059387,
"best_model_checkpoint": "/tmp/model/checkpoint-190",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 190,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 5.263157894736843e-06,
"loss": 1.6094,
"step": 1
},
{
"epoch": 0.11,
"learning_rate": 1.0526315789473686e-05,
"loss": 1.6094,
"step": 2
},
{
"epoch": 0.16,
"learning_rate": 1.5789473684210526e-05,
"loss": 1.6096,
"step": 3
},
{
"epoch": 0.21,
"learning_rate": 2.1052631578947372e-05,
"loss": 1.6089,
"step": 4
},
{
"epoch": 0.26,
"learning_rate": 2.6315789473684212e-05,
"loss": 1.6085,
"step": 5
},
{
"epoch": 0.32,
"learning_rate": 3.157894736842105e-05,
"loss": 1.6068,
"step": 6
},
{
"epoch": 0.37,
"learning_rate": 3.68421052631579e-05,
"loss": 1.6105,
"step": 7
},
{
"epoch": 0.42,
"learning_rate": 4.2105263157894745e-05,
"loss": 1.6058,
"step": 8
},
{
"epoch": 0.47,
"learning_rate": 4.736842105263158e-05,
"loss": 1.6108,
"step": 9
},
{
"epoch": 0.53,
"learning_rate": 5.2631578947368424e-05,
"loss": 1.6079,
"step": 10
},
{
"epoch": 0.58,
"learning_rate": 5.7894736842105274e-05,
"loss": 1.6044,
"step": 11
},
{
"epoch": 0.63,
"learning_rate": 6.31578947368421e-05,
"loss": 1.6069,
"step": 12
},
{
"epoch": 0.68,
"learning_rate": 6.842105263157895e-05,
"loss": 1.6007,
"step": 13
},
{
"epoch": 0.74,
"learning_rate": 7.36842105263158e-05,
"loss": 1.6083,
"step": 14
},
{
"epoch": 0.79,
"learning_rate": 7.894736842105263e-05,
"loss": 1.6077,
"step": 15
},
{
"epoch": 0.84,
"learning_rate": 8.421052631578949e-05,
"loss": 1.5968,
"step": 16
},
{
"epoch": 0.89,
"learning_rate": 8.947368421052632e-05,
"loss": 1.5903,
"step": 17
},
{
"epoch": 0.95,
"learning_rate": 9.473684210526316e-05,
"loss": 1.6045,
"step": 18
},
{
"epoch": 1.0,
"learning_rate": 0.0001,
"loss": 1.5974,
"step": 19
},
{
"epoch": 1.0,
"eval_accuracy": 0.3076923076923077,
"eval_f1_macro": 0.09411764705882353,
"eval_f1_micro": 0.3076923076923077,
"eval_f1_weighted": 0.14479638009049772,
"eval_loss": 1.5974059104919434,
"eval_precision_macro": 0.06153846153846154,
"eval_precision_micro": 0.3076923076923077,
"eval_precision_weighted": 0.09467455621301776,
"eval_recall_macro": 0.2,
"eval_recall_micro": 0.3076923076923077,
"eval_recall_weighted": 0.3076923076923077,
"eval_runtime": 0.2784,
"eval_samples_per_second": 140.073,
"eval_steps_per_second": 10.775,
"step": 19
},
{
"epoch": 1.05,
"learning_rate": 0.00010526315789473685,
"loss": 1.5782,
"step": 20
},
{
"epoch": 1.11,
"learning_rate": 0.00011052631578947368,
"loss": 1.5568,
"step": 21
},
{
"epoch": 1.16,
"learning_rate": 0.00011578947368421055,
"loss": 1.603,
"step": 22
},
{
"epoch": 1.21,
"learning_rate": 0.00011578947368421055,
"loss": 1.5393,
"step": 23
},
{
"epoch": 1.26,
"learning_rate": 0.00012105263157894738,
"loss": 1.5972,
"step": 24
},
{
"epoch": 1.32,
"learning_rate": 0.0001263157894736842,
"loss": 1.6077,
"step": 25
},
{
"epoch": 1.37,
"learning_rate": 0.00013157894736842105,
"loss": 1.5841,
"step": 26
},
{
"epoch": 1.42,
"learning_rate": 0.0001368421052631579,
"loss": 1.5846,
"step": 27
},
{
"epoch": 1.47,
"learning_rate": 0.00014210526315789476,
"loss": 1.5916,
"step": 28
},
{
"epoch": 1.53,
"learning_rate": 0.0001473684210526316,
"loss": 1.5476,
"step": 29
},
{
"epoch": 1.58,
"learning_rate": 0.00015263157894736842,
"loss": 1.6294,
"step": 30
},
{
"epoch": 1.63,
"learning_rate": 0.00015789473684210527,
"loss": 1.571,
"step": 31
},
{
"epoch": 1.68,
"learning_rate": 0.0001631578947368421,
"loss": 1.6475,
"step": 32
},
{
"epoch": 1.74,
"learning_rate": 0.00016842105263157898,
"loss": 1.5652,
"step": 33
},
{
"epoch": 1.79,
"learning_rate": 0.0001736842105263158,
"loss": 1.5198,
"step": 34
},
{
"epoch": 1.84,
"learning_rate": 0.00017894736842105264,
"loss": 1.5658,
"step": 35
},
{
"epoch": 1.89,
"learning_rate": 0.00018421052631578948,
"loss": 1.5823,
"step": 36
},
{
"epoch": 1.95,
"learning_rate": 0.00018947368421052632,
"loss": 1.5813,
"step": 37
},
{
"epoch": 2.0,
"learning_rate": 0.00019473684210526317,
"loss": 1.5338,
"step": 38
},
{
"epoch": 2.0,
"eval_accuracy": 0.3076923076923077,
"eval_f1_macro": 0.09411764705882353,
"eval_f1_micro": 0.3076923076923077,
"eval_f1_weighted": 0.14479638009049772,
"eval_loss": 1.571889877319336,
"eval_precision_macro": 0.06153846153846154,
"eval_precision_micro": 0.3076923076923077,
"eval_precision_weighted": 0.09467455621301776,
"eval_recall_macro": 0.2,
"eval_recall_micro": 0.3076923076923077,
"eval_recall_weighted": 0.3076923076923077,
"eval_runtime": 0.1048,
"eval_samples_per_second": 372.122,
"eval_steps_per_second": 28.625,
"step": 38
},
{
"epoch": 2.05,
"learning_rate": 0.0002,
"loss": 1.5117,
"step": 39
},
{
"epoch": 2.11,
"learning_rate": 0.00020526315789473685,
"loss": 1.4995,
"step": 40
},
{
"epoch": 2.16,
"learning_rate": 0.0002105263157894737,
"loss": 1.6392,
"step": 41
},
{
"epoch": 2.21,
"learning_rate": 0.00021578947368421054,
"loss": 1.4529,
"step": 42
},
{
"epoch": 2.26,
"learning_rate": 0.00022105263157894735,
"loss": 1.5848,
"step": 43
},
{
"epoch": 2.32,
"learning_rate": 0.0002263157894736842,
"loss": 1.5591,
"step": 44
},
{
"epoch": 2.37,
"learning_rate": 0.0002315789473684211,
"loss": 1.5641,
"step": 45
},
{
"epoch": 2.42,
"learning_rate": 0.0002368421052631579,
"loss": 1.499,
"step": 46
},
{
"epoch": 2.47,
"learning_rate": 0.00024210526315789475,
"loss": 1.6476,
"step": 47
},
{
"epoch": 2.53,
"learning_rate": 0.0002473684210526316,
"loss": 1.4727,
"step": 48
},
{
"epoch": 2.58,
"learning_rate": 0.0002526315789473684,
"loss": 1.5519,
"step": 49
},
{
"epoch": 2.63,
"learning_rate": 0.0002578947368421053,
"loss": 1.5057,
"step": 50
},
{
"epoch": 2.68,
"learning_rate": 0.0002631578947368421,
"loss": 1.6801,
"step": 51
},
{
"epoch": 2.74,
"learning_rate": 0.00026842105263157897,
"loss": 1.5518,
"step": 52
},
{
"epoch": 2.79,
"learning_rate": 0.0002736842105263158,
"loss": 1.4608,
"step": 53
},
{
"epoch": 2.84,
"learning_rate": 0.0002789473684210526,
"loss": 1.5961,
"step": 54
},
{
"epoch": 2.89,
"learning_rate": 0.0002842105263157895,
"loss": 1.3564,
"step": 55
},
{
"epoch": 2.95,
"learning_rate": 0.00028947368421052634,
"loss": 1.5493,
"step": 56
},
{
"epoch": 3.0,
"learning_rate": 0.0002947368421052632,
"loss": 1.5723,
"step": 57
},
{
"epoch": 3.0,
"eval_accuracy": 0.3076923076923077,
"eval_f1_macro": 0.09411764705882353,
"eval_f1_micro": 0.3076923076923077,
"eval_f1_weighted": 0.14479638009049772,
"eval_loss": 1.5472002029418945,
"eval_precision_macro": 0.06153846153846154,
"eval_precision_micro": 0.3076923076923077,
"eval_precision_weighted": 0.09467455621301776,
"eval_recall_macro": 0.2,
"eval_recall_micro": 0.3076923076923077,
"eval_recall_weighted": 0.3076923076923077,
"eval_runtime": 0.1034,
"eval_samples_per_second": 377.243,
"eval_steps_per_second": 29.019,
"step": 57
},
{
"epoch": 3.05,
"learning_rate": 0.00030000000000000003,
"loss": 1.3807,
"step": 58
},
{
"epoch": 3.11,
"learning_rate": 0.00030526315789473684,
"loss": 1.4009,
"step": 59
},
{
"epoch": 3.16,
"learning_rate": 0.0003105263157894737,
"loss": 1.3475,
"step": 60
},
{
"epoch": 3.21,
"learning_rate": 0.00031578947368421053,
"loss": 1.4923,
"step": 61
},
{
"epoch": 3.26,
"learning_rate": 0.0003210526315789474,
"loss": 1.3802,
"step": 62
},
{
"epoch": 3.32,
"learning_rate": 0.0003263157894736842,
"loss": 1.4757,
"step": 63
},
{
"epoch": 3.37,
"learning_rate": 0.00033157894736842103,
"loss": 1.4177,
"step": 64
},
{
"epoch": 3.42,
"learning_rate": 0.00033684210526315796,
"loss": 1.7288,
"step": 65
},
{
"epoch": 3.47,
"learning_rate": 0.00034210526315789477,
"loss": 1.4219,
"step": 66
},
{
"epoch": 3.53,
"learning_rate": 0.0003473684210526316,
"loss": 1.6314,
"step": 67
},
{
"epoch": 3.58,
"learning_rate": 0.00035263157894736846,
"loss": 1.5195,
"step": 68
},
{
"epoch": 3.63,
"learning_rate": 0.0003578947368421053,
"loss": 1.4379,
"step": 69
},
{
"epoch": 3.68,
"learning_rate": 0.00036315789473684214,
"loss": 1.6594,
"step": 70
},
{
"epoch": 3.74,
"learning_rate": 0.00036842105263157896,
"loss": 1.5372,
"step": 71
},
{
"epoch": 3.79,
"learning_rate": 0.0003736842105263158,
"loss": 1.6277,
"step": 72
},
{
"epoch": 3.84,
"learning_rate": 0.00037894736842105265,
"loss": 1.3156,
"step": 73
},
{
"epoch": 3.89,
"learning_rate": 0.00038421052631578946,
"loss": 1.7242,
"step": 74
},
{
"epoch": 3.95,
"learning_rate": 0.00038947368421052633,
"loss": 1.4844,
"step": 75
},
{
"epoch": 4.0,
"learning_rate": 0.0003947368421052632,
"loss": 1.4152,
"step": 76
},
{
"epoch": 4.0,
"eval_accuracy": 0.3076923076923077,
"eval_f1_macro": 0.09411764705882353,
"eval_f1_micro": 0.3076923076923077,
"eval_f1_weighted": 0.14479638009049772,
"eval_loss": 1.4960436820983887,
"eval_precision_macro": 0.06153846153846154,
"eval_precision_micro": 0.3076923076923077,
"eval_precision_weighted": 0.09467455621301776,
"eval_recall_macro": 0.2,
"eval_recall_micro": 0.3076923076923077,
"eval_recall_weighted": 0.3076923076923077,
"eval_runtime": 0.1073,
"eval_samples_per_second": 363.387,
"eval_steps_per_second": 27.953,
"step": 76
},
{
"epoch": 4.05,
"learning_rate": 0.0004,
"loss": 1.2319,
"step": 77
},
{
"epoch": 4.11,
"learning_rate": 0.0004052631578947369,
"loss": 1.5189,
"step": 78
},
{
"epoch": 4.16,
"learning_rate": 0.0004105263157894737,
"loss": 1.5146,
"step": 79
},
{
"epoch": 4.21,
"learning_rate": 0.0004157894736842105,
"loss": 1.4695,
"step": 80
},
{
"epoch": 4.26,
"learning_rate": 0.0004210526315789474,
"loss": 1.343,
"step": 81
},
{
"epoch": 4.32,
"learning_rate": 0.0004263157894736842,
"loss": 1.4702,
"step": 82
},
{
"epoch": 4.37,
"learning_rate": 0.0004315789473684211,
"loss": 1.6262,
"step": 83
},
{
"epoch": 4.42,
"learning_rate": 0.0004368421052631579,
"loss": 1.5298,
"step": 84
},
{
"epoch": 4.47,
"learning_rate": 0.0004421052631578947,
"loss": 1.2922,
"step": 85
},
{
"epoch": 4.53,
"learning_rate": 0.0004473684210526316,
"loss": 1.2764,
"step": 86
},
{
"epoch": 4.58,
"learning_rate": 0.0004526315789473684,
"loss": 1.2834,
"step": 87
},
{
"epoch": 4.63,
"learning_rate": 0.00045789473684210527,
"loss": 1.3713,
"step": 88
},
{
"epoch": 4.68,
"learning_rate": 0.0004631578947368422,
"loss": 1.5645,
"step": 89
},
{
"epoch": 4.74,
"learning_rate": 0.000468421052631579,
"loss": 1.3019,
"step": 90
},
{
"epoch": 4.79,
"learning_rate": 0.0004736842105263158,
"loss": 1.3534,
"step": 91
},
{
"epoch": 4.84,
"learning_rate": 0.0004789473684210527,
"loss": 1.4572,
"step": 92
},
{
"epoch": 4.89,
"learning_rate": 0.0004842105263157895,
"loss": 1.5372,
"step": 93
},
{
"epoch": 4.95,
"learning_rate": 0.0004894736842105264,
"loss": 1.4614,
"step": 94
},
{
"epoch": 5.0,
"learning_rate": 0.0004947368421052632,
"loss": 1.214,
"step": 95
},
{
"epoch": 5.0,
"eval_accuracy": 0.41025641025641024,
"eval_f1_macro": 0.21165501165501163,
"eval_f1_micro": 0.41025641025641024,
"eval_f1_weighted": 0.29765106688183607,
"eval_loss": 1.392277717590332,
"eval_precision_macro": 0.17407407407407408,
"eval_precision_micro": 0.41025641025641024,
"eval_precision_weighted": 0.24216524216524218,
"eval_recall_macro": 0.2866666666666667,
"eval_recall_micro": 0.41025641025641024,
"eval_recall_weighted": 0.41025641025641024,
"eval_runtime": 0.1067,
"eval_samples_per_second": 365.557,
"eval_steps_per_second": 28.12,
"step": 95
},
{
"epoch": 5.05,
"learning_rate": 0.0005,
"loss": 1.1431,
"step": 96
},
{
"epoch": 5.11,
"learning_rate": 0.0005052631578947368,
"loss": 1.1986,
"step": 97
},
{
"epoch": 5.16,
"learning_rate": 0.0005105263157894738,
"loss": 1.3605,
"step": 98
},
{
"epoch": 5.21,
"learning_rate": 0.0005157894736842106,
"loss": 1.4709,
"step": 99
},
{
"epoch": 5.26,
"learning_rate": 0.0005210526315789474,
"loss": 1.4103,
"step": 100
},
{
"epoch": 5.32,
"learning_rate": 0.0005263157894736842,
"loss": 1.5161,
"step": 101
},
{
"epoch": 5.37,
"learning_rate": 0.000531578947368421,
"loss": 1.4339,
"step": 102
},
{
"epoch": 5.42,
"learning_rate": 0.0005368421052631579,
"loss": 1.2471,
"step": 103
},
{
"epoch": 5.47,
"learning_rate": 0.0005421052631578948,
"loss": 1.3335,
"step": 104
},
{
"epoch": 5.53,
"learning_rate": 0.0005473684210526316,
"loss": 1.1644,
"step": 105
},
{
"epoch": 5.58,
"learning_rate": 0.0005526315789473684,
"loss": 1.5695,
"step": 106
},
{
"epoch": 5.63,
"learning_rate": 0.0005578947368421052,
"loss": 1.099,
"step": 107
},
{
"epoch": 5.68,
"learning_rate": 0.0005631578947368421,
"loss": 1.3556,
"step": 108
},
{
"epoch": 5.74,
"learning_rate": 0.000568421052631579,
"loss": 1.4156,
"step": 109
},
{
"epoch": 5.79,
"learning_rate": 0.0005736842105263159,
"loss": 1.3777,
"step": 110
},
{
"epoch": 5.84,
"learning_rate": 0.0005789473684210527,
"loss": 1.8594,
"step": 111
},
{
"epoch": 5.89,
"learning_rate": 0.0005842105263157895,
"loss": 0.995,
"step": 112
},
{
"epoch": 5.95,
"learning_rate": 0.0005894736842105264,
"loss": 1.2018,
"step": 113
},
{
"epoch": 6.0,
"learning_rate": 0.0005947368421052632,
"loss": 0.9971,
"step": 114
},
{
"epoch": 6.0,
"eval_accuracy": 0.48717948717948717,
"eval_f1_macro": 0.33415204678362576,
"eval_f1_micro": 0.48717948717948717,
"eval_f1_weighted": 0.4098065677013045,
"eval_loss": 1.2535957098007202,
"eval_precision_macro": 0.35025641025641024,
"eval_precision_micro": 0.48717948717948717,
"eval_precision_weighted": 0.4003944773175543,
"eval_recall_macro": 0.3733333333333333,
"eval_recall_micro": 0.48717948717948717,
"eval_recall_weighted": 0.48717948717948717,
"eval_runtime": 0.1096,
"eval_samples_per_second": 355.926,
"eval_steps_per_second": 27.379,
"step": 114
},
{
"epoch": 6.05,
"learning_rate": 0.0006000000000000001,
"loss": 1.1295,
"step": 115
},
{
"epoch": 6.11,
"learning_rate": 0.0006052631578947369,
"loss": 1.4946,
"step": 116
},
{
"epoch": 6.16,
"learning_rate": 0.0006105263157894737,
"loss": 1.8178,
"step": 117
},
{
"epoch": 6.21,
"learning_rate": 0.0006157894736842106,
"loss": 1.1574,
"step": 118
},
{
"epoch": 6.26,
"learning_rate": 0.0006210526315789474,
"loss": 0.9329,
"step": 119
},
{
"epoch": 6.32,
"learning_rate": 0.0006263157894736842,
"loss": 1.2374,
"step": 120
},
{
"epoch": 6.37,
"learning_rate": 0.0006315789473684211,
"loss": 1.5585,
"step": 121
},
{
"epoch": 6.42,
"learning_rate": 0.0006368421052631579,
"loss": 1.0872,
"step": 122
},
{
"epoch": 6.47,
"learning_rate": 0.0006421052631578948,
"loss": 0.9432,
"step": 123
},
{
"epoch": 6.53,
"learning_rate": 0.0006473684210526316,
"loss": 1.2214,
"step": 124
},
{
"epoch": 6.58,
"learning_rate": 0.0006526315789473684,
"loss": 1.0156,
"step": 125
},
{
"epoch": 6.63,
"learning_rate": 0.0006578947368421052,
"loss": 1.2343,
"step": 126
},
{
"epoch": 6.68,
"learning_rate": 0.0006631578947368421,
"loss": 1.2906,
"step": 127
},
{
"epoch": 6.74,
"learning_rate": 0.000668421052631579,
"loss": 1.5074,
"step": 128
},
{
"epoch": 6.79,
"learning_rate": 0.0006736842105263159,
"loss": 1.0378,
"step": 129
},
{
"epoch": 6.84,
"learning_rate": 0.0006789473684210527,
"loss": 1.3764,
"step": 130
},
{
"epoch": 6.89,
"learning_rate": 0.0006842105263157895,
"loss": 0.9798,
"step": 131
},
{
"epoch": 6.95,
"learning_rate": 0.0006894736842105264,
"loss": 1.0204,
"step": 132
},
{
"epoch": 7.0,
"learning_rate": 0.0006947368421052632,
"loss": 1.0225,
"step": 133
},
{
"epoch": 7.0,
"eval_accuracy": 0.6923076923076923,
"eval_f1_macro": 0.5685314685314686,
"eval_f1_micro": 0.6923076923076923,
"eval_f1_weighted": 0.6504691889307274,
"eval_loss": 1.1684703826904297,
"eval_precision_macro": 0.5473015873015872,
"eval_precision_micro": 0.6923076923076923,
"eval_precision_weighted": 0.6344322344322344,
"eval_recall_macro": 0.6142857142857142,
"eval_recall_micro": 0.6923076923076923,
"eval_recall_weighted": 0.6923076923076923,
"eval_runtime": 0.0989,
"eval_samples_per_second": 394.464,
"eval_steps_per_second": 30.343,
"step": 133
},
{
"epoch": 7.05,
"learning_rate": 0.0007000000000000001,
"loss": 0.8319,
"step": 134
},
{
"epoch": 7.11,
"learning_rate": 0.0007052631578947369,
"loss": 1.0276,
"step": 135
},
{
"epoch": 7.16,
"learning_rate": 0.0007105263157894737,
"loss": 0.9607,
"step": 136
},
{
"epoch": 7.21,
"learning_rate": 0.0007157894736842105,
"loss": 1.2387,
"step": 137
},
{
"epoch": 7.26,
"learning_rate": 0.0007210526315789474,
"loss": 1.2796,
"step": 138
},
{
"epoch": 7.32,
"learning_rate": 0.0007263157894736843,
"loss": 0.7215,
"step": 139
},
{
"epoch": 7.37,
"learning_rate": 0.0007315789473684211,
"loss": 1.0518,
"step": 140
},
{
"epoch": 7.42,
"learning_rate": 0.0007368421052631579,
"loss": 0.7813,
"step": 141
},
{
"epoch": 7.47,
"learning_rate": 0.0007421052631578947,
"loss": 1.0502,
"step": 142
},
{
"epoch": 7.53,
"learning_rate": 0.0007473684210526316,
"loss": 1.378,
"step": 143
},
{
"epoch": 7.58,
"learning_rate": 0.0007526315789473685,
"loss": 1.2363,
"step": 144
},
{
"epoch": 7.63,
"learning_rate": 0.0007526315789473685,
"loss": 1.4102,
"step": 145
},
{
"epoch": 7.68,
"learning_rate": 0.0007578947368421053,
"loss": 0.8184,
"step": 146
},
{
"epoch": 7.74,
"learning_rate": 0.0007631578947368421,
"loss": 1.0921,
"step": 147
},
{
"epoch": 7.79,
"learning_rate": 0.0007684210526315789,
"loss": 0.7126,
"step": 148
},
{
"epoch": 7.84,
"learning_rate": 0.0007736842105263159,
"loss": 1.0646,
"step": 149
},
{
"epoch": 7.89,
"learning_rate": 0.0007789473684210527,
"loss": 0.9532,
"step": 150
},
{
"epoch": 7.95,
"learning_rate": 0.0007842105263157896,
"loss": 0.7724,
"step": 151
},
{
"epoch": 8.0,
"learning_rate": 0.0007894736842105264,
"loss": 0.8753,
"step": 152
},
{
"epoch": 8.0,
"eval_accuracy": 0.717948717948718,
"eval_f1_macro": 0.5992857142857144,
"eval_f1_micro": 0.717948717948718,
"eval_f1_weighted": 0.6808608058608059,
"eval_loss": 1.0412697792053223,
"eval_precision_macro": 0.5655555555555555,
"eval_precision_micro": 0.717948717948718,
"eval_precision_weighted": 0.6524216524216524,
"eval_recall_macro": 0.6433333333333333,
"eval_recall_micro": 0.717948717948718,
"eval_recall_weighted": 0.717948717948718,
"eval_runtime": 0.1034,
"eval_samples_per_second": 377.218,
"eval_steps_per_second": 29.017,
"step": 152
},
{
"epoch": 8.05,
"learning_rate": 0.0007947368421052632,
"loss": 0.6541,
"step": 153
},
{
"epoch": 8.11,
"learning_rate": 0.0008,
"loss": 1.2863,
"step": 154
},
{
"epoch": 8.16,
"learning_rate": 0.0008052631578947369,
"loss": 1.0602,
"step": 155
},
{
"epoch": 8.21,
"learning_rate": 0.0008105263157894738,
"loss": 1.2225,
"step": 156
},
{
"epoch": 8.26,
"learning_rate": 0.0008157894736842106,
"loss": 0.9046,
"step": 157
},
{
"epoch": 8.32,
"learning_rate": 0.0008210526315789474,
"loss": 0.6939,
"step": 158
},
{
"epoch": 8.37,
"learning_rate": 0.0008263157894736842,
"loss": 0.644,
"step": 159
},
{
"epoch": 8.42,
"learning_rate": 0.000831578947368421,
"loss": 0.9403,
"step": 160
},
{
"epoch": 8.47,
"learning_rate": 0.000836842105263158,
"loss": 0.5107,
"step": 161
},
{
"epoch": 8.53,
"learning_rate": 0.0008421052631578948,
"loss": 1.0886,
"step": 162
},
{
"epoch": 8.58,
"learning_rate": 0.0008473684210526316,
"loss": 1.3385,
"step": 163
},
{
"epoch": 8.63,
"learning_rate": 0.0008526315789473684,
"loss": 1.2766,
"step": 164
},
{
"epoch": 8.68,
"learning_rate": 0.0008578947368421052,
"loss": 0.6848,
"step": 165
},
{
"epoch": 8.74,
"learning_rate": 0.0008631578947368422,
"loss": 0.9881,
"step": 166
},
{
"epoch": 8.79,
"learning_rate": 0.000868421052631579,
"loss": 0.6309,
"step": 167
},
{
"epoch": 8.84,
"learning_rate": 0.0008736842105263158,
"loss": 0.7585,
"step": 168
},
{
"epoch": 8.89,
"learning_rate": 0.0008789473684210526,
"loss": 0.6653,
"step": 169
},
{
"epoch": 8.95,
"learning_rate": 0.0008842105263157894,
"loss": 1.0764,
"step": 170
},
{
"epoch": 9.0,
"learning_rate": 0.0008894736842105263,
"loss": 0.6241,
"step": 171
},
{
"epoch": 9.0,
"eval_accuracy": 0.7435897435897436,
"eval_f1_macro": 0.6221158958001063,
"eval_f1_micro": 0.7435897435897437,
"eval_f1_weighted": 0.7144862934336619,
"eval_loss": 0.9631038308143616,
"eval_precision_macro": 0.6077777777777779,
"eval_precision_micro": 0.7435897435897436,
"eval_precision_weighted": 0.7235042735042736,
"eval_recall_macro": 0.6766666666666666,
"eval_recall_micro": 0.7435897435897436,
"eval_recall_weighted": 0.7435897435897436,
"eval_runtime": 0.1019,
"eval_samples_per_second": 382.713,
"eval_steps_per_second": 29.439,
"step": 171
},
{
"epoch": 9.05,
"learning_rate": 0.0008947368421052632,
"loss": 0.6013,
"step": 172
},
{
"epoch": 9.11,
"learning_rate": 0.0009,
"loss": 0.8411,
"step": 173
},
{
"epoch": 9.16,
"learning_rate": 0.0009052631578947368,
"loss": 0.9775,
"step": 174
},
{
"epoch": 9.21,
"learning_rate": 0.0009105263157894736,
"loss": 0.9363,
"step": 175
},
{
"epoch": 9.26,
"learning_rate": 0.0009157894736842105,
"loss": 0.348,
"step": 176
},
{
"epoch": 9.32,
"learning_rate": 0.0009210526315789473,
"loss": 0.7705,
"step": 177
},
{
"epoch": 9.37,
"learning_rate": 0.0009263157894736844,
"loss": 0.7397,
"step": 178
},
{
"epoch": 9.42,
"learning_rate": 0.0009315789473684212,
"loss": 0.3971,
"step": 179
},
{
"epoch": 9.47,
"learning_rate": 0.000936842105263158,
"loss": 1.4934,
"step": 180
},
{
"epoch": 9.53,
"learning_rate": 0.0009421052631578948,
"loss": 1.2191,
"step": 181
},
{
"epoch": 9.58,
"learning_rate": 0.0009473684210526316,
"loss": 1.3495,
"step": 182
},
{
"epoch": 9.63,
"learning_rate": 0.0009526315789473686,
"loss": 1.2079,
"step": 183
},
{
"epoch": 9.68,
"learning_rate": 0.0009578947368421054,
"loss": 0.9686,
"step": 184
},
{
"epoch": 9.74,
"learning_rate": 0.0009631578947368422,
"loss": 1.6675,
"step": 185
},
{
"epoch": 9.79,
"learning_rate": 0.000968421052631579,
"loss": 0.7272,
"step": 186
},
{
"epoch": 9.84,
"learning_rate": 0.0009736842105263158,
"loss": 0.563,
"step": 187
},
{
"epoch": 9.89,
"learning_rate": 0.0009789473684210528,
"loss": 1.2433,
"step": 188
},
{
"epoch": 9.95,
"learning_rate": 0.0009842105263157895,
"loss": 1.353,
"step": 189
},
{
"epoch": 10.0,
"learning_rate": 0.0009894736842105264,
"loss": 0.5994,
"step": 190
},
{
"epoch": 10.0,
"eval_accuracy": 0.717948717948718,
"eval_f1_macro": 0.608552036199095,
"eval_f1_micro": 0.717948717948718,
"eval_f1_weighted": 0.6738484743009631,
"eval_loss": 0.8072579503059387,
"eval_precision_macro": 0.5864285714285714,
"eval_precision_micro": 0.717948717948718,
"eval_precision_weighted": 0.6695970695970695,
"eval_recall_macro": 0.6633333333333333,
"eval_recall_micro": 0.717948717948718,
"eval_recall_weighted": 0.717948717948718,
"eval_runtime": 0.1002,
"eval_samples_per_second": 389.316,
"eval_steps_per_second": 29.947,
"step": 190
}
],
"logging_steps": 1,
"max_steps": 95000,
"num_input_tokens_seen": 0,
"num_train_epochs": 5000,
"save_steps": 500,
"total_flos": 3.228625190633472e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}