umberto-uncased-covid-sentiment / trainer_state.json
g8a9's picture
Upload folder using huggingface_hub
1ab8edf
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 20,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 2e-05,
"loss": 0.9626,
"step": 5
},
{
"epoch": 0.17,
"learning_rate": 2e-05,
"loss": 0.7551,
"step": 10
},
{
"epoch": 0.25,
"learning_rate": 2e-05,
"loss": 0.6737,
"step": 15
},
{
"epoch": 0.33,
"learning_rate": 2e-05,
"loss": 0.5923,
"step": 20
},
{
"epoch": 0.33,
"eval_f1": 0.2973817897616256,
"eval_loss": 0.6075014472007751,
"eval_runtime": 2.0329,
"eval_samples_per_second": 464.848,
"eval_steps_per_second": 14.757,
"step": 20
},
{
"epoch": 0.42,
"learning_rate": 2e-05,
"loss": 0.5935,
"step": 25
},
{
"epoch": 0.5,
"learning_rate": 2e-05,
"loss": 0.6499,
"step": 30
},
{
"epoch": 0.58,
"learning_rate": 2e-05,
"loss": 0.5639,
"step": 35
},
{
"epoch": 0.67,
"learning_rate": 2e-05,
"loss": 0.6694,
"step": 40
},
{
"epoch": 0.67,
"eval_f1": 0.2973817897616256,
"eval_loss": 0.5937851071357727,
"eval_runtime": 2.0028,
"eval_samples_per_second": 471.842,
"eval_steps_per_second": 14.979,
"step": 40
},
{
"epoch": 0.75,
"learning_rate": 2e-05,
"loss": 0.5923,
"step": 45
},
{
"epoch": 0.83,
"learning_rate": 2e-05,
"loss": 0.6133,
"step": 50
},
{
"epoch": 0.92,
"learning_rate": 2e-05,
"loss": 0.5702,
"step": 55
},
{
"epoch": 1.0,
"learning_rate": 2e-05,
"loss": 0.5225,
"step": 60
},
{
"epoch": 1.0,
"eval_f1": 0.3024223872822884,
"eval_loss": 0.534581184387207,
"eval_runtime": 2.0063,
"eval_samples_per_second": 471.005,
"eval_steps_per_second": 14.953,
"step": 60
},
{
"epoch": 1.08,
"learning_rate": 2e-05,
"loss": 0.4961,
"step": 65
},
{
"epoch": 1.17,
"learning_rate": 2e-05,
"loss": 0.4879,
"step": 70
},
{
"epoch": 1.25,
"learning_rate": 2e-05,
"loss": 0.5475,
"step": 75
},
{
"epoch": 1.33,
"learning_rate": 2e-05,
"loss": 0.5458,
"step": 80
},
{
"epoch": 1.33,
"eval_f1": 0.4681950604946577,
"eval_loss": 0.5171502828598022,
"eval_runtime": 2.0079,
"eval_samples_per_second": 470.651,
"eval_steps_per_second": 14.941,
"step": 80
},
{
"epoch": 1.42,
"learning_rate": 2e-05,
"loss": 0.5224,
"step": 85
},
{
"epoch": 1.5,
"learning_rate": 2e-05,
"loss": 0.5344,
"step": 90
},
{
"epoch": 1.58,
"learning_rate": 2e-05,
"loss": 0.4791,
"step": 95
},
{
"epoch": 1.67,
"learning_rate": 2e-05,
"loss": 0.5052,
"step": 100
},
{
"epoch": 1.67,
"eval_f1": 0.43614802647951817,
"eval_loss": 0.5550346970558167,
"eval_runtime": 2.0143,
"eval_samples_per_second": 469.148,
"eval_steps_per_second": 14.894,
"step": 100
},
{
"epoch": 1.75,
"learning_rate": 2e-05,
"loss": 0.5821,
"step": 105
},
{
"epoch": 1.83,
"learning_rate": 2e-05,
"loss": 0.4842,
"step": 110
},
{
"epoch": 1.92,
"learning_rate": 2e-05,
"loss": 0.4613,
"step": 115
},
{
"epoch": 2.0,
"learning_rate": 2e-05,
"loss": 0.3689,
"step": 120
},
{
"epoch": 2.0,
"eval_f1": 0.4394838882203503,
"eval_loss": 0.5187910795211792,
"eval_runtime": 2.0147,
"eval_samples_per_second": 469.048,
"eval_steps_per_second": 14.89,
"step": 120
},
{
"epoch": 2.08,
"learning_rate": 2e-05,
"loss": 0.4172,
"step": 125
},
{
"epoch": 2.17,
"learning_rate": 2e-05,
"loss": 0.4645,
"step": 130
},
{
"epoch": 2.25,
"learning_rate": 2e-05,
"loss": 0.4628,
"step": 135
},
{
"epoch": 2.33,
"learning_rate": 2e-05,
"loss": 0.4097,
"step": 140
},
{
"epoch": 2.33,
"eval_f1": 0.4676343432076015,
"eval_loss": 0.4918379485607147,
"eval_runtime": 2.021,
"eval_samples_per_second": 467.58,
"eval_steps_per_second": 14.844,
"step": 140
},
{
"epoch": 2.42,
"learning_rate": 2e-05,
"loss": 0.4716,
"step": 145
},
{
"epoch": 2.5,
"learning_rate": 2e-05,
"loss": 0.4947,
"step": 150
},
{
"epoch": 2.58,
"learning_rate": 2e-05,
"loss": 0.4288,
"step": 155
},
{
"epoch": 2.67,
"learning_rate": 2e-05,
"loss": 0.4904,
"step": 160
},
{
"epoch": 2.67,
"eval_f1": 0.4640191084468284,
"eval_loss": 0.4755867123603821,
"eval_runtime": 2.0186,
"eval_samples_per_second": 468.144,
"eval_steps_per_second": 14.862,
"step": 160
},
{
"epoch": 2.75,
"learning_rate": 2e-05,
"loss": 0.4391,
"step": 165
},
{
"epoch": 2.83,
"learning_rate": 2e-05,
"loss": 0.408,
"step": 170
},
{
"epoch": 2.92,
"learning_rate": 2e-05,
"loss": 0.4724,
"step": 175
},
{
"epoch": 3.0,
"learning_rate": 2e-05,
"loss": 0.3696,
"step": 180
},
{
"epoch": 3.0,
"eval_f1": 0.46883968839688395,
"eval_loss": 0.47884294390678406,
"eval_runtime": 2.0223,
"eval_samples_per_second": 467.285,
"eval_steps_per_second": 14.834,
"step": 180
},
{
"epoch": 3.08,
"learning_rate": 2e-05,
"loss": 0.4094,
"step": 185
},
{
"epoch": 3.17,
"learning_rate": 2e-05,
"loss": 0.4162,
"step": 190
},
{
"epoch": 3.25,
"learning_rate": 2e-05,
"loss": 0.3975,
"step": 195
},
{
"epoch": 3.33,
"learning_rate": 2e-05,
"loss": 0.3631,
"step": 200
},
{
"epoch": 3.33,
"eval_f1": 0.4735017608176137,
"eval_loss": 0.4751051366329193,
"eval_runtime": 2.0238,
"eval_samples_per_second": 466.941,
"eval_steps_per_second": 14.824,
"step": 200
},
{
"epoch": 3.42,
"learning_rate": 2e-05,
"loss": 0.3953,
"step": 205
},
{
"epoch": 3.5,
"learning_rate": 2e-05,
"loss": 0.3781,
"step": 210
},
{
"epoch": 3.58,
"learning_rate": 2e-05,
"loss": 0.4447,
"step": 215
},
{
"epoch": 3.67,
"learning_rate": 2e-05,
"loss": 0.4345,
"step": 220
},
{
"epoch": 3.67,
"eval_f1": 0.4796518931606489,
"eval_loss": 0.4609261453151703,
"eval_runtime": 2.0228,
"eval_samples_per_second": 467.18,
"eval_steps_per_second": 14.831,
"step": 220
},
{
"epoch": 3.75,
"learning_rate": 2e-05,
"loss": 0.3724,
"step": 225
},
{
"epoch": 3.83,
"learning_rate": 2e-05,
"loss": 0.366,
"step": 230
},
{
"epoch": 3.92,
"learning_rate": 2e-05,
"loss": 0.3763,
"step": 235
},
{
"epoch": 4.0,
"learning_rate": 2e-05,
"loss": 0.3398,
"step": 240
},
{
"epoch": 4.0,
"eval_f1": 0.5672850456534463,
"eval_loss": 0.46373221278190613,
"eval_runtime": 2.0191,
"eval_samples_per_second": 468.022,
"eval_steps_per_second": 14.858,
"step": 240
},
{
"epoch": 4.08,
"learning_rate": 2e-05,
"loss": 0.3861,
"step": 245
},
{
"epoch": 4.17,
"learning_rate": 2e-05,
"loss": 0.315,
"step": 250
},
{
"epoch": 4.25,
"learning_rate": 2e-05,
"loss": 0.3501,
"step": 255
},
{
"epoch": 4.33,
"learning_rate": 2e-05,
"loss": 0.2985,
"step": 260
},
{
"epoch": 4.33,
"eval_f1": 0.5091622097836045,
"eval_loss": 0.4849531948566437,
"eval_runtime": 2.0286,
"eval_samples_per_second": 465.831,
"eval_steps_per_second": 14.788,
"step": 260
},
{
"epoch": 4.42,
"learning_rate": 2e-05,
"loss": 0.3734,
"step": 265
},
{
"epoch": 4.5,
"learning_rate": 2e-05,
"loss": 0.3658,
"step": 270
},
{
"epoch": 4.58,
"learning_rate": 2e-05,
"loss": 0.3268,
"step": 275
},
{
"epoch": 4.67,
"learning_rate": 2e-05,
"loss": 0.3248,
"step": 280
},
{
"epoch": 4.67,
"eval_f1": 0.6056820231448513,
"eval_loss": 0.508940577507019,
"eval_runtime": 2.0241,
"eval_samples_per_second": 466.871,
"eval_steps_per_second": 14.821,
"step": 280
},
{
"epoch": 4.75,
"learning_rate": 2e-05,
"loss": 0.3573,
"step": 285
},
{
"epoch": 4.83,
"learning_rate": 2e-05,
"loss": 0.3298,
"step": 290
},
{
"epoch": 4.92,
"learning_rate": 2e-05,
"loss": 0.2909,
"step": 295
},
{
"epoch": 5.0,
"learning_rate": 2e-05,
"loss": 0.3323,
"step": 300
},
{
"epoch": 5.0,
"eval_f1": 0.6159893659893659,
"eval_loss": 0.46433570981025696,
"eval_runtime": 2.0165,
"eval_samples_per_second": 468.623,
"eval_steps_per_second": 14.877,
"step": 300
},
{
"epoch": 5.0,
"step": 300,
"total_flos": 9950949458472960.0,
"train_loss": 0.4641021112600962,
"train_runtime": 274.9964,
"train_samples_per_second": 137.529,
"train_steps_per_second": 1.091
}
],
"logging_steps": 5,
"max_steps": 300,
"num_train_epochs": 5,
"save_steps": 20,
"total_flos": 9950949458472960.0,
"trial_name": null,
"trial_params": null
}