opus-em-deberta-3-large-v2 / trainer_state.json
Kerem P
End of training
8c429e8
raw
history blame
No virus
10 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.977715877437326,
"eval_steps": 500,
"global_step": 1790,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11,
"learning_rate": 2e-05,
"loss": 95.476,
"step": 20
},
{
"epoch": 0.22,
"learning_rate": 2e-05,
"loss": 48.4218,
"step": 40
},
{
"epoch": 0.33,
"learning_rate": 2e-05,
"loss": 13.7763,
"step": 60
},
{
"epoch": 0.45,
"learning_rate": 2e-05,
"loss": 1.648,
"step": 80
},
{
"epoch": 0.56,
"learning_rate": 2e-05,
"loss": 1.4463,
"step": 100
},
{
"epoch": 0.67,
"learning_rate": 2e-05,
"loss": 0.9103,
"step": 120
},
{
"epoch": 0.78,
"learning_rate": 2e-05,
"loss": 0.6693,
"step": 140
},
{
"epoch": 0.89,
"learning_rate": 2e-05,
"loss": 0.8188,
"step": 160
},
{
"epoch": 1.0,
"learning_rate": 2e-05,
"loss": 0.7446,
"step": 180
},
{
"epoch": 1.11,
"learning_rate": 2e-05,
"loss": 0.6158,
"step": 200
},
{
"epoch": 1.23,
"learning_rate": 2e-05,
"loss": 0.9035,
"step": 220
},
{
"epoch": 1.34,
"learning_rate": 2e-05,
"loss": 0.9486,
"step": 240
},
{
"epoch": 1.45,
"learning_rate": 2e-05,
"loss": 0.7198,
"step": 260
},
{
"epoch": 1.56,
"learning_rate": 2e-05,
"loss": 1.5337,
"step": 280
},
{
"epoch": 1.67,
"learning_rate": 2e-05,
"loss": 1.3312,
"step": 300
},
{
"epoch": 1.78,
"learning_rate": 2e-05,
"loss": 0.8632,
"step": 320
},
{
"epoch": 1.89,
"learning_rate": 2e-05,
"loss": 0.8279,
"step": 340
},
{
"epoch": 2.01,
"learning_rate": 2e-05,
"loss": 0.9823,
"step": 360
},
{
"epoch": 2.12,
"learning_rate": 2e-05,
"loss": 0.7963,
"step": 380
},
{
"epoch": 2.23,
"learning_rate": 2e-05,
"loss": 0.7861,
"step": 400
},
{
"epoch": 2.34,
"learning_rate": 2e-05,
"loss": 0.9859,
"step": 420
},
{
"epoch": 2.45,
"learning_rate": 2e-05,
"loss": 1.0639,
"step": 440
},
{
"epoch": 2.56,
"learning_rate": 2e-05,
"loss": 0.7174,
"step": 460
},
{
"epoch": 2.67,
"learning_rate": 2e-05,
"loss": 0.7996,
"step": 480
},
{
"epoch": 2.79,
"learning_rate": 2e-05,
"loss": 0.7602,
"step": 500
},
{
"epoch": 2.9,
"learning_rate": 2e-05,
"loss": 0.8562,
"step": 520
},
{
"epoch": 3.01,
"learning_rate": 2e-05,
"loss": 1.1248,
"step": 540
},
{
"epoch": 3.13,
"learning_rate": 2e-05,
"loss": 0.5689,
"step": 560
},
{
"epoch": 3.24,
"learning_rate": 2e-05,
"loss": 0.7794,
"step": 580
},
{
"epoch": 3.35,
"learning_rate": 2e-05,
"loss": 0.9319,
"step": 600
},
{
"epoch": 3.46,
"learning_rate": 2e-05,
"loss": 1.0026,
"step": 620
},
{
"epoch": 3.57,
"learning_rate": 2e-05,
"loss": 0.5143,
"step": 640
},
{
"epoch": 3.68,
"learning_rate": 2e-05,
"loss": 0.8727,
"step": 660
},
{
"epoch": 3.79,
"learning_rate": 2e-05,
"loss": 0.7792,
"step": 680
},
{
"epoch": 3.91,
"learning_rate": 2e-05,
"loss": 0.7502,
"step": 700
},
{
"epoch": 4.02,
"learning_rate": 2e-05,
"loss": 0.7851,
"step": 720
},
{
"epoch": 4.13,
"learning_rate": 2e-05,
"loss": 1.3474,
"step": 740
},
{
"epoch": 4.24,
"learning_rate": 2e-05,
"loss": 1.5022,
"step": 760
},
{
"epoch": 4.35,
"learning_rate": 2e-05,
"loss": 0.9111,
"step": 780
},
{
"epoch": 4.46,
"learning_rate": 2e-05,
"loss": 2.2783,
"step": 800
},
{
"epoch": 4.57,
"learning_rate": 2e-05,
"loss": 1.1359,
"step": 820
},
{
"epoch": 4.69,
"learning_rate": 2e-05,
"loss": 1.839,
"step": 840
},
{
"epoch": 4.8,
"learning_rate": 2e-05,
"loss": 0.7254,
"step": 860
},
{
"epoch": 4.91,
"learning_rate": 2e-05,
"loss": 1.331,
"step": 880
},
{
"epoch": 5.02,
"learning_rate": 2e-05,
"loss": 0.6719,
"step": 900
},
{
"epoch": 5.13,
"learning_rate": 2e-05,
"loss": 0.8933,
"step": 920
},
{
"epoch": 5.24,
"learning_rate": 2e-05,
"loss": 0.7205,
"step": 940
},
{
"epoch": 5.35,
"learning_rate": 2e-05,
"loss": 0.7708,
"step": 960
},
{
"epoch": 5.47,
"learning_rate": 2e-05,
"loss": 0.8574,
"step": 980
},
{
"epoch": 5.58,
"learning_rate": 2e-05,
"loss": 0.7874,
"step": 1000
},
{
"epoch": 5.69,
"learning_rate": 2e-05,
"loss": 0.7614,
"step": 1020
},
{
"epoch": 5.8,
"learning_rate": 2e-05,
"loss": 0.788,
"step": 1040
},
{
"epoch": 5.91,
"learning_rate": 2e-05,
"loss": 0.5983,
"step": 1060
},
{
"epoch": 6.02,
"learning_rate": 2e-05,
"loss": 0.7172,
"step": 1080
},
{
"epoch": 6.13,
"learning_rate": 2e-05,
"loss": 0.7758,
"step": 1100
},
{
"epoch": 6.25,
"learning_rate": 2e-05,
"loss": 0.7793,
"step": 1120
},
{
"epoch": 6.36,
"learning_rate": 2e-05,
"loss": 0.958,
"step": 1140
},
{
"epoch": 6.47,
"learning_rate": 2e-05,
"loss": 0.9366,
"step": 1160
},
{
"epoch": 6.58,
"learning_rate": 2e-05,
"loss": 0.876,
"step": 1180
},
{
"epoch": 6.69,
"learning_rate": 2e-05,
"loss": 0.9187,
"step": 1200
},
{
"epoch": 6.8,
"learning_rate": 2e-05,
"loss": 0.6457,
"step": 1220
},
{
"epoch": 6.91,
"learning_rate": 2e-05,
"loss": 0.6107,
"step": 1240
},
{
"epoch": 7.03,
"learning_rate": 2e-05,
"loss": 1.2681,
"step": 1260
},
{
"epoch": 7.14,
"learning_rate": 2e-05,
"loss": 0.824,
"step": 1280
},
{
"epoch": 7.25,
"learning_rate": 2e-05,
"loss": 0.7997,
"step": 1300
},
{
"epoch": 7.36,
"learning_rate": 2e-05,
"loss": 0.6423,
"step": 1320
},
{
"epoch": 7.47,
"learning_rate": 2e-05,
"loss": 0.7917,
"step": 1340
},
{
"epoch": 7.58,
"learning_rate": 2e-05,
"loss": 0.6285,
"step": 1360
},
{
"epoch": 7.69,
"learning_rate": 2e-05,
"loss": 0.7219,
"step": 1380
},
{
"epoch": 7.81,
"learning_rate": 2e-05,
"loss": 1.3192,
"step": 1400
},
{
"epoch": 7.92,
"learning_rate": 2e-05,
"loss": 0.9599,
"step": 1420
},
{
"epoch": 8.03,
"learning_rate": 2e-05,
"loss": 1.0558,
"step": 1440
},
{
"epoch": 8.14,
"learning_rate": 2e-05,
"loss": 0.829,
"step": 1460
},
{
"epoch": 8.25,
"learning_rate": 2e-05,
"loss": 0.6611,
"step": 1480
},
{
"epoch": 8.36,
"learning_rate": 2e-05,
"loss": 0.9619,
"step": 1500
},
{
"epoch": 8.47,
"learning_rate": 2e-05,
"loss": 0.838,
"step": 1520
},
{
"epoch": 8.58,
"learning_rate": 2e-05,
"loss": 0.6923,
"step": 1540
},
{
"epoch": 8.7,
"learning_rate": 2e-05,
"loss": 0.7675,
"step": 1560
},
{
"epoch": 8.81,
"learning_rate": 2e-05,
"loss": 1.038,
"step": 1580
},
{
"epoch": 8.92,
"learning_rate": 2e-05,
"loss": 0.5607,
"step": 1600
},
{
"epoch": 9.03,
"learning_rate": 2e-05,
"loss": 0.8019,
"step": 1620
},
{
"epoch": 9.14,
"learning_rate": 2e-05,
"loss": 0.7824,
"step": 1640
},
{
"epoch": 9.25,
"learning_rate": 2e-05,
"loss": 0.55,
"step": 1660
},
{
"epoch": 9.36,
"learning_rate": 2e-05,
"loss": 0.625,
"step": 1680
},
{
"epoch": 9.48,
"learning_rate": 2e-05,
"loss": 0.8339,
"step": 1700
},
{
"epoch": 9.59,
"learning_rate": 2e-05,
"loss": 0.5907,
"step": 1720
},
{
"epoch": 9.7,
"learning_rate": 2e-05,
"loss": 0.9135,
"step": 1740
},
{
"epoch": 9.81,
"learning_rate": 2e-05,
"loss": 1.3289,
"step": 1760
},
{
"epoch": 9.92,
"learning_rate": 2e-05,
"loss": 1.1937,
"step": 1780
},
{
"epoch": 9.98,
"step": 1790,
"total_flos": 4485735925153792.0,
"train_loss": 0.6437026796394221,
"train_runtime": 3731.8169,
"train_samples_per_second": 15.389,
"train_steps_per_second": 0.48
}
],
"logging_steps": 20,
"max_steps": 1790,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 4485735925153792.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}