model_005_2000 / trainer_state.json
arthoho66's picture
Upload 20 files
8ef1f34
{
"best_metric": 102.0,
"best_model_checkpoint": "./model_005/checkpoint-2000",
"epoch": 100.0,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.25,
"learning_rate": 4.4e-08,
"loss": 0.8082,
"step": 25
},
{
"epoch": 2.5,
"learning_rate": 9.4e-08,
"loss": 0.8811,
"step": 50
},
{
"epoch": 3.75,
"learning_rate": 1.44e-07,
"loss": 0.7267,
"step": 75
},
{
"epoch": 5.0,
"learning_rate": 1.94e-07,
"loss": 0.7555,
"step": 100
},
{
"epoch": 6.25,
"learning_rate": 2.4399999999999996e-07,
"loss": 0.6289,
"step": 125
},
{
"epoch": 7.5,
"learning_rate": 2.9399999999999996e-07,
"loss": 0.6541,
"step": 150
},
{
"epoch": 8.75,
"learning_rate": 3.4399999999999996e-07,
"loss": 0.6237,
"step": 175
},
{
"epoch": 10.0,
"learning_rate": 3.94e-07,
"loss": 0.6355,
"step": 200
},
{
"epoch": 11.25,
"learning_rate": 4.44e-07,
"loss": 0.5445,
"step": 225
},
{
"epoch": 12.5,
"learning_rate": 4.94e-07,
"loss": 0.5228,
"step": 250
},
{
"epoch": 13.75,
"learning_rate": 5.44e-07,
"loss": 0.6126,
"step": 275
},
{
"epoch": 15.0,
"learning_rate": 5.939999999999999e-07,
"loss": 0.5067,
"step": 300
},
{
"epoch": 16.25,
"learning_rate": 6.44e-07,
"loss": 0.4905,
"step": 325
},
{
"epoch": 17.5,
"learning_rate": 6.939999999999999e-07,
"loss": 0.46,
"step": 350
},
{
"epoch": 18.75,
"learning_rate": 7.44e-07,
"loss": 0.4899,
"step": 375
},
{
"epoch": 20.0,
"learning_rate": 7.94e-07,
"loss": 0.4079,
"step": 400
},
{
"epoch": 21.25,
"learning_rate": 8.439999999999999e-07,
"loss": 0.4024,
"step": 425
},
{
"epoch": 22.5,
"learning_rate": 8.939999999999999e-07,
"loss": 0.3725,
"step": 450
},
{
"epoch": 23.75,
"learning_rate": 9.439999999999999e-07,
"loss": 0.3226,
"step": 475
},
{
"epoch": 25.0,
"learning_rate": 9.94e-07,
"loss": 0.3195,
"step": 500
},
{
"epoch": 26.25,
"learning_rate": 9.959999999999999e-07,
"loss": 0.3305,
"step": 525
},
{
"epoch": 27.5,
"learning_rate": 9.914545454545455e-07,
"loss": 0.2994,
"step": 550
},
{
"epoch": 28.75,
"learning_rate": 9.86909090909091e-07,
"loss": 0.2754,
"step": 575
},
{
"epoch": 30.0,
"learning_rate": 9.823636363636362e-07,
"loss": 0.2541,
"step": 600
},
{
"epoch": 31.25,
"learning_rate": 9.778181818181817e-07,
"loss": 0.2476,
"step": 625
},
{
"epoch": 32.5,
"learning_rate": 9.732727272727272e-07,
"loss": 0.1813,
"step": 650
},
{
"epoch": 33.75,
"learning_rate": 9.687272727272726e-07,
"loss": 0.1728,
"step": 675
},
{
"epoch": 35.0,
"learning_rate": 9.64181818181818e-07,
"loss": 0.2009,
"step": 700
},
{
"epoch": 36.25,
"learning_rate": 9.596363636363635e-07,
"loss": 0.1723,
"step": 725
},
{
"epoch": 37.5,
"learning_rate": 9.55090909090909e-07,
"loss": 0.1857,
"step": 750
},
{
"epoch": 38.75,
"learning_rate": 9.505454545454546e-07,
"loss": 0.1074,
"step": 775
},
{
"epoch": 40.0,
"learning_rate": 9.459999999999999e-07,
"loss": 0.1422,
"step": 800
},
{
"epoch": 41.25,
"learning_rate": 9.414545454545454e-07,
"loss": 0.1562,
"step": 825
},
{
"epoch": 42.5,
"learning_rate": 9.369090909090908e-07,
"loss": 0.0662,
"step": 850
},
{
"epoch": 43.75,
"learning_rate": 9.323636363636363e-07,
"loss": 0.0854,
"step": 875
},
{
"epoch": 45.0,
"learning_rate": 9.278181818181818e-07,
"loss": 0.1199,
"step": 900
},
{
"epoch": 46.25,
"learning_rate": 9.232727272727272e-07,
"loss": 0.0943,
"step": 925
},
{
"epoch": 47.5,
"learning_rate": 9.187272727272727e-07,
"loss": 0.0702,
"step": 950
},
{
"epoch": 48.75,
"learning_rate": 9.141818181818181e-07,
"loss": 0.0529,
"step": 975
},
{
"epoch": 50.0,
"learning_rate": 9.096363636363636e-07,
"loss": 0.1109,
"step": 1000
},
{
"epoch": 50.0,
"eval_loss": 0.2612619996070862,
"eval_runtime": 10.5374,
"eval_samples_per_second": 2.183,
"eval_steps_per_second": 0.285,
"eval_wer": 103.49999999999999,
"step": 1000
},
{
"epoch": 51.25,
"learning_rate": 9.050909090909091e-07,
"loss": 0.1209,
"step": 1025
},
{
"epoch": 52.5,
"learning_rate": 9.005454545454545e-07,
"loss": 0.0639,
"step": 1050
},
{
"epoch": 53.75,
"learning_rate": 8.96e-07,
"loss": 0.0627,
"step": 1075
},
{
"epoch": 55.0,
"learning_rate": 8.914545454545454e-07,
"loss": 0.0265,
"step": 1100
},
{
"epoch": 56.25,
"learning_rate": 8.869090909090908e-07,
"loss": 0.0695,
"step": 1125
},
{
"epoch": 57.5,
"learning_rate": 8.823636363636364e-07,
"loss": 0.0396,
"step": 1150
},
{
"epoch": 58.75,
"learning_rate": 8.778181818181818e-07,
"loss": 0.0206,
"step": 1175
},
{
"epoch": 60.0,
"learning_rate": 8.732727272727273e-07,
"loss": 0.0479,
"step": 1200
},
{
"epoch": 61.25,
"learning_rate": 8.687272727272726e-07,
"loss": 0.0663,
"step": 1225
},
{
"epoch": 62.5,
"learning_rate": 8.641818181818181e-07,
"loss": 0.0417,
"step": 1250
},
{
"epoch": 63.75,
"learning_rate": 8.596363636363637e-07,
"loss": 0.0286,
"step": 1275
},
{
"epoch": 65.0,
"learning_rate": 8.550909090909091e-07,
"loss": 0.0902,
"step": 1300
},
{
"epoch": 66.25,
"learning_rate": 8.505454545454545e-07,
"loss": 0.1234,
"step": 1325
},
{
"epoch": 67.5,
"learning_rate": 8.459999999999999e-07,
"loss": 0.0291,
"step": 1350
},
{
"epoch": 68.75,
"learning_rate": 8.414545454545454e-07,
"loss": 0.042,
"step": 1375
},
{
"epoch": 70.0,
"learning_rate": 8.36909090909091e-07,
"loss": 0.0828,
"step": 1400
},
{
"epoch": 71.25,
"learning_rate": 8.323636363636364e-07,
"loss": 0.0753,
"step": 1425
},
{
"epoch": 72.5,
"learning_rate": 8.278181818181818e-07,
"loss": 0.0356,
"step": 1450
},
{
"epoch": 73.75,
"learning_rate": 8.232727272727272e-07,
"loss": 0.0671,
"step": 1475
},
{
"epoch": 75.0,
"learning_rate": 8.187272727272727e-07,
"loss": 0.0507,
"step": 1500
},
{
"epoch": 76.25,
"learning_rate": 8.141818181818181e-07,
"loss": 0.0764,
"step": 1525
},
{
"epoch": 77.5,
"learning_rate": 8.096363636363636e-07,
"loss": 0.0204,
"step": 1550
},
{
"epoch": 78.75,
"learning_rate": 8.050909090909091e-07,
"loss": 0.053,
"step": 1575
},
{
"epoch": 80.0,
"learning_rate": 8.005454545454545e-07,
"loss": 0.0317,
"step": 1600
},
{
"epoch": 81.25,
"learning_rate": 7.96e-07,
"loss": 0.1174,
"step": 1625
},
{
"epoch": 82.5,
"learning_rate": 7.914545454545453e-07,
"loss": 0.0212,
"step": 1650
},
{
"epoch": 83.75,
"learning_rate": 7.869090909090909e-07,
"loss": 0.0286,
"step": 1675
},
{
"epoch": 85.0,
"learning_rate": 7.823636363636364e-07,
"loss": 0.0235,
"step": 1700
},
{
"epoch": 86.25,
"learning_rate": 7.778181818181818e-07,
"loss": 0.0648,
"step": 1725
},
{
"epoch": 87.5,
"learning_rate": 7.732727272727272e-07,
"loss": 0.0332,
"step": 1750
},
{
"epoch": 88.75,
"learning_rate": 7.687272727272726e-07,
"loss": 0.045,
"step": 1775
},
{
"epoch": 90.0,
"learning_rate": 7.641818181818182e-07,
"loss": 0.0575,
"step": 1800
},
{
"epoch": 91.25,
"learning_rate": 7.596363636363637e-07,
"loss": 0.0383,
"step": 1825
},
{
"epoch": 92.5,
"learning_rate": 7.550909090909091e-07,
"loss": 0.0255,
"step": 1850
},
{
"epoch": 93.75,
"learning_rate": 7.505454545454545e-07,
"loss": 0.0287,
"step": 1875
},
{
"epoch": 95.0,
"learning_rate": 7.459999999999999e-07,
"loss": 0.0351,
"step": 1900
},
{
"epoch": 96.25,
"learning_rate": 7.414545454545455e-07,
"loss": 0.0593,
"step": 1925
},
{
"epoch": 97.5,
"learning_rate": 7.36909090909091e-07,
"loss": 0.0392,
"step": 1950
},
{
"epoch": 98.75,
"learning_rate": 7.323636363636363e-07,
"loss": 0.0206,
"step": 1975
},
{
"epoch": 100.0,
"learning_rate": 7.278181818181818e-07,
"loss": 0.0222,
"step": 2000
},
{
"epoch": 100.0,
"eval_loss": 0.29138991236686707,
"eval_runtime": 10.4785,
"eval_samples_per_second": 2.195,
"eval_steps_per_second": 0.286,
"eval_wer": 102.0,
"step": 2000
}
],
"max_steps": 6000,
"num_train_epochs": 300,
"total_flos": 2.04120981504e+19,
"trial_name": null,
"trial_params": null
}