xlm-roberta-large-bn / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 9565,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"eval_loss": 1.9894485473632812,
"eval_runtime": 4.9418,
"eval_samples_per_second": 161.884,
"eval_steps_per_second": 20.236,
"step": 100
},
{
"epoch": 0.1,
"eval_loss": 1.8623613119125366,
"eval_runtime": 4.9523,
"eval_samples_per_second": 161.541,
"eval_steps_per_second": 20.193,
"step": 200
},
{
"epoch": 0.16,
"eval_loss": 1.9076178073883057,
"eval_runtime": 4.9532,
"eval_samples_per_second": 161.512,
"eval_steps_per_second": 20.189,
"step": 300
},
{
"epoch": 0.21,
"eval_loss": 1.815214991569519,
"eval_runtime": 4.9711,
"eval_samples_per_second": 160.929,
"eval_steps_per_second": 20.116,
"step": 400
},
{
"epoch": 0.26,
"eval_loss": 1.7864209413528442,
"eval_runtime": 4.9618,
"eval_samples_per_second": 161.233,
"eval_steps_per_second": 20.154,
"step": 500
},
{
"epoch": 0.31,
"eval_loss": 1.8693923950195312,
"eval_runtime": 4.9535,
"eval_samples_per_second": 161.502,
"eval_steps_per_second": 20.188,
"step": 600
},
{
"epoch": 0.37,
"eval_loss": 1.8722848892211914,
"eval_runtime": 4.9564,
"eval_samples_per_second": 161.408,
"eval_steps_per_second": 20.176,
"step": 700
},
{
"epoch": 0.42,
"eval_loss": 1.7493282556533813,
"eval_runtime": 4.9483,
"eval_samples_per_second": 161.671,
"eval_steps_per_second": 20.209,
"step": 800
},
{
"epoch": 0.47,
"eval_loss": 1.9245165586471558,
"eval_runtime": 4.9768,
"eval_samples_per_second": 160.745,
"eval_steps_per_second": 20.093,
"step": 900
},
{
"epoch": 0.52,
"learning_rate": 8.954521693674857e-06,
"loss": 2.0644,
"step": 1000
},
{
"epoch": 0.52,
"eval_loss": 1.74406099319458,
"eval_runtime": 5.0299,
"eval_samples_per_second": 159.048,
"eval_steps_per_second": 19.881,
"step": 1000
},
{
"epoch": 0.58,
"eval_loss": 1.8684289455413818,
"eval_runtime": 4.9554,
"eval_samples_per_second": 161.439,
"eval_steps_per_second": 20.18,
"step": 1100
},
{
"epoch": 0.63,
"eval_loss": 1.8639626502990723,
"eval_runtime": 4.9751,
"eval_samples_per_second": 160.802,
"eval_steps_per_second": 20.1,
"step": 1200
},
{
"epoch": 0.68,
"eval_loss": 1.829074501991272,
"eval_runtime": 4.9541,
"eval_samples_per_second": 161.482,
"eval_steps_per_second": 20.185,
"step": 1300
},
{
"epoch": 0.73,
"eval_loss": 1.806042194366455,
"eval_runtime": 4.9361,
"eval_samples_per_second": 162.072,
"eval_steps_per_second": 20.259,
"step": 1400
},
{
"epoch": 0.78,
"eval_loss": 1.8194879293441772,
"eval_runtime": 4.953,
"eval_samples_per_second": 161.517,
"eval_steps_per_second": 20.19,
"step": 1500
},
{
"epoch": 0.84,
"eval_loss": 1.7241334915161133,
"eval_runtime": 4.943,
"eval_samples_per_second": 161.845,
"eval_steps_per_second": 20.231,
"step": 1600
},
{
"epoch": 0.89,
"eval_loss": 1.8226971626281738,
"eval_runtime": 4.9746,
"eval_samples_per_second": 160.818,
"eval_steps_per_second": 20.102,
"step": 1700
},
{
"epoch": 0.94,
"eval_loss": 1.6978307962417603,
"eval_runtime": 4.9672,
"eval_samples_per_second": 161.057,
"eval_steps_per_second": 20.132,
"step": 1800
},
{
"epoch": 0.99,
"eval_loss": 1.7191663980484009,
"eval_runtime": 4.9517,
"eval_samples_per_second": 161.559,
"eval_steps_per_second": 20.195,
"step": 1900
},
{
"epoch": 1.05,
"learning_rate": 7.909043387349713e-06,
"loss": 1.9856,
"step": 2000
},
{
"epoch": 1.05,
"eval_loss": 1.7717660665512085,
"eval_runtime": 4.9947,
"eval_samples_per_second": 160.17,
"eval_steps_per_second": 20.021,
"step": 2000
},
{
"epoch": 1.1,
"eval_loss": 1.7920727729797363,
"eval_runtime": 4.9637,
"eval_samples_per_second": 161.171,
"eval_steps_per_second": 20.146,
"step": 2100
},
{
"epoch": 1.15,
"eval_loss": 1.7202229499816895,
"eval_runtime": 4.9505,
"eval_samples_per_second": 161.6,
"eval_steps_per_second": 20.2,
"step": 2200
},
{
"epoch": 1.2,
"eval_loss": 1.7953866720199585,
"eval_runtime": 4.9571,
"eval_samples_per_second": 161.384,
"eval_steps_per_second": 20.173,
"step": 2300
},
{
"epoch": 1.25,
"eval_loss": 1.7172852754592896,
"eval_runtime": 4.9472,
"eval_samples_per_second": 161.708,
"eval_steps_per_second": 20.214,
"step": 2400
},
{
"epoch": 1.31,
"eval_loss": 1.7360247373580933,
"eval_runtime": 4.9571,
"eval_samples_per_second": 161.384,
"eval_steps_per_second": 20.173,
"step": 2500
},
{
"epoch": 1.36,
"eval_loss": 1.8252145051956177,
"eval_runtime": 4.9593,
"eval_samples_per_second": 161.312,
"eval_steps_per_second": 20.164,
"step": 2600
},
{
"epoch": 1.41,
"eval_loss": 1.6163873672485352,
"eval_runtime": 4.9617,
"eval_samples_per_second": 161.234,
"eval_steps_per_second": 20.154,
"step": 2700
},
{
"epoch": 1.46,
"eval_loss": 1.7062618732452393,
"eval_runtime": 4.9606,
"eval_samples_per_second": 161.272,
"eval_steps_per_second": 20.159,
"step": 2800
},
{
"epoch": 1.52,
"eval_loss": 1.63948392868042,
"eval_runtime": 4.9584,
"eval_samples_per_second": 161.343,
"eval_steps_per_second": 20.168,
"step": 2900
},
{
"epoch": 1.57,
"learning_rate": 6.863565081024569e-06,
"loss": 1.8559,
"step": 3000
},
{
"epoch": 1.57,
"eval_loss": 1.7427096366882324,
"eval_runtime": 5.0112,
"eval_samples_per_second": 159.642,
"eval_steps_per_second": 19.955,
"step": 3000
},
{
"epoch": 1.62,
"eval_loss": 1.6909571886062622,
"eval_runtime": 4.9514,
"eval_samples_per_second": 161.571,
"eval_steps_per_second": 20.196,
"step": 3100
},
{
"epoch": 1.67,
"eval_loss": 1.7176247835159302,
"eval_runtime": 4.949,
"eval_samples_per_second": 161.648,
"eval_steps_per_second": 20.206,
"step": 3200
},
{
"epoch": 1.73,
"eval_loss": 1.6864755153656006,
"eval_runtime": 4.954,
"eval_samples_per_second": 161.487,
"eval_steps_per_second": 20.186,
"step": 3300
},
{
"epoch": 1.78,
"eval_loss": 1.7220550775527954,
"eval_runtime": 4.9485,
"eval_samples_per_second": 161.665,
"eval_steps_per_second": 20.208,
"step": 3400
},
{
"epoch": 1.83,
"eval_loss": 1.7085040807724,
"eval_runtime": 4.9559,
"eval_samples_per_second": 161.424,
"eval_steps_per_second": 20.178,
"step": 3500
},
{
"epoch": 1.88,
"eval_loss": 1.6278865337371826,
"eval_runtime": 4.9576,
"eval_samples_per_second": 161.368,
"eval_steps_per_second": 20.171,
"step": 3600
},
{
"epoch": 1.93,
"eval_loss": 1.716471552848816,
"eval_runtime": 4.9658,
"eval_samples_per_second": 161.103,
"eval_steps_per_second": 20.138,
"step": 3700
},
{
"epoch": 1.99,
"eval_loss": 1.6102560758590698,
"eval_runtime": 4.9726,
"eval_samples_per_second": 160.883,
"eval_steps_per_second": 20.11,
"step": 3800
},
{
"epoch": 2.04,
"eval_loss": 1.6070889234542847,
"eval_runtime": 4.9783,
"eval_samples_per_second": 160.697,
"eval_steps_per_second": 20.087,
"step": 3900
},
{
"epoch": 2.09,
"learning_rate": 5.8180867746994255e-06,
"loss": 1.8098,
"step": 4000
},
{
"epoch": 2.09,
"eval_loss": 1.6875723600387573,
"eval_runtime": 4.9893,
"eval_samples_per_second": 160.344,
"eval_steps_per_second": 20.043,
"step": 4000
},
{
"epoch": 2.14,
"eval_loss": 1.5920379161834717,
"eval_runtime": 4.9822,
"eval_samples_per_second": 160.571,
"eval_steps_per_second": 20.071,
"step": 4100
},
{
"epoch": 2.2,
"eval_loss": 1.6611220836639404,
"eval_runtime": 4.9604,
"eval_samples_per_second": 161.279,
"eval_steps_per_second": 20.16,
"step": 4200
},
{
"epoch": 2.25,
"eval_loss": 1.6467419862747192,
"eval_runtime": 5.0216,
"eval_samples_per_second": 159.312,
"eval_steps_per_second": 19.914,
"step": 4300
},
{
"epoch": 2.3,
"eval_loss": 1.6913862228393555,
"eval_runtime": 4.9754,
"eval_samples_per_second": 160.791,
"eval_steps_per_second": 20.099,
"step": 4400
},
{
"epoch": 2.35,
"eval_loss": 1.7248626947402954,
"eval_runtime": 4.9681,
"eval_samples_per_second": 161.027,
"eval_steps_per_second": 20.128,
"step": 4500
},
{
"epoch": 2.4,
"eval_loss": 1.668603539466858,
"eval_runtime": 4.9751,
"eval_samples_per_second": 160.799,
"eval_steps_per_second": 20.1,
"step": 4600
},
{
"epoch": 2.46,
"eval_loss": 1.571094036102295,
"eval_runtime": 4.9862,
"eval_samples_per_second": 160.444,
"eval_steps_per_second": 20.055,
"step": 4700
},
{
"epoch": 2.51,
"eval_loss": 1.6627862453460693,
"eval_runtime": 4.9621,
"eval_samples_per_second": 161.224,
"eval_steps_per_second": 20.153,
"step": 4800
},
{
"epoch": 2.56,
"eval_loss": 1.5440031290054321,
"eval_runtime": 4.9714,
"eval_samples_per_second": 160.921,
"eval_steps_per_second": 20.115,
"step": 4900
},
{
"epoch": 2.61,
"learning_rate": 4.7726084683742815e-06,
"loss": 1.7335,
"step": 5000
},
{
"epoch": 2.61,
"eval_loss": 1.5720436573028564,
"eval_runtime": 5.0224,
"eval_samples_per_second": 159.285,
"eval_steps_per_second": 19.911,
"step": 5000
},
{
"epoch": 2.67,
"eval_loss": 1.6262255907058716,
"eval_runtime": 4.9733,
"eval_samples_per_second": 160.86,
"eval_steps_per_second": 20.108,
"step": 5100
},
{
"epoch": 2.72,
"eval_loss": 1.579461693763733,
"eval_runtime": 4.9655,
"eval_samples_per_second": 161.111,
"eval_steps_per_second": 20.139,
"step": 5200
},
{
"epoch": 2.77,
"eval_loss": 1.64106023311615,
"eval_runtime": 4.9697,
"eval_samples_per_second": 160.974,
"eval_steps_per_second": 20.122,
"step": 5300
},
{
"epoch": 2.82,
"eval_loss": 1.6266947984695435,
"eval_runtime": 4.9845,
"eval_samples_per_second": 160.496,
"eval_steps_per_second": 20.062,
"step": 5400
},
{
"epoch": 2.88,
"eval_loss": 1.5381076335906982,
"eval_runtime": 4.9925,
"eval_samples_per_second": 160.241,
"eval_steps_per_second": 20.03,
"step": 5500
},
{
"epoch": 2.93,
"eval_loss": 1.6071343421936035,
"eval_runtime": 4.9709,
"eval_samples_per_second": 160.937,
"eval_steps_per_second": 20.117,
"step": 5600
},
{
"epoch": 2.98,
"eval_loss": 1.57161283493042,
"eval_runtime": 4.984,
"eval_samples_per_second": 160.515,
"eval_steps_per_second": 20.064,
"step": 5700
},
{
"epoch": 3.03,
"eval_loss": 1.6507529020309448,
"eval_runtime": 4.9769,
"eval_samples_per_second": 160.744,
"eval_steps_per_second": 20.093,
"step": 5800
},
{
"epoch": 3.08,
"eval_loss": 1.6694835424423218,
"eval_runtime": 4.9622,
"eval_samples_per_second": 161.22,
"eval_steps_per_second": 20.153,
"step": 5900
},
{
"epoch": 3.14,
"learning_rate": 3.727130162049138e-06,
"loss": 1.7197,
"step": 6000
},
{
"epoch": 3.14,
"eval_loss": 1.5861258506774902,
"eval_runtime": 5.0223,
"eval_samples_per_second": 159.29,
"eval_steps_per_second": 19.911,
"step": 6000
},
{
"epoch": 3.19,
"eval_loss": 1.6594449281692505,
"eval_runtime": 4.9785,
"eval_samples_per_second": 160.692,
"eval_steps_per_second": 20.087,
"step": 6100
},
{
"epoch": 3.24,
"eval_loss": 1.5593453645706177,
"eval_runtime": 4.9715,
"eval_samples_per_second": 160.917,
"eval_steps_per_second": 20.115,
"step": 6200
},
{
"epoch": 3.29,
"eval_loss": 1.5538159608840942,
"eval_runtime": 4.9601,
"eval_samples_per_second": 161.285,
"eval_steps_per_second": 20.161,
"step": 6300
},
{
"epoch": 3.35,
"eval_loss": 1.626267671585083,
"eval_runtime": 4.9624,
"eval_samples_per_second": 161.213,
"eval_steps_per_second": 20.152,
"step": 6400
},
{
"epoch": 3.4,
"eval_loss": 1.5642911195755005,
"eval_runtime": 4.9736,
"eval_samples_per_second": 160.85,
"eval_steps_per_second": 20.106,
"step": 6500
},
{
"epoch": 3.45,
"eval_loss": 1.610222578048706,
"eval_runtime": 4.9769,
"eval_samples_per_second": 160.743,
"eval_steps_per_second": 20.093,
"step": 6600
},
{
"epoch": 3.5,
"eval_loss": 1.5607407093048096,
"eval_runtime": 4.9764,
"eval_samples_per_second": 160.76,
"eval_steps_per_second": 20.095,
"step": 6700
},
{
"epoch": 3.55,
"eval_loss": 1.4866421222686768,
"eval_runtime": 4.9715,
"eval_samples_per_second": 160.918,
"eval_steps_per_second": 20.115,
"step": 6800
},
{
"epoch": 3.61,
"eval_loss": 1.6732252836227417,
"eval_runtime": 4.9779,
"eval_samples_per_second": 160.711,
"eval_steps_per_second": 20.089,
"step": 6900
},
{
"epoch": 3.66,
"learning_rate": 2.681651855723994e-06,
"loss": 1.6777,
"step": 7000
},
{
"epoch": 3.66,
"eval_loss": 1.6289987564086914,
"eval_runtime": 5.0056,
"eval_samples_per_second": 159.819,
"eval_steps_per_second": 19.977,
"step": 7000
},
{
"epoch": 3.71,
"eval_loss": 1.62163507938385,
"eval_runtime": 4.9678,
"eval_samples_per_second": 161.037,
"eval_steps_per_second": 20.13,
"step": 7100
},
{
"epoch": 3.76,
"eval_loss": 1.5280898809432983,
"eval_runtime": 4.9766,
"eval_samples_per_second": 160.752,
"eval_steps_per_second": 20.094,
"step": 7200
},
{
"epoch": 3.82,
"eval_loss": 1.5369991064071655,
"eval_runtime": 4.9693,
"eval_samples_per_second": 160.988,
"eval_steps_per_second": 20.123,
"step": 7300
},
{
"epoch": 3.87,
"eval_loss": 1.4636890888214111,
"eval_runtime": 4.9847,
"eval_samples_per_second": 160.491,
"eval_steps_per_second": 20.061,
"step": 7400
},
{
"epoch": 3.92,
"eval_loss": 1.5771527290344238,
"eval_runtime": 4.969,
"eval_samples_per_second": 160.999,
"eval_steps_per_second": 20.125,
"step": 7500
},
{
"epoch": 3.97,
"eval_loss": 1.5966529846191406,
"eval_runtime": 4.9711,
"eval_samples_per_second": 160.931,
"eval_steps_per_second": 20.116,
"step": 7600
},
{
"epoch": 4.03,
"eval_loss": 1.5244237184524536,
"eval_runtime": 4.9746,
"eval_samples_per_second": 160.816,
"eval_steps_per_second": 20.102,
"step": 7700
},
{
"epoch": 4.08,
"eval_loss": 1.5681614875793457,
"eval_runtime": 4.9878,
"eval_samples_per_second": 160.392,
"eval_steps_per_second": 20.049,
"step": 7800
},
{
"epoch": 4.13,
"eval_loss": 1.565970778465271,
"eval_runtime": 4.9728,
"eval_samples_per_second": 160.874,
"eval_steps_per_second": 20.109,
"step": 7900
},
{
"epoch": 4.18,
"learning_rate": 1.63617354939885e-06,
"loss": 1.6225,
"step": 8000
},
{
"epoch": 4.18,
"eval_loss": 1.539146065711975,
"eval_runtime": 5.023,
"eval_samples_per_second": 159.266,
"eval_steps_per_second": 19.908,
"step": 8000
},
{
"epoch": 4.23,
"eval_loss": 1.6028637886047363,
"eval_runtime": 4.9743,
"eval_samples_per_second": 160.826,
"eval_steps_per_second": 20.103,
"step": 8100
},
{
"epoch": 4.29,
"eval_loss": 1.5656570196151733,
"eval_runtime": 4.9679,
"eval_samples_per_second": 161.033,
"eval_steps_per_second": 20.129,
"step": 8200
},
{
"epoch": 4.34,
"eval_loss": 1.5960673093795776,
"eval_runtime": 5.0005,
"eval_samples_per_second": 159.985,
"eval_steps_per_second": 19.998,
"step": 8300
},
{
"epoch": 4.39,
"eval_loss": 1.6787633895874023,
"eval_runtime": 4.9828,
"eval_samples_per_second": 160.552,
"eval_steps_per_second": 20.069,
"step": 8400
},
{
"epoch": 4.44,
"eval_loss": 1.6510525941848755,
"eval_runtime": 4.9935,
"eval_samples_per_second": 160.209,
"eval_steps_per_second": 20.026,
"step": 8500
},
{
"epoch": 4.5,
"eval_loss": 1.5883086919784546,
"eval_runtime": 4.9809,
"eval_samples_per_second": 160.614,
"eval_steps_per_second": 20.077,
"step": 8600
},
{
"epoch": 4.55,
"eval_loss": 1.502740502357483,
"eval_runtime": 4.9768,
"eval_samples_per_second": 160.746,
"eval_steps_per_second": 20.093,
"step": 8700
},
{
"epoch": 4.6,
"eval_loss": 1.6155657768249512,
"eval_runtime": 4.9737,
"eval_samples_per_second": 160.848,
"eval_steps_per_second": 20.106,
"step": 8800
},
{
"epoch": 4.65,
"eval_loss": 1.5786443948745728,
"eval_runtime": 4.9685,
"eval_samples_per_second": 161.015,
"eval_steps_per_second": 20.127,
"step": 8900
},
{
"epoch": 4.7,
"learning_rate": 5.906952430737063e-07,
"loss": 1.6198,
"step": 9000
},
{
"epoch": 4.7,
"eval_loss": 1.5171962976455688,
"eval_runtime": 5.0043,
"eval_samples_per_second": 159.861,
"eval_steps_per_second": 19.983,
"step": 9000
},
{
"epoch": 4.76,
"eval_loss": 1.5533108711242676,
"eval_runtime": 4.976,
"eval_samples_per_second": 160.773,
"eval_steps_per_second": 20.097,
"step": 9100
},
{
"epoch": 4.81,
"eval_loss": 1.4648842811584473,
"eval_runtime": 4.9677,
"eval_samples_per_second": 161.04,
"eval_steps_per_second": 20.13,
"step": 9200
},
{
"epoch": 4.86,
"eval_loss": 1.5831387042999268,
"eval_runtime": 4.9721,
"eval_samples_per_second": 160.898,
"eval_steps_per_second": 20.112,
"step": 9300
},
{
"epoch": 4.91,
"eval_loss": 1.524021029472351,
"eval_runtime": 4.9684,
"eval_samples_per_second": 161.017,
"eval_steps_per_second": 20.127,
"step": 9400
},
{
"epoch": 4.97,
"eval_loss": 1.6083089113235474,
"eval_runtime": 4.9658,
"eval_samples_per_second": 161.103,
"eval_steps_per_second": 20.138,
"step": 9500
},
{
"epoch": 5.0,
"step": 9565,
"total_flos": 6408621045709344.0,
"train_loss": 1.7776700918673223,
"train_runtime": 8899.7428,
"train_samples_per_second": 8.596,
"train_steps_per_second": 1.075
}
],
"max_steps": 9565,
"num_train_epochs": 5,
"total_flos": 6408621045709344.0,
"trial_name": null,
"trial_params": null
}