xlm-roberta-large-hi / trainer_state.json
shax's picture v1 018b51c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 9565,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"eval_loss": 2.07426118850708,
"eval_runtime": 4.5218,
"eval_samples_per_second": 176.919,
"eval_steps_per_second": 22.115,
"step": 100
},
{
"epoch": 0.1,
"eval_loss": 2.1192378997802734,
"eval_runtime": 4.5039,
"eval_samples_per_second": 177.622,
"eval_steps_per_second": 22.203,
"step": 200
},
{
"epoch": 0.16,
"eval_loss": 2.0722081661224365,
"eval_runtime": 4.515,
"eval_samples_per_second": 177.189,
"eval_steps_per_second": 22.149,
"step": 300
},
{
"epoch": 0.21,
"eval_loss": 1.9933481216430664,
"eval_runtime": 4.523,
"eval_samples_per_second": 176.873,
"eval_steps_per_second": 22.109,
"step": 400
},
{
"epoch": 0.26,
"eval_loss": 1.922950029373169,
"eval_runtime": 4.5117,
"eval_samples_per_second": 177.317,
"eval_steps_per_second": 22.165,
"step": 500
},
{
"epoch": 0.31,
"eval_loss": 2.1238279342651367,
"eval_runtime": 4.5134,
"eval_samples_per_second": 177.248,
"eval_steps_per_second": 22.156,
"step": 600
},
{
"epoch": 0.37,
"eval_loss": 1.962709665298462,
"eval_runtime": 4.5323,
"eval_samples_per_second": 176.511,
"eval_steps_per_second": 22.064,
"step": 700
},
{
"epoch": 0.42,
"eval_loss": 1.95835542678833,
"eval_runtime": 4.5072,
"eval_samples_per_second": 177.495,
"eval_steps_per_second": 22.187,
"step": 800
},
{
"epoch": 0.47,
"eval_loss": 2.001847505569458,
"eval_runtime": 4.5141,
"eval_samples_per_second": 177.221,
"eval_steps_per_second": 22.153,
"step": 900
},
{
"epoch": 0.52,
"learning_rate": 8.954521693674857e-06,
"loss": 2.153,
"step": 1000
},
{
"epoch": 0.52,
"eval_loss": 2.0234880447387695,
"eval_runtime": 4.5971,
"eval_samples_per_second": 174.022,
"eval_steps_per_second": 21.753,
"step": 1000
},
{
"epoch": 0.58,
"eval_loss": 1.8845772743225098,
"eval_runtime": 4.5129,
"eval_samples_per_second": 177.269,
"eval_steps_per_second": 22.159,
"step": 1100
},
{
"epoch": 0.63,
"eval_loss": 1.9476629495620728,
"eval_runtime": 4.5112,
"eval_samples_per_second": 177.337,
"eval_steps_per_second": 22.167,
"step": 1200
},
{
"epoch": 0.68,
"eval_loss": 1.9745019674301147,
"eval_runtime": 4.5047,
"eval_samples_per_second": 177.592,
"eval_steps_per_second": 22.199,
"step": 1300
},
{
"epoch": 0.73,
"eval_loss": 1.9027578830718994,
"eval_runtime": 4.5114,
"eval_samples_per_second": 177.33,
"eval_steps_per_second": 22.166,
"step": 1400
},
{
"epoch": 0.78,
"eval_loss": 1.8273848295211792,
"eval_runtime": 4.5108,
"eval_samples_per_second": 177.35,
"eval_steps_per_second": 22.169,
"step": 1500
},
{
"epoch": 0.84,
"eval_loss": 1.8856213092803955,
"eval_runtime": 4.5175,
"eval_samples_per_second": 177.09,
"eval_steps_per_second": 22.136,
"step": 1600
},
{
"epoch": 0.89,
"eval_loss": 1.923270583152771,
"eval_runtime": 4.5201,
"eval_samples_per_second": 176.988,
"eval_steps_per_second": 22.123,
"step": 1700
},
{
"epoch": 0.94,
"eval_loss": 2.0153915882110596,
"eval_runtime": 4.5025,
"eval_samples_per_second": 177.679,
"eval_steps_per_second": 22.21,
"step": 1800
},
{
"epoch": 0.99,
"eval_loss": 1.9392834901809692,
"eval_runtime": 4.5093,
"eval_samples_per_second": 177.412,
"eval_steps_per_second": 22.177,
"step": 1900
},
{
"epoch": 1.05,
"learning_rate": 7.909043387349713e-06,
"loss": 2.0544,
"step": 2000
},
{
"epoch": 1.05,
"eval_loss": 1.8981847763061523,
"eval_runtime": 4.5359,
"eval_samples_per_second": 176.371,
"eval_steps_per_second": 22.046,
"step": 2000
},
{
"epoch": 1.1,
"eval_loss": 1.7835720777511597,
"eval_runtime": 4.507,
"eval_samples_per_second": 177.502,
"eval_steps_per_second": 22.188,
"step": 2100
},
{
"epoch": 1.15,
"eval_loss": 1.8530174493789673,
"eval_runtime": 4.5153,
"eval_samples_per_second": 177.176,
"eval_steps_per_second": 22.147,
"step": 2200
},
{
"epoch": 1.2,
"eval_loss": 1.923173189163208,
"eval_runtime": 4.499,
"eval_samples_per_second": 177.817,
"eval_steps_per_second": 22.227,
"step": 2300
},
{
"epoch": 1.25,
"eval_loss": 1.8458834886550903,
"eval_runtime": 4.4986,
"eval_samples_per_second": 177.834,
"eval_steps_per_second": 22.229,
"step": 2400
},
{
"epoch": 1.31,
"eval_loss": 1.9027942419052124,
"eval_runtime": 4.5007,
"eval_samples_per_second": 177.752,
"eval_steps_per_second": 22.219,
"step": 2500
},
{
"epoch": 1.36,
"eval_loss": 1.9002165794372559,
"eval_runtime": 4.5078,
"eval_samples_per_second": 177.472,
"eval_steps_per_second": 22.184,
"step": 2600
},
{
"epoch": 1.41,
"eval_loss": 1.8325750827789307,
"eval_runtime": 4.4946,
"eval_samples_per_second": 177.992,
"eval_steps_per_second": 22.249,
"step": 2700
},
{
"epoch": 1.46,
"eval_loss": 1.7771005630493164,
"eval_runtime": 4.5026,
"eval_samples_per_second": 177.675,
"eval_steps_per_second": 22.209,
"step": 2800
},
{
"epoch": 1.52,
"eval_loss": 1.8039147853851318,
"eval_runtime": 4.5124,
"eval_samples_per_second": 177.289,
"eval_steps_per_second": 22.161,
"step": 2900
},
{
"epoch": 1.57,
"learning_rate": 6.863565081024569e-06,
"loss": 1.9527,
"step": 3000
},
{
"epoch": 1.57,
"eval_loss": 1.7938885688781738,
"eval_runtime": 4.5501,
"eval_samples_per_second": 175.821,
"eval_steps_per_second": 21.978,
"step": 3000
},
{
"epoch": 1.62,
"eval_loss": 1.8785758018493652,
"eval_runtime": 4.5047,
"eval_samples_per_second": 177.594,
"eval_steps_per_second": 22.199,
"step": 3100
},
{
"epoch": 1.67,
"eval_loss": 1.9068948030471802,
"eval_runtime": 4.5135,
"eval_samples_per_second": 177.248,
"eval_steps_per_second": 22.156,
"step": 3200
},
{
"epoch": 1.73,
"eval_loss": 1.904042363166809,
"eval_runtime": 4.501,
"eval_samples_per_second": 177.74,
"eval_steps_per_second": 22.218,
"step": 3300
},
{
"epoch": 1.78,
"eval_loss": 1.7986688613891602,
"eval_runtime": 4.5011,
"eval_samples_per_second": 177.733,
"eval_steps_per_second": 22.217,
"step": 3400
},
{
"epoch": 1.83,
"eval_loss": 1.8004988431930542,
"eval_runtime": 4.5008,
"eval_samples_per_second": 177.745,
"eval_steps_per_second": 22.218,
"step": 3500
},
{
"epoch": 1.88,
"eval_loss": 1.7914013862609863,
"eval_runtime": 4.5051,
"eval_samples_per_second": 177.575,
"eval_steps_per_second": 22.197,
"step": 3600
},
{
"epoch": 1.93,
"eval_loss": 1.809700608253479,
"eval_runtime": 4.5139,
"eval_samples_per_second": 177.231,
"eval_steps_per_second": 22.154,
"step": 3700
},
{
"epoch": 1.99,
"eval_loss": 1.7780152559280396,
"eval_runtime": 4.5094,
"eval_samples_per_second": 177.406,
"eval_steps_per_second": 22.176,
"step": 3800
},
{
"epoch": 2.04,
"eval_loss": 1.8305178880691528,
"eval_runtime": 4.5176,
"eval_samples_per_second": 177.085,
"eval_steps_per_second": 22.136,
"step": 3900
},
{
"epoch": 2.09,
"learning_rate": 5.8180867746994255e-06,
"loss": 1.8792,
"step": 4000
},
{
"epoch": 2.09,
"eval_loss": 1.8090060949325562,
"eval_runtime": 4.5767,
"eval_samples_per_second": 174.798,
"eval_steps_per_second": 21.85,
"step": 4000
},
{
"epoch": 2.14,
"eval_loss": 1.8800756931304932,
"eval_runtime": 4.5273,
"eval_samples_per_second": 176.707,
"eval_steps_per_second": 22.088,
"step": 4100
},
{
"epoch": 2.2,
"eval_loss": 1.8428661823272705,
"eval_runtime": 4.5326,
"eval_samples_per_second": 176.498,
"eval_steps_per_second": 22.062,
"step": 4200
},
{
"epoch": 2.25,
"eval_loss": 1.6927213668823242,
"eval_runtime": 4.5371,
"eval_samples_per_second": 176.326,
"eval_steps_per_second": 22.041,
"step": 4300
},
{
"epoch": 2.3,
"eval_loss": 1.8311362266540527,
"eval_runtime": 4.5735,
"eval_samples_per_second": 174.921,
"eval_steps_per_second": 21.865,
"step": 4400
},
{
"epoch": 2.35,
"eval_loss": 1.7397173643112183,
"eval_runtime": 4.5487,
"eval_samples_per_second": 175.875,
"eval_steps_per_second": 21.984,
"step": 4500
},
{
"epoch": 2.4,
"eval_loss": 1.7626467943191528,
"eval_runtime": 4.524,
"eval_samples_per_second": 176.836,
"eval_steps_per_second": 22.105,
"step": 4600
},
{
"epoch": 2.46,
"eval_loss": 1.7439056634902954,
"eval_runtime": 4.5428,
"eval_samples_per_second": 176.104,
"eval_steps_per_second": 22.013,
"step": 4700
},
{
"epoch": 2.51,
"eval_loss": 1.8606833219528198,
"eval_runtime": 4.5132,
"eval_samples_per_second": 177.258,
"eval_steps_per_second": 22.157,
"step": 4800
},
{
"epoch": 2.56,
"eval_loss": 1.756654977798462,
"eval_runtime": 4.5487,
"eval_samples_per_second": 175.876,
"eval_steps_per_second": 21.984,
"step": 4900
},
{
"epoch": 2.61,
"learning_rate": 4.7726084683742815e-06,
"loss": 1.8665,
"step": 5000
},
{
"epoch": 2.61,
"eval_loss": 1.7967605590820312,
"eval_runtime": 4.5714,
"eval_samples_per_second": 175.001,
"eval_steps_per_second": 21.875,
"step": 5000
},
{
"epoch": 2.67,
"eval_loss": 1.6581268310546875,
"eval_runtime": 4.5516,
"eval_samples_per_second": 175.762,
"eval_steps_per_second": 21.97,
"step": 5100
},
{
"epoch": 2.72,
"eval_loss": 1.746982455253601,
"eval_runtime": 4.5429,
"eval_samples_per_second": 176.098,
"eval_steps_per_second": 22.012,
"step": 5200
},
{
"epoch": 2.77,
"eval_loss": 1.7988170385360718,
"eval_runtime": 4.5109,
"eval_samples_per_second": 177.348,
"eval_steps_per_second": 22.169,
"step": 5300
},
{
"epoch": 2.82,
"eval_loss": 1.7785247564315796,
"eval_runtime": 4.5225,
"eval_samples_per_second": 176.894,
"eval_steps_per_second": 22.112,
"step": 5400
},
{
"epoch": 2.88,
"eval_loss": 1.773697853088379,
"eval_runtime": 4.5413,
"eval_samples_per_second": 176.159,
"eval_steps_per_second": 22.02,
"step": 5500
},
{
"epoch": 2.93,
"eval_loss": 1.7781400680541992,
"eval_runtime": 4.5399,
"eval_samples_per_second": 176.215,
"eval_steps_per_second": 22.027,
"step": 5600
},
{
"epoch": 2.98,
"eval_loss": 1.7938991785049438,
"eval_runtime": 4.5409,
"eval_samples_per_second": 176.178,
"eval_steps_per_second": 22.022,
"step": 5700
},
{
"epoch": 3.03,
"eval_loss": 1.8099168539047241,
"eval_runtime": 4.531,
"eval_samples_per_second": 176.562,
"eval_steps_per_second": 22.07,
"step": 5800
},
{
"epoch": 3.08,
"eval_loss": 1.6920589208602905,
"eval_runtime": 4.5199,
"eval_samples_per_second": 176.996,
"eval_steps_per_second": 22.125,
"step": 5900
},
{
"epoch": 3.14,
"learning_rate": 3.727130162049138e-06,
"loss": 1.8279,
"step": 6000
},
{
"epoch": 3.14,
"eval_loss": 1.797145128250122,
"eval_runtime": 4.5816,
"eval_samples_per_second": 174.611,
"eval_steps_per_second": 21.826,
"step": 6000
},
{
"epoch": 3.19,
"eval_loss": 1.7772976160049438,
"eval_runtime": 4.5504,
"eval_samples_per_second": 175.809,
"eval_steps_per_second": 21.976,
"step": 6100
},
{
"epoch": 3.24,
"eval_loss": 1.7359503507614136,
"eval_runtime": 4.5413,
"eval_samples_per_second": 176.162,
"eval_steps_per_second": 22.02,
"step": 6200
},
{
"epoch": 3.29,
"eval_loss": 1.784250020980835,
"eval_runtime": 4.5269,
"eval_samples_per_second": 176.722,
"eval_steps_per_second": 22.09,
"step": 6300
},
{
"epoch": 3.35,
"eval_loss": 1.7917590141296387,
"eval_runtime": 4.539,
"eval_samples_per_second": 176.25,
"eval_steps_per_second": 22.031,
"step": 6400
},
{
"epoch": 3.4,
"eval_loss": 1.8440731763839722,
"eval_runtime": 4.5363,
"eval_samples_per_second": 176.356,
"eval_steps_per_second": 22.045,
"step": 6500
},
{
"epoch": 3.45,
"eval_loss": 1.7029565572738647,
"eval_runtime": 4.5307,
"eval_samples_per_second": 176.574,
"eval_steps_per_second": 22.072,
"step": 6600
},
{
"epoch": 3.5,
"eval_loss": 1.795900583267212,
"eval_runtime": 4.5612,
"eval_samples_per_second": 175.391,
"eval_steps_per_second": 21.924,
"step": 6700
},
{
"epoch": 3.55,
"eval_loss": 1.7106671333312988,
"eval_runtime": 4.5484,
"eval_samples_per_second": 175.888,
"eval_steps_per_second": 21.986,
"step": 6800
},
{
"epoch": 3.61,
"eval_loss": 1.7240768671035767,
"eval_runtime": 4.5058,
"eval_samples_per_second": 177.55,
"eval_steps_per_second": 22.194,
"step": 6900
},
{
"epoch": 3.66,
"learning_rate": 2.681651855723994e-06,
"loss": 1.7734,
"step": 7000
},
{
"epoch": 3.66,
"eval_loss": 1.690421462059021,
"eval_runtime": 4.5879,
"eval_samples_per_second": 174.371,
"eval_steps_per_second": 21.796,
"step": 7000
},
{
"epoch": 3.71,
"eval_loss": 1.7392058372497559,
"eval_runtime": 4.5577,
"eval_samples_per_second": 175.528,
"eval_steps_per_second": 21.941,
"step": 7100
},
{
"epoch": 3.76,
"eval_loss": 1.6490362882614136,
"eval_runtime": 4.5443,
"eval_samples_per_second": 176.043,
"eval_steps_per_second": 22.005,
"step": 7200
},
{
"epoch": 3.82,
"eval_loss": 1.7596625089645386,
"eval_runtime": 4.534,
"eval_samples_per_second": 176.445,
"eval_steps_per_second": 22.056,
"step": 7300
},
{
"epoch": 3.87,
"eval_loss": 1.7427598237991333,
"eval_runtime": 4.5411,
"eval_samples_per_second": 176.168,
"eval_steps_per_second": 22.021,
"step": 7400
},
{
"epoch": 3.92,
"eval_loss": 1.7453887462615967,
"eval_runtime": 4.5345,
"eval_samples_per_second": 176.424,
"eval_steps_per_second": 22.053,
"step": 7500
},
{
"epoch": 3.97,
"eval_loss": 1.6702288389205933,
"eval_runtime": 4.5374,
"eval_samples_per_second": 176.311,
"eval_steps_per_second": 22.039,
"step": 7600
},
{
"epoch": 4.03,
"eval_loss": 1.8289847373962402,
"eval_runtime": 5.0124,
"eval_samples_per_second": 159.603,
"eval_steps_per_second": 19.95,
"step": 7700
},
{
"epoch": 4.08,
"eval_loss": 1.8589656352996826,
"eval_runtime": 4.5769,
"eval_samples_per_second": 174.79,
"eval_steps_per_second": 21.849,
"step": 7800
},
{
"epoch": 4.13,
"eval_loss": 1.7294247150421143,
"eval_runtime": 4.5529,
"eval_samples_per_second": 175.711,
"eval_steps_per_second": 21.964,
"step": 7900
},
{
"epoch": 4.18,
"learning_rate": 1.63617354939885e-06,
"loss": 1.7346,
"step": 8000
},
{
"epoch": 4.18,
"eval_loss": 1.7356834411621094,
"eval_runtime": 4.5781,
"eval_samples_per_second": 174.745,
"eval_steps_per_second": 21.843,
"step": 8000
},
{
"epoch": 4.23,
"eval_loss": 1.8036160469055176,
"eval_runtime": 4.5507,
"eval_samples_per_second": 175.796,
"eval_steps_per_second": 21.974,
"step": 8100
},
{
"epoch": 4.29,
"eval_loss": 1.6360139846801758,
"eval_runtime": 4.5357,
"eval_samples_per_second": 176.379,
"eval_steps_per_second": 22.047,
"step": 8200
},
{
"epoch": 4.34,
"eval_loss": 1.710027813911438,
"eval_runtime": 4.5265,
"eval_samples_per_second": 176.736,
"eval_steps_per_second": 22.092,
"step": 8300
},
{
"epoch": 4.39,
"eval_loss": 1.7591100931167603,
"eval_runtime": 4.5309,
"eval_samples_per_second": 176.565,
"eval_steps_per_second": 22.071,
"step": 8400
},
{
"epoch": 4.44,
"eval_loss": 1.6441596746444702,
"eval_runtime": 4.5328,
"eval_samples_per_second": 176.493,
"eval_steps_per_second": 22.062,
"step": 8500
},
{
"epoch": 4.5,
"eval_loss": 1.6503033638000488,
"eval_runtime": 4.5354,
"eval_samples_per_second": 176.39,
"eval_steps_per_second": 22.049,
"step": 8600
},
{
"epoch": 4.55,
"eval_loss": 1.6993392705917358,
"eval_runtime": 4.5291,
"eval_samples_per_second": 176.634,
"eval_steps_per_second": 22.079,
"step": 8700
},
{
"epoch": 4.6,
"eval_loss": 1.741347312927246,
"eval_runtime": 4.5403,
"eval_samples_per_second": 176.201,
"eval_steps_per_second": 22.025,
"step": 8800
},
{
"epoch": 4.65,
"eval_loss": 1.7068595886230469,
"eval_runtime": 4.5165,
"eval_samples_per_second": 177.126,
"eval_steps_per_second": 22.141,
"step": 8900
},
{
"epoch": 4.7,
"learning_rate": 5.906952430737063e-07,
"loss": 1.7046,
"step": 9000
},
{
"epoch": 4.7,
"eval_loss": 1.741222858428955,
"eval_runtime": 4.5635,
"eval_samples_per_second": 175.305,
"eval_steps_per_second": 21.913,
"step": 9000
},
{
"epoch": 4.76,
"eval_loss": 1.695675015449524,
"eval_runtime": 4.542,
"eval_samples_per_second": 176.133,
"eval_steps_per_second": 22.017,
"step": 9100
},
{
"epoch": 4.81,
"eval_loss": 1.8147244453430176,
"eval_runtime": 4.5361,
"eval_samples_per_second": 176.364,
"eval_steps_per_second": 22.045,
"step": 9200
},
{
"epoch": 4.86,
"eval_loss": 1.6123968362808228,
"eval_runtime": 4.5504,
"eval_samples_per_second": 175.81,
"eval_steps_per_second": 21.976,
"step": 9300
},
{
"epoch": 4.91,
"eval_loss": 1.6993104219436646,
"eval_runtime": 4.5406,
"eval_samples_per_second": 176.188,
"eval_steps_per_second": 22.023,
"step": 9400
},
{
"epoch": 4.97,
"eval_loss": 1.6907097101211548,
"eval_runtime": 4.5386,
"eval_samples_per_second": 176.266,
"eval_steps_per_second": 22.033,
"step": 9500
},
{
"epoch": 5.0,
"step": 9565,
"total_flos": 6026840964129600.0,
"train_loss": 1.8707820080738695,
"train_runtime": 8847.0854,
"train_samples_per_second": 8.647,
"train_steps_per_second": 1.081
}
],
"max_steps": 9565,
"num_train_epochs": 5,
"total_flos": 6026840964129600.0,
"trial_name": null,
"trial_params": null
}