twitter-roberta-large-2022-154m / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9920199230667882,
"global_step": 24000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.999793741991486e-06,
"loss": 1.742,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 9.999587070640269e-06,
"loss": 1.7307,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 9.999380399289051e-06,
"loss": 1.7414,
"step": 1500
},
{
"epoch": 0.08,
"learning_rate": 9.999173727937835e-06,
"loss": 1.7396,
"step": 2000
},
{
"epoch": 0.08,
"eval_accuracy": 0.6747098550914694,
"eval_loss": 1.6244006156921387,
"eval_runtime": 13.0499,
"eval_samples_per_second": 306.516,
"eval_steps_per_second": 0.766,
"step": 2000
},
{
"epoch": 0.1,
"learning_rate": 9.998967056586617e-06,
"loss": 1.738,
"step": 2500
},
{
"epoch": 0.12,
"learning_rate": 9.9987603852354e-06,
"loss": 1.7357,
"step": 3000
},
{
"epoch": 0.14,
"learning_rate": 9.998553713884183e-06,
"loss": 1.7349,
"step": 3500
},
{
"epoch": 0.17,
"learning_rate": 9.998347042532966e-06,
"loss": 1.7309,
"step": 4000
},
{
"epoch": 0.17,
"eval_accuracy": 0.6821878279118573,
"eval_loss": 1.6006274223327637,
"eval_runtime": 13.0815,
"eval_samples_per_second": 305.775,
"eval_steps_per_second": 0.764,
"step": 4000
},
{
"epoch": 0.19,
"learning_rate": 9.998140371181748e-06,
"loss": 1.7322,
"step": 4500
},
{
"epoch": 0.21,
"learning_rate": 9.99793369983053e-06,
"loss": 1.7302,
"step": 5000
},
{
"epoch": 0.23,
"learning_rate": 9.997727028479312e-06,
"loss": 1.7281,
"step": 5500
},
{
"epoch": 0.25,
"learning_rate": 9.997520357128095e-06,
"loss": 1.7273,
"step": 6000
},
{
"epoch": 0.25,
"eval_accuracy": 0.6896347482724581,
"eval_loss": 1.5701262950897217,
"eval_runtime": 13.0657,
"eval_samples_per_second": 306.145,
"eval_steps_per_second": 0.765,
"step": 6000
},
{
"epoch": 0.27,
"learning_rate": 9.997313685776879e-06,
"loss": 1.7252,
"step": 6500
},
{
"epoch": 0.29,
"learning_rate": 9.997107427768363e-06,
"loss": 1.7231,
"step": 7000
},
{
"epoch": 0.31,
"learning_rate": 9.996901169759848e-06,
"loss": 1.7223,
"step": 7500
},
{
"epoch": 0.33,
"learning_rate": 9.99669449840863e-06,
"loss": 1.7215,
"step": 8000
},
{
"epoch": 0.33,
"eval_accuracy": 0.6799366211130917,
"eval_loss": 1.6207975149154663,
"eval_runtime": 11.6877,
"eval_samples_per_second": 342.241,
"eval_steps_per_second": 0.856,
"step": 8000
},
{
"epoch": 0.35,
"learning_rate": 9.996487827057414e-06,
"loss": 1.7182,
"step": 8500
},
{
"epoch": 0.37,
"learning_rate": 9.996281155706197e-06,
"loss": 1.7189,
"step": 9000
},
{
"epoch": 0.39,
"learning_rate": 9.996074484354979e-06,
"loss": 1.7169,
"step": 9500
},
{
"epoch": 0.41,
"learning_rate": 9.995868226346464e-06,
"loss": 1.7173,
"step": 10000
},
{
"epoch": 0.41,
"eval_accuracy": 0.6820005349023803,
"eval_loss": 1.6004101037979126,
"eval_runtime": 13.0596,
"eval_samples_per_second": 306.287,
"eval_steps_per_second": 0.766,
"step": 10000
},
{
"epoch": 0.43,
"learning_rate": 9.995661554995247e-06,
"loss": 1.7152,
"step": 10500
},
{
"epoch": 0.45,
"learning_rate": 9.99545488364403e-06,
"loss": 1.7126,
"step": 11000
},
{
"epoch": 0.48,
"learning_rate": 9.995248212292813e-06,
"loss": 1.7114,
"step": 11500
},
{
"epoch": 0.5,
"learning_rate": 9.995041954284299e-06,
"loss": 1.7098,
"step": 12000
},
{
"epoch": 0.5,
"eval_accuracy": 0.6854211592498203,
"eval_loss": 1.6027569770812988,
"eval_runtime": 13.0635,
"eval_samples_per_second": 306.196,
"eval_steps_per_second": 0.765,
"step": 12000
},
{
"epoch": 0.52,
"learning_rate": 9.99483528293308e-06,
"loss": 1.7092,
"step": 12500
},
{
"epoch": 0.54,
"learning_rate": 9.994628611581863e-06,
"loss": 1.7081,
"step": 13000
},
{
"epoch": 0.56,
"learning_rate": 9.994421940230647e-06,
"loss": 1.7071,
"step": 13500
},
{
"epoch": 0.58,
"learning_rate": 9.99421526887943e-06,
"loss": 1.7066,
"step": 14000
},
{
"epoch": 0.58,
"eval_accuracy": 0.6892430278884463,
"eval_loss": 1.568811058998108,
"eval_runtime": 11.6681,
"eval_samples_per_second": 342.815,
"eval_steps_per_second": 0.857,
"step": 14000
},
{
"epoch": 0.6,
"learning_rate": 9.994008597528212e-06,
"loss": 1.7062,
"step": 14500
},
{
"epoch": 0.62,
"learning_rate": 9.993801926176994e-06,
"loss": 1.7041,
"step": 15000
},
{
"epoch": 0.64,
"learning_rate": 9.993595254825776e-06,
"loss": 1.7028,
"step": 15500
},
{
"epoch": 0.66,
"learning_rate": 9.99338858347456e-06,
"loss": 1.702,
"step": 16000
},
{
"epoch": 0.66,
"eval_accuracy": 0.689404839764552,
"eval_loss": 1.5777287483215332,
"eval_runtime": 13.0408,
"eval_samples_per_second": 306.729,
"eval_steps_per_second": 0.767,
"step": 16000
},
{
"epoch": 0.68,
"learning_rate": 9.993181912123342e-06,
"loss": 1.6984,
"step": 16500
},
{
"epoch": 0.7,
"learning_rate": 9.992975240772125e-06,
"loss": 1.6999,
"step": 17000
},
{
"epoch": 0.72,
"learning_rate": 9.99276898276361e-06,
"loss": 1.6983,
"step": 17500
},
{
"epoch": 0.74,
"learning_rate": 9.992562724755096e-06,
"loss": 1.6972,
"step": 18000
},
{
"epoch": 0.74,
"eval_accuracy": 0.6915187376725839,
"eval_loss": 1.5638251304626465,
"eval_runtime": 11.6615,
"eval_samples_per_second": 343.008,
"eval_steps_per_second": 0.858,
"step": 18000
},
{
"epoch": 0.76,
"learning_rate": 9.992356053403878e-06,
"loss": 1.694,
"step": 18500
},
{
"epoch": 0.79,
"learning_rate": 9.992149795395364e-06,
"loss": 1.6954,
"step": 19000
},
{
"epoch": 0.81,
"learning_rate": 9.991943124044146e-06,
"loss": 1.6946,
"step": 19500
},
{
"epoch": 0.83,
"learning_rate": 9.991736452692928e-06,
"loss": 1.6939,
"step": 20000
},
{
"epoch": 0.83,
"eval_accuracy": 0.6928982725527831,
"eval_loss": 1.5452697277069092,
"eval_runtime": 13.0226,
"eval_samples_per_second": 307.159,
"eval_steps_per_second": 0.768,
"step": 20000
},
{
"epoch": 0.85,
"learning_rate": 9.99152978134171e-06,
"loss": 1.6927,
"step": 20500
},
{
"epoch": 0.87,
"learning_rate": 9.991323109990494e-06,
"loss": 1.6901,
"step": 21000
},
{
"epoch": 0.89,
"learning_rate": 9.991116438639277e-06,
"loss": 1.6892,
"step": 21500
},
{
"epoch": 0.91,
"learning_rate": 9.990909767288059e-06,
"loss": 1.6898,
"step": 22000
},
{
"epoch": 0.91,
"eval_accuracy": 0.683527801316733,
"eval_loss": 1.5818458795547485,
"eval_runtime": 13.0663,
"eval_samples_per_second": 306.132,
"eval_steps_per_second": 0.765,
"step": 22000
},
{
"epoch": 0.93,
"learning_rate": 9.990703095936843e-06,
"loss": 1.6889,
"step": 22500
},
{
"epoch": 0.95,
"learning_rate": 9.990496424585625e-06,
"loss": 1.6855,
"step": 23000
},
{
"epoch": 0.97,
"learning_rate": 9.990289753234407e-06,
"loss": 1.6853,
"step": 23500
},
{
"epoch": 0.99,
"learning_rate": 9.990083908568595e-06,
"loss": 1.6856,
"step": 24000
},
{
"epoch": 0.99,
"eval_accuracy": 0.6877091672681934,
"eval_loss": 1.5628535747528076,
"eval_runtime": 13.0163,
"eval_samples_per_second": 307.306,
"eval_steps_per_second": 0.768,
"step": 24000
}
],
"max_steps": 24193000,
"num_train_epochs": 1000,
"total_flos": 1.78960468082688e+19,
"trial_name": null,
"trial_params": null
}
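
A minimal sketch, assuming the file above has been downloaded locally as `trainer_state.json`, of how the `log_history` entries can be read back with Python's standard library to print the evaluation curve (the path and the split between training and evaluation entries are assumptions based on the keys visible in the JSON, not code shipped with the repository):

```python
import json

# Assumed local path; adjust to wherever trainer_state.json was saved.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes training entries (with "loss") and evaluation
# entries (with "eval_loss"/"eval_accuracy"); separate the two.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print(f"logged up to step {state['global_step']} "
      f"(epoch {state['epoch']:.3f} of {state['num_train_epochs']} planned)")

for e in eval_log:
    print(f"step {e['step']:>6}: eval_loss={e['eval_loss']:.4f}  "
          f"eval_accuracy={e['eval_accuracy']:.4f}")
```

Run against this file, the loop would list the eight evaluation points recorded between step 2000 and step 24000, e.g. `step  24000: eval_loss=1.5629  eval_accuracy=0.6877` for the last entry.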