the-grateful-dead / trainer_state.json
AlekseyKorshuk's picture
huggingartists
642b122
{
"best_metric": 1.695717692375183,
"best_model_checkpoint": "output/the-grateful-dead/checkpoint-289",
"epoch": 1.0,
"global_step": 289,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 0.00013709869490949843,
"loss": 3.4767,
"step": 5
},
{
"epoch": 0.03,
"learning_rate": 0.00013679507884269807,
"loss": 3.3233,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 0.00013629004853001075,
"loss": 3.034,
"step": 15
},
{
"epoch": 0.07,
"learning_rate": 0.00013558509557906132,
"loss": 3.0326,
"step": 20
},
{
"epoch": 0.09,
"learning_rate": 0.00013468230206922286,
"loss": 3.0633,
"step": 25
},
{
"epoch": 0.1,
"learning_rate": 0.00013358433440219266,
"loss": 2.6577,
"step": 30
},
{
"epoch": 0.12,
"learning_rate": 0.00013229443542677182,
"loss": 2.9416,
"step": 35
},
{
"epoch": 0.14,
"learning_rate": 0.00013081641486110732,
"loss": 2.8888,
"step": 40
},
{
"epoch": 0.16,
"learning_rate": 0.00012915463804068515,
"loss": 2.6352,
"step": 45
},
{
"epoch": 0.17,
"learning_rate": 0.00012731401302530662,
"loss": 2.9574,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 0.00012529997610312806,
"loss": 2.7793,
"step": 55
},
{
"epoch": 0.21,
"learning_rate": 0.00012311847573457732,
"loss": 2.5208,
"step": 60
},
{
"epoch": 0.22,
"learning_rate": 0.00012077595498356889,
"loss": 2.5793,
"step": 65
},
{
"epoch": 0.24,
"learning_rate": 0.00011827933248790689,
"loss": 2.5326,
"step": 70
},
{
"epoch": 0.26,
"learning_rate": 0.00011563598202508025,
"loss": 2.5729,
"step": 75
},
{
"epoch": 0.28,
"learning_rate": 0.00011285371073380224,
"loss": 2.6515,
"step": 80
},
{
"epoch": 0.29,
"learning_rate": 0.000109940736055617,
"loss": 2.59,
"step": 85
},
{
"epoch": 0.31,
"learning_rate": 0.00010690566146467614,
"loss": 2.62,
"step": 90
},
{
"epoch": 0.33,
"learning_rate": 0.00010375745105736788,
"loss": 2.5348,
"step": 95
},
{
"epoch": 0.35,
"learning_rate": 0.00010050540307684762,
"loss": 2.3504,
"step": 100
},
{
"epoch": 0.36,
"learning_rate": 9.715912245066583e-05,
"loss": 2.4245,
"step": 105
},
{
"epoch": 0.38,
"learning_rate": 9.372849242260327e-05,
"loss": 2.3522,
"step": 110
},
{
"epoch": 0.4,
"learning_rate": 9.022364536249872e-05,
"loss": 2.2091,
"step": 115
},
{
"epoch": 0.42,
"learning_rate": 8.665493284028274e-05,
"loss": 2.2353,
"step": 120
},
{
"epoch": 0.43,
"learning_rate": 8.303289505260362e-05,
"loss": 2.2043,
"step": 125
},
{
"epoch": 0.45,
"learning_rate": 7.936822969234422e-05,
"loss": 2.25,
"step": 130
},
{
"epoch": 0.47,
"learning_rate": 7.56717603529733e-05,
"loss": 2.2438,
"step": 135
},
{
"epoch": 0.48,
"learning_rate": 7.195440456104955e-05,
"loss": 2.2188,
"step": 140
},
{
"epoch": 0.5,
"learning_rate": 6.822714153129379e-05,
"loss": 2.1493,
"step": 145
},
{
"epoch": 0.52,
"learning_rate": 6.450097973946566e-05,
"loss": 2.3241,
"step": 150
},
{
"epoch": 0.54,
"learning_rate": 6.0786924408817136e-05,
"loss": 2.1267,
"step": 155
},
{
"epoch": 0.55,
"learning_rate": 5.709594500615288e-05,
"loss": 2.0304,
"step": 160
},
{
"epoch": 0.57,
"learning_rate": 5.343894284349703e-05,
"loss": 1.9702,
"step": 165
},
{
"epoch": 0.59,
"learning_rate": 4.982671888105512e-05,
"loss": 2.0854,
"step": 170
},
{
"epoch": 0.61,
"learning_rate": 4.62699418265656e-05,
"loss": 2.0528,
"step": 175
},
{
"epoch": 0.62,
"learning_rate": 4.277911662525875e-05,
"loss": 1.9738,
"step": 180
},
{
"epoch": 0.64,
"learning_rate": 3.936455343348859e-05,
"loss": 2.1449,
"step": 185
},
{
"epoch": 0.66,
"learning_rate": 3.6036337167674e-05,
"loss": 1.9034,
"step": 190
},
{
"epoch": 0.67,
"learning_rate": 3.280429771848598e-05,
"loss": 1.9098,
"step": 195
},
{
"epoch": 0.69,
"learning_rate": 2.9677980918254152e-05,
"loss": 1.9427,
"step": 200
},
{
"epoch": 0.71,
"learning_rate": 2.6666620347339532e-05,
"loss": 2.243,
"step": 205
},
{
"epoch": 0.73,
"learning_rate": 2.377911006274416e-05,
"loss": 1.9729,
"step": 210
},
{
"epoch": 0.74,
"learning_rate": 2.1023978329503206e-05,
"loss": 2.0628,
"step": 215
},
{
"epoch": 0.76,
"learning_rate": 1.8409362432444018e-05,
"loss": 1.8724,
"step": 220
},
{
"epoch": 0.78,
"learning_rate": 1.594298464270594e-05,
"loss": 2.0964,
"step": 225
},
{
"epoch": 0.8,
"learning_rate": 1.3632129410003275e-05,
"loss": 1.9228,
"step": 230
},
{
"epoch": 0.81,
"learning_rate": 1.1483621847994469e-05,
"loss": 1.797,
"step": 235
},
{
"epoch": 0.83,
"learning_rate": 9.503807576300871e-06,
"loss": 2.0553,
"step": 240
},
{
"epoch": 0.85,
"learning_rate": 7.698533978711721e-06,
"loss": 1.8949,
"step": 245
},
{
"epoch": 0.87,
"learning_rate": 6.073132932929161e-06,
"loss": 1.8748,
"step": 250
},
{
"epoch": 0.88,
"learning_rate": 4.6324050628611986e-06,
"loss": 1.9711,
"step": 255
},
{
"epoch": 0.9,
"learning_rate": 3.3806055599732885e-06,
"loss": 1.9954,
"step": 260
},
{
"epoch": 0.92,
"learning_rate": 2.3214316155753564e-06,
"loss": 1.8547,
"step": 265
},
{
"epoch": 0.93,
"learning_rate": 1.4580115011628363e-06,
"loss": 1.8839,
"step": 270
},
{
"epoch": 0.95,
"learning_rate": 7.928953290631928e-07,
"loss": 1.9188,
"step": 275
},
{
"epoch": 0.97,
"learning_rate": 3.280475206760945e-07,
"loss": 1.8588,
"step": 280
},
{
"epoch": 0.99,
"learning_rate": 6.484100455249007e-08,
"loss": 1.8212,
"step": 285
},
{
"epoch": 1.0,
"eval_loss": 1.695717692375183,
"eval_runtime": 5.4445,
"eval_samples_per_second": 76.959,
"eval_steps_per_second": 9.735,
"step": 289
}
],
"max_steps": 289,
"num_train_epochs": 1,
"total_flos": 302053588992000.0,
"trial_name": null,
"trial_params": null
}