nsfw-story-generator2 / trainer_state.json
coffeeee's picture
added model files
0f7b0b9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.954646181119811,
"global_step": 100000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 6.249924149859892e-05,
"loss": 2.9841,
"step": 2500
},
{
"epoch": 0.07,
"eval_loss": 2.880531072616577,
"eval_runtime": 5041.8037,
"eval_samples_per_second": 11.934,
"eval_steps_per_second": 2.984,
"step": 2500
},
{
"epoch": 0.15,
"learning_rate": 6.249460454641857e-05,
"loss": 2.9159,
"step": 5000
},
{
"epoch": 0.15,
"eval_loss": 2.843543291091919,
"eval_runtime": 5041.0801,
"eval_samples_per_second": 11.936,
"eval_steps_per_second": 2.984,
"step": 5000
},
{
"epoch": 0.22,
"learning_rate": 6.248574782066793e-05,
"loss": 2.8874,
"step": 7500
},
{
"epoch": 0.22,
"eval_loss": 2.8224122524261475,
"eval_runtime": 5041.8102,
"eval_samples_per_second": 11.934,
"eval_steps_per_second": 2.984,
"step": 7500
},
{
"epoch": 0.3,
"learning_rate": 6.24726796015593e-05,
"loss": 2.867,
"step": 10000
},
{
"epoch": 0.3,
"eval_loss": 2.805563449859619,
"eval_runtime": 5045.7151,
"eval_samples_per_second": 11.925,
"eval_steps_per_second": 2.981,
"step": 10000
},
{
"epoch": 0.37,
"learning_rate": 6.245539895286441e-05,
"loss": 2.8462,
"step": 12500
},
{
"epoch": 0.44,
"learning_rate": 6.243390820601403e-05,
"loss": 2.8343,
"step": 15000
},
{
"epoch": 0.52,
"learning_rate": 6.240821026044725e-05,
"loss": 2.8232,
"step": 17500
},
{
"epoch": 0.59,
"learning_rate": 6.237830858322031e-05,
"loss": 2.8154,
"step": 20000
},
{
"epoch": 0.59,
"eval_loss": 2.7630834579467773,
"eval_runtime": 5044.7761,
"eval_samples_per_second": 11.927,
"eval_steps_per_second": 2.982,
"step": 20000
},
{
"epoch": 0.66,
"learning_rate": 6.234420720853886e-05,
"loss": 2.8085,
"step": 22500
},
{
"epoch": 0.74,
"learning_rate": 6.230591073721361e-05,
"loss": 2.7959,
"step": 25000
},
{
"epoch": 0.81,
"learning_rate": 6.22634243360397e-05,
"loss": 2.7934,
"step": 27500
},
{
"epoch": 0.89,
"learning_rate": 6.221675373709958e-05,
"loss": 2.7856,
"step": 30000
},
{
"epoch": 0.89,
"eval_loss": 2.7356297969818115,
"eval_runtime": 5045.7013,
"eval_samples_per_second": 11.925,
"eval_steps_per_second": 2.981,
"step": 30000
},
{
"epoch": 0.96,
"learning_rate": 6.216590523698961e-05,
"loss": 2.7796,
"step": 32500
},
{
"epoch": 1.03,
"learning_rate": 6.211090854583099e-05,
"loss": 2.7579,
"step": 35000
},
{
"epoch": 1.11,
"learning_rate": 6.205172705145689e-05,
"loss": 2.7374,
"step": 37500
},
{
"epoch": 1.18,
"learning_rate": 6.198836374494218e-05,
"loss": 2.7324,
"step": 40000
},
{
"epoch": 1.18,
"eval_loss": 2.7184464931488037,
"eval_runtime": 5041.2928,
"eval_samples_per_second": 11.935,
"eval_steps_per_second": 2.984,
"step": 40000
},
{
"epoch": 1.26,
"learning_rate": 6.192087786506709e-05,
"loss": 2.7345,
"step": 42500
},
{
"epoch": 1.33,
"learning_rate": 6.18492540023217e-05,
"loss": 2.7306,
"step": 45000
},
{
"epoch": 1.4,
"learning_rate": 6.177350181988941e-05,
"loss": 2.7291,
"step": 47500
},
{
"epoch": 1.48,
"learning_rate": 6.169363153792874e-05,
"loss": 2.7255,
"step": 50000
},
{
"epoch": 1.48,
"eval_loss": 2.7048535346984863,
"eval_runtime": 5061.5104,
"eval_samples_per_second": 11.888,
"eval_steps_per_second": 2.972,
"step": 50000
},
{
"epoch": 1.55,
"learning_rate": 6.160961950708177e-05,
"loss": 2.725,
"step": 52500
},
{
"epoch": 1.63,
"learning_rate": 6.152154427075951e-05,
"loss": 2.7212,
"step": 55000
},
{
"epoch": 1.7,
"learning_rate": 6.142938492793726e-05,
"loss": 2.7196,
"step": 57500
},
{
"epoch": 1.77,
"learning_rate": 6.133315391235702e-05,
"loss": 2.7169,
"step": 60000
},
{
"epoch": 1.77,
"eval_loss": 2.6941745281219482,
"eval_runtime": 5046.2528,
"eval_samples_per_second": 11.924,
"eval_steps_per_second": 2.981,
"step": 60000
},
{
"epoch": 1.85,
"learning_rate": 6.12328232645584e-05,
"loss": 2.7174,
"step": 62500
},
{
"epoch": 1.92,
"learning_rate": 6.112848678433687e-05,
"loss": 2.7153,
"step": 65000
},
{
"epoch": 1.99,
"learning_rate": 6.102011922724016e-05,
"loss": 2.7117,
"step": 67500
},
{
"epoch": 2.07,
"learning_rate": 6.090778098663474e-05,
"loss": 2.6718,
"step": 70000
},
{
"epoch": 2.07,
"eval_loss": 2.6850435733795166,
"eval_runtime": 5045.0709,
"eval_samples_per_second": 11.926,
"eval_steps_per_second": 2.982,
"step": 70000
},
{
"epoch": 2.14,
"learning_rate": 6.0791397277177804e-05,
"loss": 2.6687,
"step": 72500
},
{
"epoch": 2.22,
"learning_rate": 6.0671027969511556e-05,
"loss": 2.6724,
"step": 75000
},
{
"epoch": 2.29,
"learning_rate": 6.0546638755690396e-05,
"loss": 2.6703,
"step": 77500
},
{
"epoch": 2.36,
"learning_rate": 6.041839805391616e-05,
"loss": 2.6717,
"step": 80000
},
{
"epoch": 2.36,
"eval_loss": 2.67754864692688,
"eval_runtime": 5038.9811,
"eval_samples_per_second": 11.941,
"eval_steps_per_second": 2.985,
"step": 80000
},
{
"epoch": 2.44,
"learning_rate": 6.028617152972819e-05,
"loss": 2.6733,
"step": 82500
},
{
"epoch": 2.51,
"learning_rate": 6.0150027570214874e-05,
"loss": 2.6751,
"step": 85000
},
{
"epoch": 2.59,
"learning_rate": 6.000998454333341e-05,
"loss": 2.6738,
"step": 87500
},
{
"epoch": 2.66,
"learning_rate": 5.9866061343086405e-05,
"loss": 2.6747,
"step": 90000
},
{
"epoch": 2.66,
"eval_loss": 2.6707887649536133,
"eval_runtime": 5038.9349,
"eval_samples_per_second": 11.941,
"eval_steps_per_second": 2.985,
"step": 90000
},
{
"epoch": 2.73,
"learning_rate": 5.971821747960996e-05,
"loss": 2.676,
"step": 92500
},
{
"epoch": 2.81,
"learning_rate": 5.9566591173123494e-05,
"loss": 2.6739,
"step": 95000
},
{
"epoch": 2.88,
"learning_rate": 5.941120747883403e-05,
"loss": 2.6756,
"step": 97500
},
{
"epoch": 2.95,
"learning_rate": 5.925196295535967e-05,
"loss": 2.6714,
"step": 100000
},
{
"epoch": 2.95,
"eval_loss": 2.663831949234009,
"eval_runtime": 5038.524,
"eval_samples_per_second": 11.942,
"eval_steps_per_second": 2.986,
"step": 100000
}
],
"max_steps": 676900,
"num_train_epochs": 20,
"total_flos": 2.418000801633927e+18,
"trial_name": null,
"trial_params": null
}