Llama-2-8k-2m-rethink / trainer_state.json
ccore's picture
Upload folder using huggingface_hub
aa406ec
raw
history blame
4.63 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.935064935064935,
"eval_steps": 500,
"global_step": 190,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13,
"learning_rate": 0.0001,
"loss": 2.0038,
"step": 5
},
{
"epoch": 0.26,
"learning_rate": 0.0001,
"loss": 1.9796,
"step": 10
},
{
"epoch": 0.39,
"learning_rate": 0.0001,
"loss": 1.9617,
"step": 15
},
{
"epoch": 0.52,
"learning_rate": 0.0001,
"loss": 2.0267,
"step": 20
},
{
"epoch": 0.65,
"learning_rate": 0.0001,
"loss": 1.9767,
"step": 25
},
{
"epoch": 0.78,
"learning_rate": 0.0001,
"loss": 1.9649,
"step": 30
},
{
"epoch": 0.91,
"learning_rate": 0.0001,
"loss": 2.0785,
"step": 35
},
{
"epoch": 1.04,
"learning_rate": 0.0001,
"loss": 1.8929,
"step": 40
},
{
"epoch": 1.17,
"learning_rate": 0.0001,
"loss": 1.9942,
"step": 45
},
{
"epoch": 1.3,
"learning_rate": 0.0001,
"loss": 1.8956,
"step": 50
},
{
"epoch": 1.43,
"learning_rate": 0.0001,
"loss": 1.8971,
"step": 55
},
{
"epoch": 1.56,
"learning_rate": 0.0001,
"loss": 1.8397,
"step": 60
},
{
"epoch": 1.69,
"learning_rate": 0.0001,
"loss": 1.9135,
"step": 65
},
{
"epoch": 1.82,
"learning_rate": 0.0001,
"loss": 1.8782,
"step": 70
},
{
"epoch": 1.95,
"learning_rate": 0.0001,
"loss": 1.8756,
"step": 75
},
{
"epoch": 2.08,
"learning_rate": 0.0001,
"loss": 1.887,
"step": 80
},
{
"epoch": 2.21,
"learning_rate": 0.0001,
"loss": 1.8408,
"step": 85
},
{
"epoch": 2.34,
"learning_rate": 0.0001,
"loss": 1.8352,
"step": 90
},
{
"epoch": 2.47,
"learning_rate": 0.0001,
"loss": 1.8403,
"step": 95
},
{
"epoch": 2.6,
"learning_rate": 0.0001,
"loss": 1.8519,
"step": 100
},
{
"epoch": 2.73,
"learning_rate": 0.0001,
"loss": 1.8683,
"step": 105
},
{
"epoch": 2.86,
"learning_rate": 0.0001,
"loss": 1.8082,
"step": 110
},
{
"epoch": 2.99,
"learning_rate": 0.0001,
"loss": 1.811,
"step": 115
},
{
"epoch": 3.12,
"learning_rate": 0.0001,
"loss": 1.8451,
"step": 120
},
{
"epoch": 3.25,
"learning_rate": 0.0001,
"loss": 1.7562,
"step": 125
},
{
"epoch": 3.38,
"learning_rate": 0.0001,
"loss": 1.8435,
"step": 130
},
{
"epoch": 3.51,
"learning_rate": 0.0001,
"loss": 1.7416,
"step": 135
},
{
"epoch": 3.64,
"learning_rate": 0.0001,
"loss": 1.8242,
"step": 140
},
{
"epoch": 3.77,
"learning_rate": 0.0001,
"loss": 1.7191,
"step": 145
},
{
"epoch": 3.9,
"learning_rate": 0.0001,
"loss": 1.8372,
"step": 150
},
{
"epoch": 4.03,
"learning_rate": 0.0001,
"loss": 1.6897,
"step": 155
},
{
"epoch": 4.16,
"learning_rate": 0.0001,
"loss": 1.7764,
"step": 160
},
{
"epoch": 4.29,
"learning_rate": 0.0001,
"loss": 1.7044,
"step": 165
},
{
"epoch": 4.42,
"learning_rate": 0.0001,
"loss": 1.6948,
"step": 170
},
{
"epoch": 4.55,
"learning_rate": 0.0001,
"loss": 1.7268,
"step": 175
},
{
"epoch": 4.68,
"learning_rate": 0.0001,
"loss": 1.7703,
"step": 180
},
{
"epoch": 4.81,
"learning_rate": 0.0001,
"loss": 1.7836,
"step": 185
},
{
"epoch": 4.94,
"learning_rate": 0.0001,
"loss": 1.7871,
"step": 190
},
{
"epoch": 4.94,
"step": 190,
"total_flos": 697572311040000.0,
"train_loss": 1.8531885046707957,
"train_runtime": 316.8119,
"train_samples_per_second": 4.861,
"train_steps_per_second": 0.6
}
],
"logging_steps": 5,
"max_steps": 190,
"num_train_epochs": 5,
"save_steps": 1000,
"total_flos": 697572311040000.0,
"trial_name": null,
"trial_params": null
}