CatGPT / trainer_state.json
baiges's picture
Upload folder using huggingface_hub
1c285c3 verified
raw
history blame contribute delete
No virus
3.94 kB
{
"best_metric": 1.3409229516983032,
"best_model_checkpoint": "results/checkpoint-2000",
"epoch": 1.4553392759687103,
"eval_steps": 250,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18191740949608878,
"grad_norm": 0.2860318124294281,
"learning_rate": 2.976310216494586e-05,
"loss": 1.4647,
"step": 250
},
{
"epoch": 0.18191740949608878,
"eval_loss": 1.352885127067566,
"eval_runtime": 13.6056,
"eval_samples_per_second": 163.241,
"eval_steps_per_second": 20.433,
"step": 250
},
{
"epoch": 0.36383481899217757,
"grad_norm": 0.2909524440765381,
"learning_rate": 2.8342402320213494e-05,
"loss": 1.4643,
"step": 500
},
{
"epoch": 0.36383481899217757,
"eval_loss": 1.3520773649215698,
"eval_runtime": 13.6233,
"eval_samples_per_second": 163.029,
"eval_steps_per_second": 20.406,
"step": 500
},
{
"epoch": 0.5457522284882663,
"grad_norm": 0.2827763557434082,
"learning_rate": 2.5756526053283042e-05,
"loss": 1.4622,
"step": 750
},
{
"epoch": 0.5457522284882663,
"eval_loss": 1.350306749343872,
"eval_runtime": 13.6314,
"eval_samples_per_second": 162.932,
"eval_steps_per_second": 20.394,
"step": 750
},
{
"epoch": 0.7276696379843551,
"grad_norm": 0.29242751002311707,
"learning_rate": 2.223129492047081e-05,
"loss": 1.4625,
"step": 1000
},
{
"epoch": 0.7276696379843551,
"eval_loss": 1.3474653959274292,
"eval_runtime": 13.6357,
"eval_samples_per_second": 162.882,
"eval_steps_per_second": 20.388,
"step": 1000
},
{
"epoch": 0.9095870474804438,
"grad_norm": 0.2826482653617859,
"learning_rate": 1.8074563242173716e-05,
"loss": 1.4591,
"step": 1250
},
{
"epoch": 0.9095870474804438,
"eval_loss": 1.3454625606536865,
"eval_runtime": 13.6252,
"eval_samples_per_second": 163.006,
"eval_steps_per_second": 20.403,
"step": 1250
},
{
"epoch": 1.0915044569765326,
"grad_norm": 0.28281259536743164,
"learning_rate": 1.3649333544377501e-05,
"loss": 1.4496,
"step": 1500
},
{
"epoch": 1.0915044569765326,
"eval_loss": 1.3437364101409912,
"eval_runtime": 13.6378,
"eval_samples_per_second": 162.856,
"eval_steps_per_second": 20.384,
"step": 1500
},
{
"epoch": 1.2734218664726216,
"grad_norm": 0.2805255651473999,
"learning_rate": 9.34205597173652e-06,
"loss": 1.4453,
"step": 1750
},
{
"epoch": 1.2734218664726216,
"eval_loss": 1.3426544666290283,
"eval_runtime": 13.6433,
"eval_samples_per_second": 162.791,
"eval_steps_per_second": 20.376,
"step": 1750
},
{
"epoch": 1.4553392759687103,
"grad_norm": 0.2710939645767212,
"learning_rate": 5.528880047481714e-06,
"loss": 1.4438,
"step": 2000
},
{
"epoch": 1.4553392759687103,
"eval_loss": 1.3409229516983032,
"eval_runtime": 13.6488,
"eval_samples_per_second": 162.725,
"eval_steps_per_second": 20.368,
"step": 2000
}
],
"logging_steps": 250,
"max_steps": 2748,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.67219584303104e+17,
"train_batch_size": 40,
"trial_name": null,
"trial_params": null
}