Granite / trainer_state.json
MohamedIFQ's picture
Upload 8 files
36b1dbe verified
{
"best_metric": 0.348395973443985,
"best_model_checkpoint": "plantuml-code-generation/checkpoint-300",
"epoch": 6.70391061452514,
"eval_steps": 50,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.44692737430167595,
"grad_norm": 6.86284875869751,
"learning_rate": 8e-05,
"loss": 6.1395,
"step": 20
},
{
"epoch": 0.8938547486033519,
"grad_norm": 0.13778135180473328,
"learning_rate": 0.00016,
"loss": 1.2405,
"step": 40
},
{
"epoch": 1.1173184357541899,
"eval_loss": 0.6436046361923218,
"eval_runtime": 57.1725,
"eval_samples_per_second": 3.148,
"eval_steps_per_second": 0.402,
"step": 50
},
{
"epoch": 1.3407821229050279,
"grad_norm": 0.07968226820230484,
"learning_rate": 0.000192,
"loss": 0.6601,
"step": 60
},
{
"epoch": 1.7877094972067038,
"grad_norm": 0.0890984982252121,
"learning_rate": 0.00017600000000000002,
"loss": 0.5855,
"step": 80
},
{
"epoch": 2.2346368715083798,
"grad_norm": 0.09667715430259705,
"learning_rate": 0.00016,
"loss": 0.5319,
"step": 100
},
{
"epoch": 2.2346368715083798,
"eval_loss": 0.48206308484077454,
"eval_runtime": 57.1888,
"eval_samples_per_second": 3.147,
"eval_steps_per_second": 0.402,
"step": 100
},
{
"epoch": 2.6815642458100557,
"grad_norm": 0.12260276824235916,
"learning_rate": 0.000144,
"loss": 0.4477,
"step": 120
},
{
"epoch": 3.1284916201117317,
"grad_norm": 0.12728622555732727,
"learning_rate": 0.00012800000000000002,
"loss": 0.4241,
"step": 140
},
{
"epoch": 3.35195530726257,
"eval_loss": 0.4002044200897217,
"eval_runtime": 57.5487,
"eval_samples_per_second": 3.128,
"eval_steps_per_second": 0.4,
"step": 150
},
{
"epoch": 3.5754189944134076,
"grad_norm": 0.14667275547981262,
"learning_rate": 0.00011200000000000001,
"loss": 0.381,
"step": 160
},
{
"epoch": 4.022346368715084,
"grad_norm": 0.14442357420921326,
"learning_rate": 9.6e-05,
"loss": 0.3836,
"step": 180
},
{
"epoch": 4.4692737430167595,
"grad_norm": 0.16686952114105225,
"learning_rate": 8e-05,
"loss": 0.3524,
"step": 200
},
{
"epoch": 4.4692737430167595,
"eval_loss": 0.36753496527671814,
"eval_runtime": 57.537,
"eval_samples_per_second": 3.128,
"eval_steps_per_second": 0.4,
"step": 200
},
{
"epoch": 4.916201117318436,
"grad_norm": 0.1686471551656723,
"learning_rate": 6.400000000000001e-05,
"loss": 0.3484,
"step": 220
},
{
"epoch": 5.363128491620111,
"grad_norm": 0.18261033296585083,
"learning_rate": 4.8e-05,
"loss": 0.3547,
"step": 240
},
{
"epoch": 5.58659217877095,
"eval_loss": 0.35252371430397034,
"eval_runtime": 57.4948,
"eval_samples_per_second": 3.131,
"eval_steps_per_second": 0.4,
"step": 250
},
{
"epoch": 5.810055865921788,
"grad_norm": 0.1722261905670166,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.3282,
"step": 260
},
{
"epoch": 6.256983240223463,
"grad_norm": 0.1778961718082428,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.3403,
"step": 280
},
{
"epoch": 6.70391061452514,
"grad_norm": 0.18417717516422272,
"learning_rate": 0.0,
"loss": 0.3202,
"step": 300
},
{
"epoch": 6.70391061452514,
"eval_loss": 0.348395973443985,
"eval_runtime": 57.5812,
"eval_samples_per_second": 3.126,
"eval_steps_per_second": 0.399,
"step": 300
}
],
"logging_steps": 20,
"max_steps": 300,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.9650799214592e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}