|
{ |
|
"best_metric": NaN, |
|
"best_model_checkpoint": "autotrain-hu2ir-jotv8/checkpoint-826", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 826, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.926700592041016, |
|
"learning_rate": 7.459677419354839e-06, |
|
"loss": 6.1713, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.053235054016113, |
|
"learning_rate": 1.5524193548387098e-05, |
|
"loss": 2.7995, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.710062026977539, |
|
"learning_rate": 2.3790322580645163e-05, |
|
"loss": 1.076, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.625478982925415, |
|
"learning_rate": 3.205645161290323e-05, |
|
"loss": 0.9209, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.7764992713928223, |
|
"learning_rate": 4.032258064516129e-05, |
|
"loss": 0.8778, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.6964770555496216, |
|
"learning_rate": 4.858870967741936e-05, |
|
"loss": 0.8273, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.7574260234832764, |
|
"learning_rate": 4.923766816143498e-05, |
|
"loss": 0.8135, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.3349592685699463, |
|
"learning_rate": 4.831838565022422e-05, |
|
"loss": 0.8023, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.5791077613830566, |
|
"learning_rate": 4.769058295964125e-05, |
|
"loss": 0.7892, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.76457399103139e-05, |
|
"loss": 0.0972, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.760089686098655e-05, |
|
"loss": 0.1088, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.757847533632287e-05, |
|
"loss": 0.0467, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.757847533632287e-05, |
|
"loss": 0.0, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.757847533632287e-05, |
|
"loss": 0.0, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.755605381165919e-05, |
|
"loss": 0.1944, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.755605381165919e-05, |
|
"loss": 0.0, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.755605381165919e-05, |
|
"loss": 0.0, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.755605381165919e-05, |
|
"loss": 0.0, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.755605381165919e-05, |
|
"loss": 0.0, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.755605381165919e-05, |
|
"loss": 0.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": NaN, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 57.261, |
|
"eval_samples_per_second": 28.833, |
|
"eval_steps_per_second": 1.816, |
|
"step": 826 |
|
} |
|
], |
|
"logging_steps": 41, |
|
"max_steps": 2478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1005084998369280.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|