Marcio Lima Inácio
Add trained models
bd5119e
{
"best_metric": 0.11806972324848175,
"best_model_checkpoint": "results/Gloria_all/GlorIA-1.3B/checkpoint-100",
"epoch": 3.592814371257485,
"eval_steps": 100,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5988023952095808,
"grad_norm": 0.9138351082801819,
"learning_rate": 2.627329192546584e-05,
"loss": 0.1669,
"step": 50
},
{
"epoch": 1.1976047904191618,
"grad_norm": 0.5801578760147095,
"learning_rate": 2.1614906832298137e-05,
"loss": 0.1129,
"step": 100
},
{
"epoch": 1.1976047904191618,
"eval_accuracy": 0.9447699788056352,
"eval_f1": 0.4304367402071139,
"eval_loss": 0.11806972324848175,
"eval_precision": 0.4337568058076225,
"eval_recall": 0.42716711349419123,
"eval_runtime": 9.8143,
"eval_samples_per_second": 58.079,
"eval_steps_per_second": 2.445,
"step": 100
},
{
"epoch": 1.7964071856287425,
"grad_norm": 0.3607296645641327,
"learning_rate": 1.6956521739130433e-05,
"loss": 0.0915,
"step": 150
},
{
"epoch": 2.3952095808383236,
"grad_norm": 0.4452211260795593,
"learning_rate": 1.2298136645962733e-05,
"loss": 0.063,
"step": 200
},
{
"epoch": 2.3952095808383236,
"eval_accuracy": 0.9409051240493704,
"eval_f1": 0.40241075567918405,
"eval_loss": 0.18597252666950226,
"eval_precision": 0.41811175337186895,
"eval_recall": 0.387846291331546,
"eval_runtime": 9.9454,
"eval_samples_per_second": 57.313,
"eval_steps_per_second": 2.413,
"step": 200
},
{
"epoch": 2.9940119760479043,
"grad_norm": 0.5937349200248718,
"learning_rate": 7.63975155279503e-06,
"loss": 0.0551,
"step": 250
},
{
"epoch": 3.592814371257485,
"grad_norm": 0.6216064691543579,
"learning_rate": 2.981366459627329e-06,
"loss": 0.0292,
"step": 300
},
{
"epoch": 3.592814371257485,
"eval_accuracy": 0.940157087644932,
"eval_f1": 0.4222124283825474,
"eval_loss": 0.20892249047756195,
"eval_precision": 0.4165217391304348,
"eval_recall": 0.4280607685433423,
"eval_runtime": 9.707,
"eval_samples_per_second": 58.721,
"eval_steps_per_second": 2.472,
"step": 300
}
],
"logging_steps": 50,
"max_steps": 332,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"total_flos": 3032048467579320.0,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}