SLM_vs_LLM_experiments
/
distilbert
/distilbert_base_uncased_twitter
/checkpoint-100
/trainer_state.json
{ | |
"best_metric": 0.48642775416374207, | |
"best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/distilbert/distilbert_base_uncased_twitter/checkpoint-100", | |
"epoch": 0.7352941176470589, | |
"eval_steps": 50, | |
"global_step": 100, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.07, | |
"grad_norm": 0.9765774607658386, | |
"learning_rate": 1.950980392156863e-05, | |
"loss": 0.6187, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.15, | |
"grad_norm": 1.5788111686706543, | |
"learning_rate": 1.9019607843137255e-05, | |
"loss": 0.5843, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.22, | |
"grad_norm": 3.5568511486053467, | |
"learning_rate": 1.8529411764705884e-05, | |
"loss": 0.5014, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.29, | |
"grad_norm": 1.6089941263198853, | |
"learning_rate": 1.8039215686274513e-05, | |
"loss": 0.4959, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.37, | |
"grad_norm": 2.2181787490844727, | |
"learning_rate": 1.7549019607843138e-05, | |
"loss": 0.4671, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.37, | |
"eval_accuracy": 0.7665441176470589, | |
"eval_f1_macro": 0.7213912692811775, | |
"eval_f1_micro": 0.7665441176470589, | |
"eval_loss": 0.4990096390247345, | |
"eval_runtime": 0.5393, | |
"eval_samples_per_second": 2017.389, | |
"eval_steps_per_second": 31.522, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.44, | |
"grad_norm": 1.884325623512268, | |
"learning_rate": 1.7058823529411767e-05, | |
"loss": 0.49, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.51, | |
"grad_norm": 2.515151262283325, | |
"learning_rate": 1.6568627450980395e-05, | |
"loss": 0.468, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.59, | |
"grad_norm": 1.46433424949646, | |
"learning_rate": 1.607843137254902e-05, | |
"loss": 0.4629, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.66, | |
"grad_norm": 3.789057731628418, | |
"learning_rate": 1.558823529411765e-05, | |
"loss": 0.4995, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.74, | |
"grad_norm": 1.6152119636535645, | |
"learning_rate": 1.5098039215686276e-05, | |
"loss": 0.4724, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.74, | |
"eval_accuracy": 0.7665441176470589, | |
"eval_f1_macro": 0.7129836193609764, | |
"eval_f1_micro": 0.7665441176470589, | |
"eval_loss": 0.48642775416374207, | |
"eval_runtime": 0.5851, | |
"eval_samples_per_second": 1859.58, | |
"eval_steps_per_second": 29.056, | |
"step": 100 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 408, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 3, | |
"save_steps": 50, | |
"total_flos": 211947828019200.0, | |
"train_batch_size": 32, | |
"trial_name": null, | |
"trial_params": null | |
} | |