SLM_vs_LLM_experiments/max_seq_length_128_experiments/distilbert/distilbert_base_uncased_twitter/train_results.json
{
  "epoch": 3.0,
  "train_loss": 0.44109968811857936,
  "train_runtime": 52.5835,
  "train_samples": 8700,
  "train_samples_per_second": 496.354,
  "train_steps_per_second": 7.759
}