SLM_vs_LLM_experiments/max_seq_length_128_experiments/Qwen/Qwen1.5_1.8B_twitter/all_results.json
{
  "epoch": 3.0,
  "eval_accuracy": 0.7775735294117647,
  "eval_f1_macro": 0.7420002194942226,
  "eval_f1_micro": 0.7775735294117647,
  "eval_loss": 0.5038631558418274,
  "eval_runtime": 2.7898,
  "eval_samples": 1088,
  "eval_samples_per_second": 389.992,
  "eval_steps_per_second": 12.187,
  "test_accuracy": 0.7693014705882353,
  "test_f1_macro": 0.73261662432013,
  "test_f1_micro": 0.7693014705882353,
  "test_loss": 0.5096076726913452,
  "test_runtime": 2.8449,
  "test_samples_per_second": 382.439,
  "test_steps_per_second": 11.951,
  "train_loss": 0.3625207788803998,
  "train_runtime": 885.2091,
  "train_samples": 8700,
  "train_samples_per_second": 29.485,
  "train_steps_per_second": 0.922
}