|
{ |
|
"best_metric": 0.6791029572486877, |
|
"best_model_checkpoint": "./results/checkpoint-342", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 684, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7781569965870307, |
|
"eval_confusion_matrix": [ |
|
[ |
|
58, |
|
0, |
|
1, |
|
1, |
|
10, |
|
9, |
|
1 |
|
], |
|
[ |
|
2, |
|
81, |
|
0, |
|
0, |
|
4, |
|
2, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
87, |
|
0, |
|
2, |
|
6, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
67, |
|
5, |
|
2, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
3, |
|
4, |
|
69, |
|
6, |
|
4 |
|
], |
|
[ |
|
1, |
|
0, |
|
6, |
|
1, |
|
12, |
|
56, |
|
0 |
|
], |
|
[ |
|
0, |
|
0, |
|
0, |
|
4, |
|
31, |
|
11, |
|
38 |
|
] |
|
], |
|
"eval_f1": 0.7777605036882297, |
|
"eval_loss": 0.6863528490066528, |
|
"eval_precision": 0.8141265117353299, |
|
"eval_recall": 0.7752230960697254, |
|
"eval_runtime": 414.2099, |
|
"eval_samples_per_second": 1.415, |
|
"eval_steps_per_second": 0.179, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8003412969283277, |
|
"eval_confusion_matrix": [ |
|
[ |
|
72, |
|
0, |
|
0, |
|
1, |
|
4, |
|
2, |
|
1 |
|
], |
|
[ |
|
2, |
|
84, |
|
0, |
|
0, |
|
0, |
|
0, |
|
3 |
|
], |
|
[ |
|
2, |
|
0, |
|
86, |
|
0, |
|
3, |
|
4, |
|
0 |
|
], |
|
[ |
|
7, |
|
0, |
|
0, |
|
65, |
|
2, |
|
1, |
|
0 |
|
], |
|
[ |
|
10, |
|
1, |
|
2, |
|
2, |
|
53, |
|
8, |
|
11 |
|
], |
|
[ |
|
5, |
|
0, |
|
10, |
|
1, |
|
9, |
|
51, |
|
0 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
3, |
|
17, |
|
5, |
|
58 |
|
] |
|
], |
|
"eval_f1": 0.7977110437689846, |
|
"eval_loss": 0.6791029572486877, |
|
"eval_precision": 0.8015628505641682, |
|
"eval_recall": 0.7980677533763562, |
|
"eval_runtime": 417.5215, |
|
"eval_samples_per_second": 1.404, |
|
"eval_steps_per_second": 0.177, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.1340806484222412, |
|
"learning_rate": 5.3801169590643275e-05, |
|
"loss": 0.3724, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8225255972696246, |
|
"eval_confusion_matrix": [ |
|
[ |
|
72, |
|
0, |
|
1, |
|
0, |
|
3, |
|
3, |
|
1 |
|
], |
|
[ |
|
1, |
|
81, |
|
1, |
|
0, |
|
2, |
|
0, |
|
4 |
|
], |
|
[ |
|
1, |
|
0, |
|
86, |
|
0, |
|
0, |
|
8, |
|
0 |
|
], |
|
[ |
|
5, |
|
0, |
|
0, |
|
67, |
|
1, |
|
2, |
|
0 |
|
], |
|
[ |
|
10, |
|
0, |
|
3, |
|
4, |
|
47, |
|
5, |
|
18 |
|
], |
|
[ |
|
1, |
|
0, |
|
5, |
|
1, |
|
1, |
|
67, |
|
1 |
|
], |
|
[ |
|
2, |
|
0, |
|
1, |
|
3, |
|
11, |
|
5, |
|
62 |
|
] |
|
], |
|
"eval_f1": 0.819196982637564, |
|
"eval_loss": 0.805719256401062, |
|
"eval_precision": 0.821570223887049, |
|
"eval_recall": 0.8240875601856804, |
|
"eval_runtime": 415.3032, |
|
"eval_samples_per_second": 1.411, |
|
"eval_steps_per_second": 0.178, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8242320819112628, |
|
"eval_confusion_matrix": [ |
|
[ |
|
71, |
|
0, |
|
0, |
|
1, |
|
3, |
|
4, |
|
1 |
|
], |
|
[ |
|
1, |
|
82, |
|
1, |
|
0, |
|
2, |
|
1, |
|
2 |
|
], |
|
[ |
|
1, |
|
0, |
|
85, |
|
0, |
|
1, |
|
8, |
|
0 |
|
], |
|
[ |
|
4, |
|
0, |
|
0, |
|
67, |
|
3, |
|
0, |
|
1 |
|
], |
|
[ |
|
5, |
|
0, |
|
2, |
|
6, |
|
50, |
|
4, |
|
20 |
|
], |
|
[ |
|
1, |
|
0, |
|
5, |
|
1, |
|
2, |
|
66, |
|
1 |
|
], |
|
[ |
|
1, |
|
0, |
|
0, |
|
3, |
|
15, |
|
3, |
|
62 |
|
] |
|
], |
|
"eval_f1": 0.8226354635422165, |
|
"eval_loss": 0.7115229368209839, |
|
"eval_precision": 0.8223101749006074, |
|
"eval_recall": 0.8254496320643309, |
|
"eval_runtime": 409.1206, |
|
"eval_samples_per_second": 1.432, |
|
"eval_steps_per_second": 0.181, |
|
"step": 684 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 684, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 4.234352059828961e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|