{ "eval_samples": 1233, "test_false_f1": 0.7841191066997518, "test_loss": 0.672432005405426, "test_macro_f1": 0.6707393317114294, "test_micro_f1": 0.7907542579075426, "test_mixture_f1": 0.5116279069767442, "test_runtime": 40.6849, "test_samples_per_second": 30.306, "test_steps_per_second": 0.959, "test_true_f1": 0.9061976549413736, "test_unproven_f1": 0.48101265822784806 }