{
    "eval_samples": 1233,
    "test_false_f1": 0.7841191066997518,
    "test_loss": 0.672432005405426,
    "test_macro_f1": 0.6707393317114294,
    "test_micro_f1": 0.7907542579075426,
    "test_mixture_f1": 0.5116279069767442,
    "test_runtime": 40.6849,
    "test_samples_per_second": 30.306,
    "test_steps_per_second": 0.959,
    "test_true_f1": 0.9061976549413736,
    "test_unproven_f1": 0.48101265822784806
}