|
{ |
|
"best_metric": 0.5809483528137207, |
|
"best_model_checkpoint": "neurips-bert-combined1/checkpoint-100", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 3.577862501144409, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.6737, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.035987377166748, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.6841, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.1154258251190186, |
|
"learning_rate": 2e-05, |
|
"loss": 0.7756, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.06010627746582, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.7759, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 7.218173503875732, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.7774, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 9.524176597595215, |
|
"learning_rate": 4e-05, |
|
"loss": 0.74, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.1684250831604004, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.6328, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 9.243531227111816, |
|
"learning_rate": 4.962962962962963e-05, |
|
"loss": 0.6749, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 15.164690971374512, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.7968, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 4.498668193817139, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.7803, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 10.785091400146484, |
|
"learning_rate": 4.740740740740741e-05, |
|
"loss": 0.6634, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.868218421936035, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.6974, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 6.348710060119629, |
|
"learning_rate": 4.592592592592593e-05, |
|
"loss": 0.6492, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 6.30601692199707, |
|
"learning_rate": 4.518518518518519e-05, |
|
"loss": 0.7361, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 6.230818748474121, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.6889, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 7.42860221862793, |
|
"learning_rate": 4.3703703703703705e-05, |
|
"loss": 0.6431, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.7333102226257324, |
|
"learning_rate": 4.296296296296296e-05, |
|
"loss": 0.7148, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.3061561584472656, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.6978, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 3.080080032348633, |
|
"learning_rate": 4.148148148148148e-05, |
|
"loss": 0.7332, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.468094825744629, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.6837, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 4.740555286407471, |
|
"learning_rate": 4e-05, |
|
"loss": 0.7024, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 4.630310535430908, |
|
"learning_rate": 3.925925925925926e-05, |
|
"loss": 0.6951, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.2872023582458496, |
|
"learning_rate": 3.851851851851852e-05, |
|
"loss": 0.6575, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 3.9264609813690186, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.6714, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.6678338050842285, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.6892, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.51, |
|
"eval_auc": 0.7687999999999999, |
|
"eval_f1": 0.6711409395973155, |
|
"eval_loss": 0.6739338636398315, |
|
"eval_precision": 0.5050505050505051, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 16.1008, |
|
"eval_samples_per_second": 6.211, |
|
"eval_steps_per_second": 0.435, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 4.582466125488281, |
|
"learning_rate": 3.62962962962963e-05, |
|
"loss": 0.6436, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 6.386196613311768, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 0.6901, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 3.274430751800537, |
|
"learning_rate": 3.481481481481482e-05, |
|
"loss": 0.6233, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 8.776232719421387, |
|
"learning_rate": 3.4074074074074077e-05, |
|
"loss": 0.6726, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 8.833818435668945, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.6303, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 7.453307628631592, |
|
"learning_rate": 3.25925925925926e-05, |
|
"loss": 0.6011, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 2.839207887649536, |
|
"learning_rate": 3.185185185185185e-05, |
|
"loss": 0.6144, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 5.08197546005249, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.716, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 7.713846206665039, |
|
"learning_rate": 3.037037037037037e-05, |
|
"loss": 0.7857, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 5.370303153991699, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.5537, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 4.6746296882629395, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.5636, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 5.238672733306885, |
|
"learning_rate": 2.814814814814815e-05, |
|
"loss": 0.5311, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 4.931227207183838, |
|
"learning_rate": 2.7407407407407408e-05, |
|
"loss": 0.5448, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 13.78615665435791, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.6089, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 4.783405303955078, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.422, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.6400000000000001, |
|
"grad_norm": 10.756999969482422, |
|
"learning_rate": 2.5185185185185183e-05, |
|
"loss": 0.5627, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 8.377484321594238, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.6638, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 6.980154991149902, |
|
"learning_rate": 2.3703703703703707e-05, |
|
"loss": 0.7009, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 5.8498334884643555, |
|
"learning_rate": 2.2962962962962965e-05, |
|
"loss": 0.419, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 7.8378520011901855, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.4252, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 8.54397201538086, |
|
"learning_rate": 2.148148148148148e-05, |
|
"loss": 0.425, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 9.064745903015137, |
|
"learning_rate": 2.074074074074074e-05, |
|
"loss": 0.5294, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 10.862845420837402, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9176, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 6.8356475830078125, |
|
"learning_rate": 1.925925925925926e-05, |
|
"loss": 0.433, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.982166290283203, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.5614, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.69, |
|
"eval_auc": 0.8136, |
|
"eval_f1": 0.6265060240963856, |
|
"eval_loss": 0.5809483528137207, |
|
"eval_precision": 0.7878787878787878, |
|
"eval_recall": 0.52, |
|
"eval_runtime": 16.0696, |
|
"eval_samples_per_second": 6.223, |
|
"eval_steps_per_second": 0.436, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 52622211072000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|