|
{ |
|
"best_metric": 0.5166051660516605, |
|
"best_model_checkpoint": "RuleBert-v0.2-k0/checkpoint-1500", |
|
"epoch": 0.49382716049382713, |
|
"eval_steps": 250, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.937500000000001e-06, |
|
"loss": 0.5636, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.875000000000001e-06, |
|
"loss": 0.3677, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.8125e-06, |
|
"loss": 0.3585, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.75e-06, |
|
"loss": 0.3372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.6875e-06, |
|
"loss": 0.3226, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.06666666666666667, |
|
"eval_f1": 0.5180722891566265, |
|
"eval_loss": 0.36232367157936096, |
|
"eval_roc_auc": 0.6758306628436499, |
|
"eval_runtime": 1.6645, |
|
"eval_samples_per_second": 90.118, |
|
"eval_steps_per_second": 1.802, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.625e-06, |
|
"loss": 0.3323, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.562500000000002e-06, |
|
"loss": 0.3451, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.3317, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.4375e-06, |
|
"loss": 0.3297, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 0.3287, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.0, |
|
"eval_f1": 0.49723756906077354, |
|
"eval_loss": 0.3665144741535187, |
|
"eval_roc_auc": 0.6720357564513407, |
|
"eval_runtime": 1.6779, |
|
"eval_samples_per_second": 89.396, |
|
"eval_steps_per_second": 1.788, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.312500000000001e-06, |
|
"loss": 0.3359, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.25125e-06, |
|
"loss": 0.3331, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.18875e-06, |
|
"loss": 0.3374, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.126250000000001e-06, |
|
"loss": 0.3367, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.065e-06, |
|
"loss": 0.3195, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.04, |
|
"eval_f1": 0.5126993502658004, |
|
"eval_loss": 0.3585328757762909, |
|
"eval_roc_auc": 0.6747933884297521, |
|
"eval_runtime": 1.6851, |
|
"eval_samples_per_second": 89.013, |
|
"eval_steps_per_second": 1.78, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.0025e-06, |
|
"loss": 0.3226, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.94e-06, |
|
"loss": 0.3435, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.87875e-06, |
|
"loss": 0.3213, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.817500000000002e-06, |
|
"loss": 0.3313, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.755e-06, |
|
"loss": 0.3241, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.03333333333333333, |
|
"eval_f1": 0.5134189031505251, |
|
"eval_loss": 0.3658309876918793, |
|
"eval_roc_auc": 0.6758812615955473, |
|
"eval_runtime": 1.6782, |
|
"eval_samples_per_second": 89.383, |
|
"eval_steps_per_second": 1.788, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.6925e-06, |
|
"loss": 0.3257, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.63e-06, |
|
"loss": 0.3288, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.5675e-06, |
|
"loss": 0.3331, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.505e-06, |
|
"loss": 0.3529, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.442500000000001e-06, |
|
"loss": 0.3321, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.0, |
|
"eval_f1": 0.49723756906077354, |
|
"eval_loss": 0.3737262487411499, |
|
"eval_roc_auc": 0.6720357564513407, |
|
"eval_runtime": 1.69, |
|
"eval_samples_per_second": 88.759, |
|
"eval_steps_per_second": 1.775, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.380000000000001e-06, |
|
"loss": 0.3339, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.317500000000001e-06, |
|
"loss": 0.3153, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.255000000000001e-06, |
|
"loss": 0.3406, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.192500000000002e-06, |
|
"loss": 0.3109, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.13e-06, |
|
"loss": 0.3315, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.02666666666666667, |
|
"eval_f1": 0.5166051660516605, |
|
"eval_loss": 0.3748493194580078, |
|
"eval_roc_auc": 0.6842047562826783, |
|
"eval_runtime": 1.6814, |
|
"eval_samples_per_second": 89.211, |
|
"eval_steps_per_second": 1.784, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.0675e-06, |
|
"loss": 0.3066, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.005e-06, |
|
"loss": 0.3143, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.9425e-06, |
|
"loss": 0.3286, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.88e-06, |
|
"loss": 0.3125, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.8175e-06, |
|
"loss": 0.3384, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.02666666666666667, |
|
"eval_f1": 0.5051546391752577, |
|
"eval_loss": 0.367872029542923, |
|
"eval_roc_auc": 0.6734187890032046, |
|
"eval_runtime": 1.6925, |
|
"eval_samples_per_second": 88.625, |
|
"eval_steps_per_second": 1.772, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.755000000000001e-06, |
|
"loss": 0.3061, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.692500000000001e-06, |
|
"loss": 0.32, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.630000000000001e-06, |
|
"loss": 0.3211, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.567500000000001e-06, |
|
"loss": 0.3104, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 7.505e-06, |
|
"loss": 0.3338, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.02, |
|
"eval_f1": 0.5050732807215332, |
|
"eval_loss": 0.3726758658885956, |
|
"eval_roc_auc": 0.6743632990386238, |
|
"eval_runtime": 1.6832, |
|
"eval_samples_per_second": 89.116, |
|
"eval_steps_per_second": 1.782, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 8000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 1052841099264000.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|