|
{ |
|
"best_metric": 0.00286100001416597, |
|
"best_model_checkpoint": "cola-pixel-handwritten-mean-vatrpp-256-64-4-2e-5-15000-42/checkpoint-100", |
|
"epoch": 33.32835820895522, |
|
"global_step": 1100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.655, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 0.6341390013694763, |
|
"eval_matthews_correlation": 0.00286100001416597, |
|
"eval_runtime": 7.8213, |
|
"eval_samples_per_second": 133.354, |
|
"eval_steps_per_second": 16.749, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.98e-05, |
|
"loss": 0.6174, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 0.6281591057777405, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 7.9503, |
|
"eval_samples_per_second": 131.19, |
|
"eval_steps_per_second": 16.477, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 1.9867567567567568e-05, |
|
"loss": 0.6196, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 0.6198328137397766, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 7.8212, |
|
"eval_samples_per_second": 133.356, |
|
"eval_steps_per_second": 16.749, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 1.9733783783783785e-05, |
|
"loss": 0.6158, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"eval_loss": 0.6199322938919067, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 7.8954, |
|
"eval_samples_per_second": 132.102, |
|
"eval_steps_per_second": 16.592, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 0.6175, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"eval_loss": 0.6180645823478699, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 8.2397, |
|
"eval_samples_per_second": 126.583, |
|
"eval_steps_per_second": 15.899, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 1.9464864864864867e-05, |
|
"loss": 0.6152, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"eval_loss": 0.619079053401947, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 7.8304, |
|
"eval_samples_per_second": 133.198, |
|
"eval_steps_per_second": 16.73, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"learning_rate": 1.932972972972973e-05, |
|
"loss": 0.617, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"eval_loss": 0.6184842586517334, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 8.2213, |
|
"eval_samples_per_second": 126.865, |
|
"eval_steps_per_second": 15.934, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"learning_rate": 1.9194594594594596e-05, |
|
"loss": 0.6191, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"eval_loss": 0.6185177564620972, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 7.887, |
|
"eval_samples_per_second": 132.243, |
|
"eval_steps_per_second": 16.61, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 1.905945945945946e-05, |
|
"loss": 0.6162, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"eval_loss": 0.6183082461357117, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 7.8268, |
|
"eval_samples_per_second": 133.26, |
|
"eval_steps_per_second": 16.737, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 30.3, |
|
"learning_rate": 1.8924324324324325e-05, |
|
"loss": 0.6166, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 30.3, |
|
"eval_loss": 0.6183302402496338, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 7.795, |
|
"eval_samples_per_second": 133.803, |
|
"eval_steps_per_second": 16.806, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 1.878918918918919e-05, |
|
"loss": 0.6177, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 0.6182125210762024, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 7.8695, |
|
"eval_samples_per_second": 132.537, |
|
"eval_steps_per_second": 16.647, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"step": 1100, |
|
"total_flos": 7.341930418964005e+18, |
|
"train_loss": 0.6206500174782493, |
|
"train_runtime": 3140.0413, |
|
"train_samples_per_second": 1222.914, |
|
"train_steps_per_second": 4.777 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 455, |
|
"total_flos": 7.341930418964005e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|