|
{ |
|
"best_metric": 0.5422278243727644, |
|
"best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-4276", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 8552, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.004450798034668, |
|
"learning_rate": 2.0214687462514598e-05, |
|
"loss": 0.598, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 15.662084579467773, |
|
"learning_rate": 1.8959428678205447e-05, |
|
"loss": 0.5617, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 28.870677947998047, |
|
"learning_rate": 1.7704169893896296e-05, |
|
"loss": 0.5375, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 64.57504272460938, |
|
"learning_rate": 1.6448911109587142e-05, |
|
"loss": 0.5691, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.5517680048942566, |
|
"eval_matthews_correlation": 0.4384136507504086, |
|
"eval_runtime": 0.737, |
|
"eval_samples_per_second": 1415.216, |
|
"eval_steps_per_second": 89.553, |
|
"step": 2138 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 25.73099708557129, |
|
"learning_rate": 1.5193652325277987e-05, |
|
"loss": 0.4513, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 63.40884017944336, |
|
"learning_rate": 1.3938393540968835e-05, |
|
"loss": 0.442, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.524051308631897, |
|
"learning_rate": 1.268313475665968e-05, |
|
"loss": 0.4237, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 135.24822998046875, |
|
"learning_rate": 1.1427875972350528e-05, |
|
"loss": 0.4295, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.7380757331848145, |
|
"eval_matthews_correlation": 0.5422278243727644, |
|
"eval_runtime": 0.7356, |
|
"eval_samples_per_second": 1417.881, |
|
"eval_steps_per_second": 89.722, |
|
"step": 4276 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.07921259105205536, |
|
"learning_rate": 1.0172617188041375e-05, |
|
"loss": 0.3849, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.08474227786064148, |
|
"learning_rate": 8.917358403732224e-06, |
|
"loss": 0.2999, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.9242565631866455, |
|
"learning_rate": 7.66209961942307e-06, |
|
"loss": 0.2708, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.1333150714635849, |
|
"learning_rate": 6.406840835113916e-06, |
|
"loss": 0.2846, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.9644601345062256, |
|
"eval_matthews_correlation": 0.5304969997797532, |
|
"eval_runtime": 0.7379, |
|
"eval_samples_per_second": 1413.493, |
|
"eval_steps_per_second": 89.444, |
|
"step": 6414 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.13025422394275665, |
|
"learning_rate": 5.151582050804764e-06, |
|
"loss": 0.267, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 0.015170727856457233, |
|
"learning_rate": 3.896323266495611e-06, |
|
"loss": 0.1489, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"grad_norm": 0.03684037923812866, |
|
"learning_rate": 2.641064482186458e-06, |
|
"loss": 0.1797, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 0.15966422855854034, |
|
"learning_rate": 1.385805697877305e-06, |
|
"loss": 0.1557, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"grad_norm": 0.05443466454744339, |
|
"learning_rate": 1.3054691356815192e-07, |
|
"loss": 0.1708, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.0916093587875366, |
|
"eval_matthews_correlation": 0.5421747077088894, |
|
"eval_runtime": 0.8194, |
|
"eval_samples_per_second": 1272.83, |
|
"eval_steps_per_second": 80.543, |
|
"step": 8552 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8552, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 138657403563900.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 2.1469946246823753e-05, |
|
"num_train_epochs": 4, |
|
"per_device_train_batch_size": 4, |
|
"seed": 10 |
|
} |
|
} |
|
|