{
  "best_metric": 0.04927213117480278,
  "best_model_checkpoint": "deberta-v3-xsmall-zyda-2-sentiment/checkpoint-6286",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 9429,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1590836780146357,
      "grad_norm": 1.8468247652053833,
      "learning_rate": 4.7348605366422736e-05,
      "loss": 0.2115,
      "step": 500
    },
    {
      "epoch": 0.3181673560292714,
      "grad_norm": 1.7370903491973877,
      "learning_rate": 4.4697210732845476e-05,
      "loss": 0.101,
      "step": 1000
    },
    {
      "epoch": 0.4772510340439071,
      "grad_norm": 1.7206146717071533,
      "learning_rate": 4.2045816099268216e-05,
      "loss": 0.0846,
      "step": 1500
    },
    {
      "epoch": 0.6363347120585428,
      "grad_norm": 1.1373802423477173,
      "learning_rate": 3.9394421465690956e-05,
      "loss": 0.0748,
      "step": 2000
    },
    {
      "epoch": 0.7954183900731785,
      "grad_norm": 0.9603880047798157,
      "learning_rate": 3.674302683211369e-05,
      "loss": 0.0691,
      "step": 2500
    },
    {
      "epoch": 0.9545020680878142,
      "grad_norm": 1.0165342092514038,
      "learning_rate": 3.409163219853643e-05,
      "loss": 0.0627,
      "step": 3000
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.06652908027172089,
      "eval_mse": 0.06652908171153529,
      "eval_runtime": 10.5244,
      "eval_samples_per_second": 950.17,
      "eval_steps_per_second": 118.771,
      "step": 3143
    },
    {
      "epoch": 1.1135857461024499,
      "grad_norm": 0.9926055073738098,
      "learning_rate": 3.144023756495917e-05,
      "loss": 0.0522,
      "step": 3500
    },
    {
      "epoch": 1.2726694241170855,
      "grad_norm": 1.247205376625061,
      "learning_rate": 2.878884293138191e-05,
      "loss": 0.0485,
      "step": 4000
    },
    {
      "epoch": 1.4317531021317214,
      "grad_norm": 1.7589031457901,
      "learning_rate": 2.6137448297804644e-05,
      "loss": 0.0463,
      "step": 4500
    },
    {
      "epoch": 1.590836780146357,
      "grad_norm": 0.7484694719314575,
      "learning_rate": 2.3486053664227384e-05,
      "loss": 0.0443,
      "step": 5000
    },
    {
      "epoch": 1.7499204581609926,
      "grad_norm": 1.5068027973175049,
      "learning_rate": 2.083465903065012e-05,
      "loss": 0.0421,
      "step": 5500
    },
    {
      "epoch": 1.9090041361756285,
      "grad_norm": 0.832625150680542,
      "learning_rate": 1.818326439707286e-05,
      "loss": 0.0411,
      "step": 6000
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.04927213117480278,
      "eval_mse": 0.049272132016595305,
      "eval_runtime": 11.3101,
      "eval_samples_per_second": 884.162,
      "eval_steps_per_second": 110.52,
      "step": 6286
    },
    {
      "epoch": 2.068087814190264,
      "grad_norm": 0.6708300709724426,
      "learning_rate": 1.5531869763495598e-05,
      "loss": 0.0387,
      "step": 6500
    },
    {
      "epoch": 2.2271714922048997,
      "grad_norm": 0.6490187644958496,
      "learning_rate": 1.2880475129918337e-05,
      "loss": 0.0337,
      "step": 7000
    },
    {
      "epoch": 2.3862551702195356,
      "grad_norm": 0.7127770185470581,
      "learning_rate": 1.0229080496341075e-05,
      "loss": 0.0324,
      "step": 7500
    },
    {
      "epoch": 2.545338848234171,
      "grad_norm": 0.6604452133178711,
      "learning_rate": 7.5776858627638146e-06,
      "loss": 0.0326,
      "step": 8000
    },
    {
      "epoch": 2.704422526248807,
      "grad_norm": 0.5042712092399597,
      "learning_rate": 4.926291229186552e-06,
      "loss": 0.0323,
      "step": 8500
    },
    {
      "epoch": 2.8635062042634427,
      "grad_norm": 0.573316752910614,
      "learning_rate": 2.2748965956092908e-06,
      "loss": 0.0321,
      "step": 9000
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.05235280096530914,
      "eval_mse": 0.05235280389813637,
      "eval_runtime": 10.3984,
      "eval_samples_per_second": 961.689,
      "eval_steps_per_second": 120.211,
      "step": 9429
    },
    {
      "epoch": 3.0,
      "step": 9429,
      "total_flos": 9935679003367680.0,
      "train_loss": 0.05866297316179509,
      "train_runtime": 1207.603,
      "train_samples_per_second": 499.597,
      "train_steps_per_second": 7.808
    }
  ],
  "logging_steps": 500,
  "max_steps": 9429,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9935679003367680.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}