agentlans's picture
Upload 13 files
c861078 verified
raw
history blame
4.92 kB
{
"best_metric": 0.04927213117480278,
"best_model_checkpoint": "deberta-v3-xsmall-zyda-2-sentiment/checkpoint-6286",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 9429,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1590836780146357,
"grad_norm": 1.8468247652053833,
"learning_rate": 4.7348605366422736e-05,
"loss": 0.2115,
"step": 500
},
{
"epoch": 0.3181673560292714,
"grad_norm": 1.7370903491973877,
"learning_rate": 4.4697210732845476e-05,
"loss": 0.101,
"step": 1000
},
{
"epoch": 0.4772510340439071,
"grad_norm": 1.7206146717071533,
"learning_rate": 4.2045816099268216e-05,
"loss": 0.0846,
"step": 1500
},
{
"epoch": 0.6363347120585428,
"grad_norm": 1.1373802423477173,
"learning_rate": 3.9394421465690956e-05,
"loss": 0.0748,
"step": 2000
},
{
"epoch": 0.7954183900731785,
"grad_norm": 0.9603880047798157,
"learning_rate": 3.674302683211369e-05,
"loss": 0.0691,
"step": 2500
},
{
"epoch": 0.9545020680878142,
"grad_norm": 1.0165342092514038,
"learning_rate": 3.409163219853643e-05,
"loss": 0.0627,
"step": 3000
},
{
"epoch": 1.0,
"eval_loss": 0.06652908027172089,
"eval_mse": 0.06652908171153529,
"eval_runtime": 10.5244,
"eval_samples_per_second": 950.17,
"eval_steps_per_second": 118.771,
"step": 3143
},
{
"epoch": 1.1135857461024499,
"grad_norm": 0.9926055073738098,
"learning_rate": 3.144023756495917e-05,
"loss": 0.0522,
"step": 3500
},
{
"epoch": 1.2726694241170855,
"grad_norm": 1.247205376625061,
"learning_rate": 2.878884293138191e-05,
"loss": 0.0485,
"step": 4000
},
{
"epoch": 1.4317531021317214,
"grad_norm": 1.7589031457901,
"learning_rate": 2.6137448297804644e-05,
"loss": 0.0463,
"step": 4500
},
{
"epoch": 1.590836780146357,
"grad_norm": 0.7484694719314575,
"learning_rate": 2.3486053664227384e-05,
"loss": 0.0443,
"step": 5000
},
{
"epoch": 1.7499204581609926,
"grad_norm": 1.5068027973175049,
"learning_rate": 2.083465903065012e-05,
"loss": 0.0421,
"step": 5500
},
{
"epoch": 1.9090041361756285,
"grad_norm": 0.832625150680542,
"learning_rate": 1.818326439707286e-05,
"loss": 0.0411,
"step": 6000
},
{
"epoch": 2.0,
"eval_loss": 0.04927213117480278,
"eval_mse": 0.049272132016595305,
"eval_runtime": 11.3101,
"eval_samples_per_second": 884.162,
"eval_steps_per_second": 110.52,
"step": 6286
},
{
"epoch": 2.068087814190264,
"grad_norm": 0.6708300709724426,
"learning_rate": 1.5531869763495598e-05,
"loss": 0.0387,
"step": 6500
},
{
"epoch": 2.2271714922048997,
"grad_norm": 0.6490187644958496,
"learning_rate": 1.2880475129918337e-05,
"loss": 0.0337,
"step": 7000
},
{
"epoch": 2.3862551702195356,
"grad_norm": 0.7127770185470581,
"learning_rate": 1.0229080496341075e-05,
"loss": 0.0324,
"step": 7500
},
{
"epoch": 2.545338848234171,
"grad_norm": 0.6604452133178711,
"learning_rate": 7.5776858627638146e-06,
"loss": 0.0326,
"step": 8000
},
{
"epoch": 2.704422526248807,
"grad_norm": 0.5042712092399597,
"learning_rate": 4.926291229186552e-06,
"loss": 0.0323,
"step": 8500
},
{
"epoch": 2.8635062042634427,
"grad_norm": 0.573316752910614,
"learning_rate": 2.2748965956092908e-06,
"loss": 0.0321,
"step": 9000
},
{
"epoch": 3.0,
"eval_loss": 0.05235280096530914,
"eval_mse": 0.05235280389813637,
"eval_runtime": 10.3984,
"eval_samples_per_second": 961.689,
"eval_steps_per_second": 120.211,
"step": 9429
},
{
"epoch": 3.0,
"step": 9429,
"total_flos": 9935679003367680.0,
"train_loss": 0.05866297316179509,
"train_runtime": 1207.603,
"train_samples_per_second": 499.597,
"train_steps_per_second": 7.808
}
],
"logging_steps": 500,
"max_steps": 9429,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9935679003367680.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}