{
  "best_metric": 0.5249539017677307,
  "best_model_checkpoint": "post-auto-v3/checkpoint-180",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 180,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.044444444444444446,
      "grad_norm": 49.678558349609375,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 2.749,
      "step": 4
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 38.787879943847656,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 3.1578,
      "step": 8
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 42.88180160522461,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 2.6539,
      "step": 12
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 35.309532165527344,
      "learning_rate": 2.962962962962963e-05,
      "loss": 1.7708,
      "step": 16
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 23.5089168548584,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 1.1406,
      "step": 20
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 7.427271842956543,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.8041,
      "step": 24
    },
    {
      "epoch": 0.3111111111111111,
      "grad_norm": 11.275213241577148,
      "learning_rate": 4.9794238683127575e-05,
      "loss": 0.8478,
      "step": 28
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 21.598812103271484,
      "learning_rate": 4.8971193415637865e-05,
      "loss": 0.885,
      "step": 32
    },
    {
      "epoch": 0.4,
      "grad_norm": 15.150390625,
      "learning_rate": 4.814814814814815e-05,
      "loss": 0.7711,
      "step": 36
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 8.3656587600708,
      "learning_rate": 4.732510288065844e-05,
      "loss": 0.6932,
      "step": 40
    },
    {
      "epoch": 0.4888888888888889,
      "grad_norm": 6.864919662475586,
      "learning_rate": 4.650205761316873e-05,
      "loss": 0.6934,
      "step": 44
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 20.607200622558594,
      "learning_rate": 4.567901234567901e-05,
      "loss": 0.6592,
      "step": 48
    },
    {
      "epoch": 0.5777777777777777,
      "grad_norm": 5.4164276123046875,
      "learning_rate": 4.48559670781893e-05,
      "loss": 0.6815,
      "step": 52
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 21.185943603515625,
      "learning_rate": 4.403292181069959e-05,
      "loss": 0.7281,
      "step": 56
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 5.345090866088867,
      "learning_rate": 4.3209876543209875e-05,
      "loss": 0.7072,
      "step": 60
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 28.171621322631836,
      "learning_rate": 4.2386831275720165e-05,
      "loss": 0.5982,
      "step": 64
    },
    {
      "epoch": 0.7555555555555555,
      "grad_norm": 18.241920471191406,
      "learning_rate": 4.1563786008230455e-05,
      "loss": 0.6613,
      "step": 68
    },
    {
      "epoch": 0.8,
      "grad_norm": 26.421844482421875,
      "learning_rate": 4.074074074074074e-05,
      "loss": 0.8852,
      "step": 72
    },
    {
      "epoch": 0.8444444444444444,
      "grad_norm": 18.892175674438477,
      "learning_rate": 3.9917695473251035e-05,
      "loss": 0.7267,
      "step": 76
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 15.053618431091309,
      "learning_rate": 3.909465020576132e-05,
      "loss": 0.3929,
      "step": 80
    },
    {
      "epoch": 0.9333333333333333,
      "grad_norm": 45.083675384521484,
      "learning_rate": 3.82716049382716e-05,
      "loss": 0.5913,
      "step": 84
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 16.20758628845215,
      "learning_rate": 3.74485596707819e-05,
      "loss": 0.6828,
      "step": 88
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7166666666666667,
      "eval_f1_macro": 0.7356755710414246,
      "eval_f1_micro": 0.7166666666666667,
      "eval_f1_weighted": 0.714955155503936,
      "eval_loss": 0.5613141655921936,
      "eval_precision_macro": 0.7246444246444246,
      "eval_precision_micro": 0.7166666666666667,
      "eval_precision_weighted": 0.7248502348502348,
      "eval_recall_macro": 0.7572316572316572,
      "eval_recall_micro": 0.7166666666666667,
      "eval_recall_weighted": 0.7166666666666667,
      "eval_runtime": 45.0143,
      "eval_samples_per_second": 3.999,
      "eval_steps_per_second": 0.267,
      "step": 90
    },
    {
      "epoch": 1.0222222222222221,
      "grad_norm": 23.50992774963379,
      "learning_rate": 3.662551440329218e-05,
      "loss": 0.661,
      "step": 92
    },
    {
      "epoch": 1.0666666666666667,
      "grad_norm": 16.499202728271484,
      "learning_rate": 3.580246913580247e-05,
      "loss": 0.54,
      "step": 96
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 12.408361434936523,
      "learning_rate": 3.497942386831276e-05,
      "loss": 0.4252,
      "step": 100
    },
    {
      "epoch": 1.1555555555555554,
      "grad_norm": 30.811758041381836,
      "learning_rate": 3.4156378600823045e-05,
      "loss": 0.3497,
      "step": 104
    },
    {
      "epoch": 1.2,
      "grad_norm": 33.299259185791016,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.3372,
      "step": 108
    },
    {
      "epoch": 1.2444444444444445,
      "grad_norm": 9.907443046569824,
      "learning_rate": 3.2510288065843625e-05,
      "loss": 0.4249,
      "step": 112
    },
    {
      "epoch": 1.2888888888888888,
      "grad_norm": 26.002504348754883,
      "learning_rate": 3.168724279835391e-05,
      "loss": 0.4189,
      "step": 116
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 28.73817253112793,
      "learning_rate": 3.08641975308642e-05,
      "loss": 0.7664,
      "step": 120
    },
    {
      "epoch": 1.3777777777777778,
      "grad_norm": 26.192493438720703,
      "learning_rate": 3.0041152263374488e-05,
      "loss": 0.5389,
      "step": 124
    },
    {
      "epoch": 1.4222222222222223,
      "grad_norm": 38.437782287597656,
      "learning_rate": 2.9218106995884775e-05,
      "loss": 0.5959,
      "step": 128
    },
    {
      "epoch": 1.4666666666666668,
      "grad_norm": 17.456642150878906,
      "learning_rate": 2.839506172839506e-05,
      "loss": 0.474,
      "step": 132
    },
    {
      "epoch": 1.511111111111111,
      "grad_norm": 14.425949096679688,
      "learning_rate": 2.757201646090535e-05,
      "loss": 0.5086,
      "step": 136
    },
    {
      "epoch": 1.5555555555555556,
      "grad_norm": 20.57114028930664,
      "learning_rate": 2.6748971193415638e-05,
      "loss": 0.6384,
      "step": 140
    },
    {
      "epoch": 1.6,
      "grad_norm": 18.78620719909668,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.5804,
      "step": 144
    },
    {
      "epoch": 1.6444444444444444,
      "grad_norm": 33.17873764038086,
      "learning_rate": 2.510288065843622e-05,
      "loss": 0.5028,
      "step": 148
    },
    {
      "epoch": 1.6888888888888889,
      "grad_norm": 26.253496170043945,
      "learning_rate": 2.4279835390946505e-05,
      "loss": 0.446,
      "step": 152
    },
    {
      "epoch": 1.7333333333333334,
      "grad_norm": 22.726823806762695,
      "learning_rate": 2.345679012345679e-05,
      "loss": 0.5482,
      "step": 156
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 13.564722061157227,
      "learning_rate": 2.2633744855967078e-05,
      "loss": 0.4052,
      "step": 160
    },
    {
      "epoch": 1.8222222222222222,
      "grad_norm": 13.872465133666992,
      "learning_rate": 2.1810699588477368e-05,
      "loss": 0.3939,
      "step": 164
    },
    {
      "epoch": 1.8666666666666667,
      "grad_norm": 8.408356666564941,
      "learning_rate": 2.0987654320987655e-05,
      "loss": 0.3675,
      "step": 168
    },
    {
      "epoch": 1.911111111111111,
      "grad_norm": 6.3673624992370605,
      "learning_rate": 2.016460905349794e-05,
      "loss": 0.3066,
      "step": 172
    },
    {
      "epoch": 1.9555555555555557,
      "grad_norm": 11.568488121032715,
      "learning_rate": 1.934156378600823e-05,
      "loss": 0.2487,
      "step": 176
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.9987974166870117,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.3745,
      "step": 180
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7722222222222223,
      "eval_f1_macro": 0.8129276695270152,
      "eval_f1_micro": 0.7722222222222223,
      "eval_f1_weighted": 0.7733237593303729,
      "eval_loss": 0.5249539017677307,
      "eval_precision_macro": 0.8374953236064346,
      "eval_precision_micro": 0.7722222222222223,
      "eval_precision_weighted": 0.775858897618157,
      "eval_recall_macro": 0.7930180930180931,
      "eval_recall_micro": 0.7722222222222223,
      "eval_recall_weighted": 0.7722222222222223,
      "eval_runtime": 47.7709,
      "eval_samples_per_second": 3.768,
      "eval_steps_per_second": 0.251,
      "step": 180
    }
  ],
  "logging_steps": 4,
  "max_steps": 270,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 94720830382080.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}