{
  "best_metric": 0.2612117528915405,
  "best_model_checkpoint": "xblock-social-screenshots-5/checkpoint-6738",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 6738,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 14.132967948913574,
      "learning_rate": 1.8545994065281898e-06,
      "loss": 0.6958,
      "step": 25
    },
    {
      "epoch": 0.02,
      "grad_norm": 8.136152267456055,
      "learning_rate": 3.6350148367952525e-06,
      "loss": 0.6975,
      "step": 50
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.9680777788162231,
      "learning_rate": 5.489614243323442e-06,
      "loss": 0.5213,
      "step": 75
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.900412559509277,
      "learning_rate": 7.270029673590505e-06,
      "loss": 0.4804,
      "step": 100
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.952802896499634,
      "learning_rate": 9.124629080118695e-06,
      "loss": 0.4743,
      "step": 125
    },
    {
      "epoch": 0.07,
      "grad_norm": 14.785197257995605,
      "learning_rate": 1.0979228486646884e-05,
      "loss": 0.5276,
      "step": 150
    },
    {
      "epoch": 0.08,
      "grad_norm": 13.082988739013672,
      "learning_rate": 1.2833827893175073e-05,
      "loss": 0.5019,
      "step": 175
    },
    {
      "epoch": 0.09,
      "grad_norm": 8.51102352142334,
      "learning_rate": 1.4688427299703264e-05,
      "loss": 0.6297,
      "step": 200
    },
    {
      "epoch": 0.1,
      "grad_norm": 5.031483173370361,
      "learning_rate": 1.6543026706231455e-05,
      "loss": 0.4062,
      "step": 225
    },
    {
      "epoch": 0.11,
      "grad_norm": 7.973475456237793,
      "learning_rate": 1.8397626112759644e-05,
      "loss": 0.5997,
      "step": 250
    },
    {
      "epoch": 0.12,
      "grad_norm": 11.180716514587402,
      "learning_rate": 2.0252225519287833e-05,
      "loss": 0.478,
      "step": 275
    },
    {
      "epoch": 0.13,
      "grad_norm": 4.953334331512451,
      "learning_rate": 2.2106824925816026e-05,
      "loss": 0.3911,
      "step": 300
    },
    {
      "epoch": 0.14,
      "grad_norm": 7.498977184295654,
      "learning_rate": 2.3961424332344215e-05,
      "loss": 0.477,
      "step": 325
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.19468317925930023,
      "learning_rate": 2.58160237388724e-05,
      "loss": 0.338,
      "step": 350
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.9361104965209961,
      "learning_rate": 2.7670623145400593e-05,
      "loss": 0.5228,
      "step": 375
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6056278944015503,
      "learning_rate": 2.9525222551928783e-05,
      "loss": 0.526,
      "step": 400
    },
    {
      "epoch": 0.19,
      "grad_norm": 6.435601234436035,
      "learning_rate": 3.137982195845697e-05,
      "loss": 0.3893,
      "step": 425
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.40519052743911743,
      "learning_rate": 3.323442136498516e-05,
      "loss": 0.3945,
      "step": 450
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.11996802687644958,
      "learning_rate": 3.508902077151335e-05,
      "loss": 0.5464,
      "step": 475
    },
    {
      "epoch": 0.22,
      "grad_norm": 5.791567325592041,
      "learning_rate": 3.6943620178041546e-05,
      "loss": 0.6363,
      "step": 500
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.3942394256591797,
      "learning_rate": 3.8798219584569735e-05,
      "loss": 0.368,
      "step": 525
    },
    {
      "epoch": 0.24,
      "grad_norm": 11.67949390411377,
      "learning_rate": 4.0652818991097924e-05,
      "loss": 0.4232,
      "step": 550
    },
    {
      "epoch": 0.26,
      "grad_norm": 6.389534950256348,
      "learning_rate": 4.2507418397626114e-05,
      "loss": 0.4988,
      "step": 575
    },
    {
      "epoch": 0.27,
      "grad_norm": 22.40272331237793,
      "learning_rate": 4.43620178041543e-05,
      "loss": 0.707,
      "step": 600
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.8825833797454834,
      "learning_rate": 4.621661721068249e-05,
      "loss": 0.576,
      "step": 625
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.9929873943328857,
      "learning_rate": 4.807121661721069e-05,
      "loss": 0.4785,
      "step": 650
    },
    {
      "epoch": 0.3,
      "grad_norm": 8.819046974182129,
      "learning_rate": 4.992581602373888e-05,
      "loss": 0.5884,
      "step": 675
    },
    {
      "epoch": 0.31,
      "grad_norm": 10.694670677185059,
      "learning_rate": 4.9802110817941956e-05,
      "loss": 0.5243,
      "step": 700
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.784719944000244,
      "learning_rate": 4.9595976253298154e-05,
      "loss": 0.6428,
      "step": 725
    },
    {
      "epoch": 0.33,
      "grad_norm": 7.733468055725098,
      "learning_rate": 4.938984168865435e-05,
      "loss": 0.612,
      "step": 750
    },
    {
      "epoch": 0.35,
      "grad_norm": 7.867452144622803,
      "learning_rate": 4.918370712401056e-05,
      "loss": 0.582,
      "step": 775
    },
    {
      "epoch": 0.36,
      "grad_norm": 12.518653869628906,
      "learning_rate": 4.8977572559366755e-05,
      "loss": 0.569,
      "step": 800
    },
    {
      "epoch": 0.37,
      "grad_norm": 7.27546501159668,
      "learning_rate": 4.877143799472296e-05,
      "loss": 0.6449,
      "step": 825
    },
    {
      "epoch": 0.38,
      "grad_norm": 16.084596633911133,
      "learning_rate": 4.856530343007916e-05,
      "loss": 0.6471,
      "step": 850
    },
    {
      "epoch": 0.39,
      "grad_norm": 4.305793285369873,
      "learning_rate": 4.8359168865435357e-05,
      "loss": 0.5507,
      "step": 875
    },
    {
      "epoch": 0.4,
      "grad_norm": 17.53583335876465,
      "learning_rate": 4.815303430079156e-05,
      "loss": 0.4701,
      "step": 900
    },
    {
      "epoch": 0.41,
      "grad_norm": 6.12671422958374,
      "learning_rate": 4.794689973614776e-05,
      "loss": 0.551,
      "step": 925
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3855419158935547,
      "learning_rate": 4.774076517150396e-05,
      "loss": 0.6347,
      "step": 950
    },
    {
      "epoch": 0.43,
      "grad_norm": 7.226621627807617,
      "learning_rate": 4.7534630606860156e-05,
      "loss": 0.3186,
      "step": 975
    },
    {
      "epoch": 0.45,
      "grad_norm": 7.834379196166992,
      "learning_rate": 4.732849604221636e-05,
      "loss": 0.5085,
      "step": 1000
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.51530522108078,
      "learning_rate": 4.7122361477572566e-05,
      "loss": 0.3593,
      "step": 1025
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3713924884796143,
      "learning_rate": 4.6916226912928764e-05,
      "loss": 0.5763,
      "step": 1050
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3441976308822632,
      "learning_rate": 4.671009234828496e-05,
      "loss": 0.6034,
      "step": 1075
    },
    {
      "epoch": 0.49,
      "grad_norm": 14.422416687011719,
      "learning_rate": 4.650395778364116e-05,
      "loss": 0.4459,
      "step": 1100
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.849581241607666,
      "learning_rate": 4.6297823218997365e-05,
      "loss": 0.4681,
      "step": 1125
    },
    {
      "epoch": 0.51,
      "grad_norm": 11.974592208862305,
      "learning_rate": 4.6091688654353563e-05,
      "loss": 0.5693,
      "step": 1150
    },
    {
      "epoch": 0.52,
      "grad_norm": 8.925352096557617,
      "learning_rate": 4.588555408970976e-05,
      "loss": 0.4427,
      "step": 1175
    },
    {
      "epoch": 0.53,
      "grad_norm": 7.595994472503662,
      "learning_rate": 4.5679419525065967e-05,
      "loss": 0.3736,
      "step": 1200
    },
    {
      "epoch": 0.55,
      "grad_norm": 8.393537521362305,
      "learning_rate": 4.5473284960422165e-05,
      "loss": 0.6534,
      "step": 1225
    },
    {
      "epoch": 0.56,
      "grad_norm": 6.042502403259277,
      "learning_rate": 4.526715039577837e-05,
      "loss": 0.4326,
      "step": 1250
    },
    {
      "epoch": 0.57,
      "grad_norm": 13.696694374084473,
      "learning_rate": 4.506101583113457e-05,
      "loss": 0.5176,
      "step": 1275
    },
    {
      "epoch": 0.58,
      "grad_norm": 4.518089771270752,
      "learning_rate": 4.4854881266490766e-05,
      "loss": 0.4211,
      "step": 1300
    },
    {
      "epoch": 0.59,
      "grad_norm": 11.04053783416748,
      "learning_rate": 4.4648746701846964e-05,
      "loss": 0.5664,
      "step": 1325
    },
    {
      "epoch": 0.6,
      "grad_norm": 16.009056091308594,
      "learning_rate": 4.444261213720316e-05,
      "loss": 0.5545,
      "step": 1350
    },
    {
      "epoch": 0.61,
      "grad_norm": 4.595952033996582,
      "learning_rate": 4.423647757255937e-05,
      "loss": 0.4318,
      "step": 1375
    },
    {
      "epoch": 0.62,
      "grad_norm": 5.188466548919678,
      "learning_rate": 4.403034300791557e-05,
      "loss": 0.4286,
      "step": 1400
    },
    {
      "epoch": 0.63,
      "grad_norm": 6.234115123748779,
      "learning_rate": 4.382420844327177e-05,
      "loss": 0.5876,
      "step": 1425
    },
    {
      "epoch": 0.65,
      "grad_norm": 8.778355598449707,
      "learning_rate": 4.361807387862797e-05,
      "loss": 0.6304,
      "step": 1450
    },
    {
      "epoch": 0.66,
      "grad_norm": 9.595151901245117,
      "learning_rate": 4.3411939313984173e-05,
      "loss": 0.5844,
      "step": 1475
    },
    {
      "epoch": 0.67,
      "grad_norm": 9.795525550842285,
      "learning_rate": 4.320580474934037e-05,
      "loss": 0.4714,
      "step": 1500
    },
    {
      "epoch": 0.68,
      "grad_norm": 14.277398109436035,
      "learning_rate": 4.299967018469657e-05,
      "loss": 0.5627,
      "step": 1525
    },
    {
      "epoch": 0.69,
      "grad_norm": 5.245518684387207,
      "learning_rate": 4.2793535620052775e-05,
      "loss": 0.5474,
      "step": 1550
    },
    {
      "epoch": 0.7,
      "grad_norm": 6.895930290222168,
      "learning_rate": 4.258740105540897e-05,
      "loss": 0.4686,
      "step": 1575
    },
    {
      "epoch": 0.71,
      "grad_norm": 7.74411153793335,
      "learning_rate": 4.238126649076518e-05,
      "loss": 0.5021,
      "step": 1600
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.97990083694458,
      "learning_rate": 4.2175131926121376e-05,
      "loss": 0.4173,
      "step": 1625
    },
    {
      "epoch": 0.73,
      "grad_norm": 8.026514053344727,
      "learning_rate": 4.1968997361477574e-05,
      "loss": 0.3728,
      "step": 1650
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3547731339931488,
      "learning_rate": 4.176286279683377e-05,
      "loss": 0.4983,
      "step": 1675
    },
    {
      "epoch": 0.76,
      "grad_norm": 4.483277797698975,
      "learning_rate": 4.155672823218997e-05,
      "loss": 0.5235,
      "step": 1700
    },
    {
      "epoch": 0.77,
      "grad_norm": 7.000768184661865,
      "learning_rate": 4.1350593667546175e-05,
      "loss": 0.4955,
      "step": 1725
    },
    {
      "epoch": 0.78,
      "grad_norm": 4.905660152435303,
      "learning_rate": 4.114445910290238e-05,
      "loss": 0.4803,
      "step": 1750
    },
    {
      "epoch": 0.79,
      "grad_norm": 4.236353397369385,
      "learning_rate": 4.093832453825858e-05,
      "loss": 0.5105,
      "step": 1775
    },
    {
      "epoch": 0.8,
      "grad_norm": 8.685340881347656,
      "learning_rate": 4.073218997361478e-05,
      "loss": 0.6345,
      "step": 1800
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6251416206359863,
      "learning_rate": 4.052605540897098e-05,
      "loss": 0.6859,
      "step": 1825
    },
    {
      "epoch": 0.82,
      "grad_norm": 13.661340713500977,
      "learning_rate": 4.031992084432718e-05,
      "loss": 0.3122,
      "step": 1850
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7971667051315308,
      "learning_rate": 4.011378627968338e-05,
      "loss": 0.5103,
      "step": 1875
    },
    {
      "epoch": 0.85,
      "grad_norm": 4.921170234680176,
      "learning_rate": 3.9907651715039576e-05,
      "loss": 0.4259,
      "step": 1900
    },
    {
      "epoch": 0.86,
      "grad_norm": 6.340487480163574,
      "learning_rate": 3.970151715039578e-05,
      "loss": 0.5088,
      "step": 1925
    },
    {
      "epoch": 0.87,
      "grad_norm": 8.182121276855469,
      "learning_rate": 3.9495382585751986e-05,
      "loss": 0.5638,
      "step": 1950
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.3480095863342285,
      "learning_rate": 3.9289248021108184e-05,
      "loss": 0.5788,
      "step": 1975
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.820002555847168,
      "learning_rate": 3.908311345646438e-05,
      "loss": 0.5043,
      "step": 2000
    },
    {
      "epoch": 0.9,
      "grad_norm": 5.409160614013672,
      "learning_rate": 3.887697889182058e-05,
      "loss": 0.3171,
      "step": 2025
    },
    {
      "epoch": 0.91,
      "grad_norm": 6.675960063934326,
      "learning_rate": 3.867084432717678e-05,
      "loss": 0.4536,
      "step": 2050
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4080889225006104,
      "learning_rate": 3.8464709762532984e-05,
      "loss": 0.5149,
      "step": 2075
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.0316126346588135,
      "learning_rate": 3.825857519788918e-05,
      "loss": 0.6131,
      "step": 2100
    },
    {
      "epoch": 0.95,
      "grad_norm": 6.637559413909912,
      "learning_rate": 3.805244063324539e-05,
      "loss": 0.4527,
      "step": 2125
    },
    {
      "epoch": 0.96,
      "grad_norm": 5.249868869781494,
      "learning_rate": 3.7846306068601585e-05,
      "loss": 0.3764,
      "step": 2150
    },
    {
      "epoch": 0.97,
      "grad_norm": 8.488882064819336,
      "learning_rate": 3.764017150395778e-05,
      "loss": 0.4237,
      "step": 2175
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0089231729507446,
      "learning_rate": 3.743403693931399e-05,
      "loss": 0.5939,
      "step": 2200
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.9522852301597595,
      "learning_rate": 3.7227902374670186e-05,
      "loss": 0.4297,
      "step": 2225
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9071889606053861,
      "eval_f1_macro": 0.4640938055477737,
      "eval_f1_micro": 0.9071889606053861,
      "eval_f1_weighted": 0.8894145383985912,
      "eval_loss": 0.35839545726776123,
      "eval_precision_macro": 0.6916894534747811,
      "eval_precision_micro": 0.9071889606053861,
      "eval_precision_weighted": 0.8955058437354184,
      "eval_recall_macro": 0.4208919396551037,
      "eval_recall_micro": 0.9071889606053861,
      "eval_recall_weighted": 0.9071889606053861,
      "eval_runtime": 391.8611,
      "eval_samples_per_second": 11.466,
      "eval_steps_per_second": 0.717,
      "step": 2246
    },
    {
      "epoch": 1.0,
      "grad_norm": 4.51116943359375,
      "learning_rate": 3.7021767810026384e-05,
      "loss": 0.5182,
      "step": 2250
    },
    {
      "epoch": 1.01,
      "grad_norm": 4.062203884124756,
      "learning_rate": 3.681563324538258e-05,
      "loss": 0.4014,
      "step": 2275
    },
    {
      "epoch": 1.02,
      "grad_norm": 4.1213884353637695,
      "learning_rate": 3.660949868073879e-05,
      "loss": 0.3564,
      "step": 2300
    },
    {
      "epoch": 1.04,
      "grad_norm": 4.061648845672607,
      "learning_rate": 3.640336411609499e-05,
      "loss": 0.5185,
      "step": 2325
    },
    {
      "epoch": 1.05,
      "grad_norm": 5.865363597869873,
      "learning_rate": 3.619722955145119e-05,
      "loss": 0.5669,
      "step": 2350
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.3118332624435425,
      "learning_rate": 3.599109498680739e-05,
      "loss": 0.3586,
      "step": 2375
    },
    {
      "epoch": 1.07,
      "grad_norm": 8.255626678466797,
      "learning_rate": 3.578496042216359e-05,
      "loss": 0.5938,
      "step": 2400
    },
    {
      "epoch": 1.08,
      "grad_norm": 11.17790412902832,
      "learning_rate": 3.557882585751979e-05,
      "loss": 0.3843,
      "step": 2425
    },
    {
      "epoch": 1.09,
      "grad_norm": 9.643479347229004,
      "learning_rate": 3.5380936675461745e-05,
      "loss": 0.5595,
      "step": 2450
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.35103797912597656,
      "learning_rate": 3.5174802110817943e-05,
      "loss": 0.413,
      "step": 2475
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.7915975451469421,
      "learning_rate": 3.496866754617414e-05,
      "loss": 0.3686,
      "step": 2500
    },
    {
      "epoch": 1.12,
      "grad_norm": 5.222273826599121,
      "learning_rate": 3.4762532981530347e-05,
      "loss": 0.3873,
      "step": 2525
    },
    {
      "epoch": 1.14,
      "grad_norm": 6.224874019622803,
      "learning_rate": 3.4556398416886545e-05,
      "loss": 0.5069,
      "step": 2550
    },
    {
      "epoch": 1.15,
      "grad_norm": 12.55453109741211,
      "learning_rate": 3.435026385224275e-05,
      "loss": 0.255,
      "step": 2575
    },
    {
      "epoch": 1.16,
      "grad_norm": 11.516471862792969,
      "learning_rate": 3.414412928759895e-05,
      "loss": 0.4712,
      "step": 2600
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.0335161685943604,
      "learning_rate": 3.3937994722955146e-05,
      "loss": 0.4652,
      "step": 2625
    },
    {
      "epoch": 1.18,
      "grad_norm": 5.176596641540527,
      "learning_rate": 3.3731860158311344e-05,
      "loss": 0.3918,
      "step": 2650
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.3881676197052,
      "learning_rate": 3.352572559366754e-05,
      "loss": 0.3869,
      "step": 2675
    },
    {
      "epoch": 1.2,
      "grad_norm": 4.0972514152526855,
      "learning_rate": 3.331959102902375e-05,
      "loss": 0.44,
      "step": 2700
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.4464218020439148,
      "learning_rate": 3.311345646437995e-05,
      "loss": 0.3373,
      "step": 2725
    },
    {
      "epoch": 1.22,
      "grad_norm": 6.553329944610596,
      "learning_rate": 3.290732189973615e-05,
      "loss": 0.3585,
      "step": 2750
    },
    {
      "epoch": 1.24,
      "grad_norm": 6.4266157150268555,
      "learning_rate": 3.270118733509235e-05,
      "loss": 0.4454,
      "step": 2775
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.8840596079826355,
      "learning_rate": 3.2495052770448553e-05,
      "loss": 0.396,
      "step": 2800
    },
    {
      "epoch": 1.26,
      "grad_norm": 6.651895523071289,
      "learning_rate": 3.228891820580475e-05,
      "loss": 0.4029,
      "step": 2825
    },
    {
      "epoch": 1.27,
      "grad_norm": 8.035750389099121,
      "learning_rate": 3.208278364116095e-05,
      "loss": 0.5253,
      "step": 2850
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.7661600112915039,
      "learning_rate": 3.187664907651715e-05,
      "loss": 0.498,
      "step": 2875
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.3388662338256836,
      "learning_rate": 3.167051451187335e-05,
      "loss": 0.4511,
      "step": 2900
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.266098499298096,
      "learning_rate": 3.146437994722956e-05,
      "loss": 0.5038,
      "step": 2925
    },
    {
      "epoch": 1.31,
      "grad_norm": 9.547815322875977,
      "learning_rate": 3.1258245382585756e-05,
      "loss": 0.3055,
      "step": 2950
    },
    {
      "epoch": 1.32,
      "grad_norm": 5.78660774230957,
      "learning_rate": 3.1052110817941954e-05,
      "loss": 0.3645,
      "step": 2975
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.6312762498855591,
      "learning_rate": 3.084597625329815e-05,
      "loss": 0.5396,
      "step": 3000
    },
    {
      "epoch": 1.35,
      "grad_norm": 9.60580825805664,
      "learning_rate": 3.063984168865435e-05,
      "loss": 0.3903,
      "step": 3025
    },
    {
      "epoch": 1.36,
      "grad_norm": 8.78200626373291,
      "learning_rate": 3.043370712401056e-05,
      "loss": 0.2628,
      "step": 3050
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.5894768834114075,
      "learning_rate": 3.0227572559366757e-05,
      "loss": 0.3829,
      "step": 3075
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.919884204864502,
      "learning_rate": 3.0021437994722955e-05,
      "loss": 0.4657,
      "step": 3100
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.6643290519714355,
      "learning_rate": 2.9815303430079157e-05,
      "loss": 0.4419,
      "step": 3125
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.99531888961792,
      "learning_rate": 2.9609168865435355e-05,
      "loss": 0.6053,
      "step": 3150
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8033461570739746,
      "learning_rate": 2.940303430079156e-05,
      "loss": 0.3048,
      "step": 3175
    },
    {
      "epoch": 1.42,
      "grad_norm": 10.75854206085205,
      "learning_rate": 2.9196899736147758e-05,
      "loss": 0.3911,
      "step": 3200
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.392392635345459,
      "learning_rate": 2.899076517150396e-05,
      "loss": 0.43,
      "step": 3225
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.793901443481445,
      "learning_rate": 2.8784630606860158e-05,
      "loss": 0.4389,
      "step": 3250
    },
    {
      "epoch": 1.46,
      "grad_norm": 6.22283935546875,
      "learning_rate": 2.8578496042216363e-05,
      "loss": 0.3379,
      "step": 3275
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.443415403366089,
      "learning_rate": 2.837236147757256e-05,
      "loss": 0.4967,
      "step": 3300
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.5518059730529785,
      "learning_rate": 2.8166226912928762e-05,
      "loss": 0.5465,
      "step": 3325
    },
    {
      "epoch": 1.49,
      "grad_norm": 6.078768253326416,
      "learning_rate": 2.796009234828496e-05,
      "loss": 0.4807,
      "step": 3350
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.6977243423461914,
      "learning_rate": 2.775395778364116e-05,
      "loss": 0.5693,
      "step": 3375
    },
    {
      "epoch": 1.51,
      "grad_norm": 13.189055442810059,
      "learning_rate": 2.7547823218997364e-05,
      "loss": 0.2355,
      "step": 3400
    },
    {
      "epoch": 1.52,
      "grad_norm": 11.238237380981445,
      "learning_rate": 2.7341688654353565e-05,
      "loss": 0.5586,
      "step": 3425
    },
    {
      "epoch": 1.54,
      "grad_norm": 8.910079956054688,
      "learning_rate": 2.7135554089709763e-05,
      "loss": 0.4066,
      "step": 3450
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7681870460510254,
      "learning_rate": 2.692941952506596e-05,
      "loss": 0.5567,
      "step": 3475
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.8575474619865417,
      "learning_rate": 2.6723284960422163e-05,
      "loss": 0.2942,
      "step": 3500
    },
    {
      "epoch": 1.57,
      "grad_norm": 6.890367031097412,
      "learning_rate": 2.6517150395778368e-05,
      "loss": 0.2915,
      "step": 3525
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.6504009962081909,
      "learning_rate": 2.6311015831134566e-05,
      "loss": 0.4993,
      "step": 3550
    },
    {
      "epoch": 1.59,
      "grad_norm": 5.363488674163818,
      "learning_rate": 2.6104881266490768e-05,
      "loss": 0.4317,
      "step": 3575
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.34819716215133667,
      "learning_rate": 2.5898746701846966e-05,
      "loss": 0.3354,
      "step": 3600
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.5899884104728699,
      "learning_rate": 2.5692612137203164e-05,
      "loss": 0.3711,
      "step": 3625
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2884409427642822,
      "learning_rate": 2.548647757255937e-05,
      "loss": 0.3231,
      "step": 3650
    },
    {
      "epoch": 1.64,
      "grad_norm": 10.364724159240723,
      "learning_rate": 2.528034300791557e-05,
      "loss": 0.5173,
      "step": 3675
    },
    {
      "epoch": 1.65,
      "grad_norm": 5.121739864349365,
      "learning_rate": 2.507420844327177e-05,
      "loss": 0.4939,
      "step": 3700
    },
    {
      "epoch": 1.66,
      "grad_norm": 6.0736589431762695,
      "learning_rate": 2.486807387862797e-05,
      "loss": 0.4922,
      "step": 3725
    },
    {
      "epoch": 1.67,
      "grad_norm": 11.522198677062988,
      "learning_rate": 2.466193931398417e-05,
      "loss": 0.4233,
      "step": 3750
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.02380633354187,
      "learning_rate": 2.4455804749340373e-05,
      "loss": 0.3657,
      "step": 3775
    },
    {
      "epoch": 1.69,
      "grad_norm": 7.379997730255127,
      "learning_rate": 2.424967018469657e-05,
      "loss": 0.4719,
      "step": 3800
    },
    {
      "epoch": 1.7,
      "grad_norm": 9.087469100952148,
      "learning_rate": 2.404353562005277e-05,
      "loss": 0.4912,
      "step": 3825
    },
    {
      "epoch": 1.71,
      "grad_norm": 6.239768028259277,
      "learning_rate": 2.383740105540897e-05,
      "loss": 0.4496,
      "step": 3850
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.5235075354576111,
      "learning_rate": 2.3631266490765173e-05,
      "loss": 0.3338,
      "step": 3875
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.942290782928467,
      "learning_rate": 2.3425131926121374e-05,
      "loss": 0.5681,
      "step": 3900
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.7366746068000793,
      "learning_rate": 2.3218997361477572e-05,
      "loss": 0.4026,
      "step": 3925
    },
    {
      "epoch": 1.76,
      "grad_norm": 6.261937141418457,
      "learning_rate": 2.3012862796833774e-05,
      "loss": 0.3486,
      "step": 3950
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.27028751373291,
      "learning_rate": 2.2806728232189976e-05,
      "loss": 0.2636,
      "step": 3975
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.5691050291061401,
      "learning_rate": 2.260883905013193e-05,
      "loss": 0.4031,
      "step": 4000
    },
    {
      "epoch": 1.79,
      "grad_norm": 8.969446182250977,
      "learning_rate": 2.2402704485488127e-05,
      "loss": 0.3943,
      "step": 4025
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.4185885190963745,
      "learning_rate": 2.219656992084433e-05,
      "loss": 0.4099,
      "step": 4050
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.5010409355163574,
      "learning_rate": 2.1990435356200527e-05,
      "loss": 0.5192,
      "step": 4075
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.3482280969619751,
      "learning_rate": 2.1784300791556732e-05,
      "loss": 0.4143,
      "step": 4100
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.623389720916748,
      "learning_rate": 2.157816622691293e-05,
      "loss": 0.3566,
      "step": 4125
    },
    {
      "epoch": 1.85,
      "grad_norm": 11.738636016845703,
      "learning_rate": 2.1372031662269128e-05,
      "loss": 0.4869,
      "step": 4150
    },
    {
      "epoch": 1.86,
      "grad_norm": 18.07844352722168,
      "learning_rate": 2.116589709762533e-05,
      "loss": 0.2863,
      "step": 4175
    },
    {
      "epoch": 1.87,
      "grad_norm": 7.016993522644043,
      "learning_rate": 2.095976253298153e-05,
      "loss": 0.3461,
      "step": 4200
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.18062768876552582,
      "learning_rate": 2.0753627968337733e-05,
      "loss": 0.2242,
      "step": 4225
    },
    {
      "epoch": 1.89,
      "grad_norm": 8.72131061553955,
      "learning_rate": 2.054749340369393e-05,
      "loss": 0.4769,
      "step": 4250
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.8265817165374756,
      "learning_rate": 2.0341358839050133e-05,
      "loss": 0.4334,
      "step": 4275
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.490007162094116,
      "learning_rate": 2.0135224274406334e-05,
      "loss": 0.457,
      "step": 4300
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.18633964657783508,
      "learning_rate": 1.9929089709762532e-05,
      "loss": 0.4192,
      "step": 4325
    },
    {
      "epoch": 1.94,
      "grad_norm": 5.816620826721191,
      "learning_rate": 1.9722955145118734e-05,
      "loss": 0.3111,
      "step": 4350
    },
    {
      "epoch": 1.95,
      "grad_norm": 5.391794204711914,
      "learning_rate": 1.9516820580474935e-05,
      "loss": 0.276,
      "step": 4375
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.8628760576248169,
      "learning_rate": 1.9310686015831137e-05,
      "loss": 0.3737,
      "step": 4400
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.97860050201416,
      "learning_rate": 1.9104551451187335e-05,
      "loss": 0.376,
      "step": 4425
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.89571475982666,
      "learning_rate": 1.8898416886543537e-05,
      "loss": 0.3772,
      "step": 4450
    },
    {
      "epoch": 1.99,
      "grad_norm": 0.35662633180618286,
      "learning_rate": 1.8692282321899738e-05,
      "loss": 0.4631,
      "step": 4475
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9203204985533051,
      "eval_f1_macro": 0.49143994642521266,
      "eval_f1_micro": 0.9203204985533051,
      "eval_f1_weighted": 0.9061028561941445,
      "eval_loss": 0.2875824272632599,
      "eval_precision_macro": 0.6640335095209342,
      "eval_precision_micro": 0.9203204985533051,
      "eval_precision_weighted": 0.9054269421170664,
      "eval_recall_macro": 0.445379050503802,
      "eval_recall_micro": 0.9203204985533051,
      "eval_recall_weighted": 0.9203204985533051,
      "eval_runtime": 404.3838,
      "eval_samples_per_second": 11.111,
      "eval_steps_per_second": 0.695,
      "step": 4492
    },
    {
      "epoch": 2.0,
      "grad_norm": 10.790550231933594,
      "learning_rate": 1.8486147757255936e-05,
      "loss": 0.536,
      "step": 4500
    },
    {
      "epoch": 2.01,
      "grad_norm": 8.45910930633545,
      "learning_rate": 1.8280013192612138e-05,
      "loss": 0.4415,
      "step": 4525
    },
    {
      "epoch": 2.03,
      "grad_norm": 5.119017124176025,
      "learning_rate": 1.807387862796834e-05,
      "loss": 0.3585,
      "step": 4550
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.2371793985366821,
      "learning_rate": 1.786774406332454e-05,
      "loss": 0.3951,
      "step": 4575
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.3467607498168945,
      "learning_rate": 1.766160949868074e-05,
      "loss": 0.4048,
      "step": 4600
    },
    {
      "epoch": 2.06,
      "grad_norm": 0.7943634986877441,
      "learning_rate": 1.7455474934036937e-05,
      "loss": 0.3604,
      "step": 4625
    },
    {
      "epoch": 2.07,
      "grad_norm": 1.8456642627716064,
      "learning_rate": 1.7249340369393142e-05,
      "loss": 0.4308,
      "step": 4650
    },
    {
      "epoch": 2.08,
      "grad_norm": 15.450132369995117,
      "learning_rate": 1.704320580474934e-05,
      "loss": 0.2849,
      "step": 4675
    },
    {
      "epoch": 2.09,
      "grad_norm": 0.7097306251525879,
      "learning_rate": 1.6837071240105542e-05,
      "loss": 0.3189,
      "step": 4700
    },
    {
      "epoch": 2.1,
      "grad_norm": 0.0466163270175457,
      "learning_rate": 1.663093667546174e-05,
      "loss": 0.3747,
      "step": 4725
    },
    {
      "epoch": 2.11,
      "grad_norm": 17.914644241333008,
      "learning_rate": 1.6424802110817945e-05,
      "loss": 0.4635,
      "step": 4750
    },
    {
      "epoch": 2.13,
      "grad_norm": 5.257259845733643,
      "learning_rate": 1.6218667546174143e-05,
      "loss": 0.3882,
      "step": 4775
    },
    {
      "epoch": 2.14,
      "grad_norm": 0.1565193384885788,
      "learning_rate": 1.601253298153034e-05,
      "loss": 0.4073,
      "step": 4800
    },
    {
      "epoch": 2.15,
      "grad_norm": 13.001235008239746,
      "learning_rate": 1.5806398416886546e-05,
      "loss": 0.3174,
      "step": 4825
    },
    {
      "epoch": 2.16,
      "grad_norm": 11.252735137939453,
      "learning_rate": 1.5600263852242745e-05,
      "loss": 0.4794,
      "step": 4850
    },
    {
      "epoch": 2.17,
      "grad_norm": 8.534846305847168,
      "learning_rate": 1.5394129287598946e-05,
      "loss": 0.2951,
      "step": 4875
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.504175662994385,
      "learning_rate": 1.5187994722955146e-05,
      "loss": 0.5374,
      "step": 4900
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.395377159118652,
      "learning_rate": 1.4981860158311347e-05,
      "loss": 0.3697,
      "step": 4925
    },
    {
      "epoch": 2.2,
      "grad_norm": 11.302129745483398,
      "learning_rate": 1.4775725593667547e-05,
      "loss": 0.2116,
      "step": 4950
    },
    {
      "epoch": 2.22,
      "grad_norm": 0.7363251447677612,
      "learning_rate": 1.4569591029023747e-05,
      "loss": 0.327,
      "step": 4975
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.721381664276123,
      "learning_rate": 1.4363456464379949e-05,
      "loss": 0.3867,
      "step": 5000
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9688565731048584,
      "learning_rate": 1.4157321899736149e-05,
      "loss": 0.4413,
      "step": 5025
    },
    {
      "epoch": 2.25,
      "grad_norm": 0.40781280398368835,
      "learning_rate": 1.395118733509235e-05,
      "loss": 0.3637,
      "step": 5050
    },
    {
      "epoch": 2.26,
      "grad_norm": 1.3190653324127197,
      "learning_rate": 1.374505277044855e-05,
      "loss": 0.3532,
      "step": 5075
    },
    {
      "epoch": 2.27,
      "grad_norm": 0.6920987963676453,
      "learning_rate": 1.3538918205804748e-05,
      "loss": 0.3523,
      "step": 5100
    },
    {
      "epoch": 2.28,
      "grad_norm": 10.344520568847656,
      "learning_rate": 1.3332783641160951e-05,
      "loss": 0.4375,
      "step": 5125
    },
    {
      "epoch": 2.29,
      "grad_norm": 12.584922790527344,
      "learning_rate": 1.312664907651715e-05,
      "loss": 0.4301,
      "step": 5150
    },
    {
      "epoch": 2.3,
      "grad_norm": 0.4461086094379425,
      "learning_rate": 1.2920514511873353e-05,
      "loss": 0.3158,
      "step": 5175
    },
    {
      "epoch": 2.32,
      "grad_norm": 9.555744171142578,
      "learning_rate": 1.2714379947229551e-05,
      "loss": 0.3468,
      "step": 5200
    },
    {
      "epoch": 2.33,
      "grad_norm": 12.140357971191406,
      "learning_rate": 1.2508245382585754e-05,
      "loss": 0.4215,
      "step": 5225
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.640113830566406,
      "learning_rate": 1.2302110817941952e-05,
      "loss": 0.3538,
      "step": 5250
    },
    {
      "epoch": 2.35,
      "grad_norm": 8.983073234558105,
      "learning_rate": 1.2095976253298154e-05,
      "loss": 0.2868,
      "step": 5275
    },
    {
      "epoch": 2.36,
      "grad_norm": 6.697389125823975,
      "learning_rate": 1.1889841688654354e-05,
      "loss": 0.4148,
      "step": 5300
    },
    {
      "epoch": 2.37,
      "grad_norm": 9.571817398071289,
      "learning_rate": 1.1683707124010555e-05,
      "loss": 0.4891,
      "step": 5325
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.644136905670166,
      "learning_rate": 1.1477572559366755e-05,
      "loss": 0.2858,
      "step": 5350
    },
    {
      "epoch": 2.39,
      "grad_norm": 19.55898666381836,
      "learning_rate": 1.1271437994722955e-05,
      "loss": 0.4059,
      "step": 5375
    },
    {
      "epoch": 2.4,
      "grad_norm": 6.888569355010986,
      "learning_rate": 1.1065303430079157e-05,
      "loss": 0.3415,
      "step": 5400
    },
    {
      "epoch": 2.42,
      "grad_norm": 7.856134414672852,
      "learning_rate": 1.0859168865435356e-05,
      "loss": 0.3327,
      "step": 5425
    },
    {
      "epoch": 2.43,
      "grad_norm": 15.82084846496582,
      "learning_rate": 1.0653034300791558e-05,
      "loss": 0.4321,
      "step": 5450
    },
    {
      "epoch": 2.44,
      "grad_norm": 14.98440170288086,
      "learning_rate": 1.0446899736147758e-05,
      "loss": 0.4672,
      "step": 5475
    },
    {
      "epoch": 2.45,
      "grad_norm": 0.872367799282074,
      "learning_rate": 1.0240765171503958e-05,
      "loss": 0.2711,
      "step": 5500
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.635341167449951,
      "learning_rate": 1.0034630606860158e-05,
      "loss": 0.3636,
      "step": 5525
    },
    {
      "epoch": 2.47,
      "grad_norm": 12.989480972290039,
      "learning_rate": 9.82849604221636e-06,
      "loss": 0.3524,
      "step": 5550
    },
    {
      "epoch": 2.48,
      "grad_norm": 16.43426513671875,
      "learning_rate": 9.622361477572559e-06,
      "loss": 0.2381,
      "step": 5575
    },
    {
      "epoch": 2.49,
      "grad_norm": 1.0547945499420166,
      "learning_rate": 9.41622691292876e-06,
      "loss": 0.3293,
      "step": 5600
    },
    {
      "epoch": 2.5,
      "grad_norm": 13.630729675292969,
      "learning_rate": 9.210092348284962e-06,
      "loss": 0.3658,
      "step": 5625
    },
    {
      "epoch": 2.52,
      "grad_norm": 12.972505569458008,
      "learning_rate": 9.003957783641162e-06,
      "loss": 0.2829,
      "step": 5650
    },
    {
      "epoch": 2.53,
      "grad_norm": 10.048601150512695,
      "learning_rate": 8.797823218997362e-06,
      "loss": 0.3867,
      "step": 5675
    },
    {
      "epoch": 2.54,
      "grad_norm": 1.120229721069336,
      "learning_rate": 8.591688654353562e-06,
      "loss": 0.4166,
      "step": 5700
    },
    {
      "epoch": 2.55,
      "grad_norm": 0.9482748508453369,
      "learning_rate": 8.385554089709763e-06,
      "loss": 0.4281,
      "step": 5725
    },
    {
      "epoch": 2.56,
      "grad_norm": 0.29686295986175537,
      "learning_rate": 8.179419525065963e-06,
      "loss": 0.3187,
      "step": 5750
    },
    {
      "epoch": 2.57,
      "grad_norm": 9.385336875915527,
      "learning_rate": 7.973284960422165e-06,
      "loss": 0.3553,
      "step": 5775
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.910414695739746,
      "learning_rate": 7.767150395778365e-06,
      "loss": 0.2821,
      "step": 5800
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.932247161865234,
      "learning_rate": 7.561015831134564e-06,
      "loss": 0.2706,
      "step": 5825
    },
    {
      "epoch": 2.6,
      "grad_norm": 1.826149821281433,
      "learning_rate": 7.354881266490765e-06,
      "loss": 0.3742,
      "step": 5850
    },
    {
      "epoch": 2.62,
      "grad_norm": 9.548162460327148,
      "learning_rate": 7.148746701846966e-06,
      "loss": 0.3124,
      "step": 5875
    },
    {
      "epoch": 2.63,
      "grad_norm": 10.59200668334961,
      "learning_rate": 6.9426121372031665e-06,
      "loss": 0.2541,
      "step": 5900
    },
    {
      "epoch": 2.64,
      "grad_norm": 6.801640033721924,
      "learning_rate": 6.736477572559367e-06,
      "loss": 0.336,
      "step": 5925
    },
    {
      "epoch": 2.65,
      "grad_norm": 6.312964916229248,
      "learning_rate": 6.530343007915568e-06,
      "loss": 0.5251,
      "step": 5950
    },
    {
      "epoch": 2.66,
      "grad_norm": 10.121294975280762,
      "learning_rate": 6.324208443271768e-06,
      "loss": 0.3999,
      "step": 5975
    },
    {
      "epoch": 2.67,
      "grad_norm": 11.066811561584473,
      "learning_rate": 6.1180738786279684e-06,
      "loss": 0.3101,
      "step": 6000
    },
    {
      "epoch": 2.68,
      "grad_norm": 0.14530642330646515,
      "learning_rate": 5.911939313984169e-06,
      "loss": 0.2483,
      "step": 6025
    },
    {
      "epoch": 2.69,
      "grad_norm": 8.127425193786621,
      "learning_rate": 5.70580474934037e-06,
      "loss": 0.2684,
      "step": 6050
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.671697616577148,
      "learning_rate": 5.4996701846965706e-06,
      "loss": 0.3339,
      "step": 6075
    },
    {
      "epoch": 2.72,
      "grad_norm": 7.663967609405518,
      "learning_rate": 5.29353562005277e-06,
      "loss": 0.2823,
      "step": 6100
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.24953556060791,
      "learning_rate": 5.087401055408971e-06,
      "loss": 0.2481,
      "step": 6125
    },
    {
      "epoch": 2.74,
      "grad_norm": 6.942299842834473,
      "learning_rate": 4.881266490765172e-06,
      "loss": 0.3467,
      "step": 6150
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.23897123336792,
      "learning_rate": 4.6751319261213725e-06,
      "loss": 0.4663,
      "step": 6175
    },
    {
      "epoch": 2.76,
      "grad_norm": 21.0181827545166,
      "learning_rate": 4.468997361477572e-06,
      "loss": 0.2012,
      "step": 6200
    },
    {
      "epoch": 2.77,
      "grad_norm": 6.582679748535156,
      "learning_rate": 4.262862796833773e-06,
      "loss": 0.3391,
      "step": 6225
    },
    {
      "epoch": 2.78,
      "grad_norm": 0.8921090960502625,
      "learning_rate": 4.056728232189974e-06,
      "loss": 0.2925,
      "step": 6250
    },
    {
      "epoch": 2.79,
      "grad_norm": 0.729013204574585,
      "learning_rate": 3.8505936675461745e-06,
      "loss": 0.2918,
      "step": 6275
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.47634056210517883,
      "learning_rate": 3.6527044854881267e-06,
      "loss": 0.3309,
      "step": 6300
    },
    {
      "epoch": 2.82,
      "grad_norm": 0.7455437183380127,
      "learning_rate": 3.4465699208443274e-06,
      "loss": 0.2839,
      "step": 6325
    },
    {
      "epoch": 2.83,
      "grad_norm": 17.21619415283203,
      "learning_rate": 3.240435356200528e-06,
      "loss": 0.3023,
      "step": 6350
    },
    {
      "epoch": 2.84,
      "grad_norm": 0.5437944531440735,
      "learning_rate": 3.0343007915567284e-06,
      "loss": 0.2888,
      "step": 6375
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.063761234283447,
      "learning_rate": 2.8281662269129287e-06,
      "loss": 0.4373,
      "step": 6400
    },
    {
      "epoch": 2.86,
      "grad_norm": 6.74634313583374,
      "learning_rate": 2.6220316622691294e-06,
      "loss": 0.4396,
      "step": 6425
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.216498613357544,
      "learning_rate": 2.41589709762533e-06,
      "loss": 0.3116,
      "step": 6450
    },
    {
      "epoch": 2.88,
      "grad_norm": 1.6705697774887085,
      "learning_rate": 2.2097625329815304e-06,
      "loss": 0.3643,
      "step": 6475
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.8092851638793945,
      "learning_rate": 2.003627968337731e-06,
      "loss": 0.2643,
      "step": 6500
    },
    {
      "epoch": 2.91,
      "grad_norm": 16.015827178955078,
      "learning_rate": 1.7974934036939316e-06,
      "loss": 0.4048,
      "step": 6525
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7783210277557373,
      "learning_rate": 1.5913588390501319e-06,
      "loss": 0.3777,
      "step": 6550
    },
    {
      "epoch": 2.93,
      "grad_norm": 0.9853120446205139,
      "learning_rate": 1.3852242744063324e-06,
      "loss": 0.2335,
      "step": 6575
    },
    {
      "epoch": 2.94,
      "grad_norm": 0.24746793508529663,
      "learning_rate": 1.179089709762533e-06,
      "loss": 0.1812,
      "step": 6600
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.739319324493408,
      "learning_rate": 9.729551451187335e-07,
      "loss": 0.2132,
      "step": 6625
    },
    {
      "epoch": 2.96,
      "grad_norm": 0.38551005721092224,
      "learning_rate": 7.66820580474934e-07,
      "loss": 0.2686,
      "step": 6650
    },
    {
      "epoch": 2.97,
      "grad_norm": 6.976538181304932,
      "learning_rate": 5.606860158311346e-07,
      "loss": 0.507,
      "step": 6675
    },
    {
      "epoch": 2.98,
      "grad_norm": 6.835049152374268,
      "learning_rate": 3.5455145118733513e-07,
      "loss": 0.4169,
      "step": 6700
    },
    {
      "epoch": 2.99,
      "grad_norm": 0.45143523812294006,
      "learning_rate": 1.4841688654353562e-07,
      "loss": 0.3045,
      "step": 6725
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9278878255063432,
      "eval_f1_macro": 0.5509939923795275,
      "eval_f1_micro": 0.9278878255063432,
      "eval_f1_weighted": 0.915533252030031,
      "eval_loss": 0.2612117528915405,
      "eval_precision_macro": 0.7507175360173887,
      "eval_precision_micro": 0.9278878255063432,
      "eval_precision_weighted": 0.9184859185112592,
      "eval_recall_macro": 0.4872137731200702,
      "eval_recall_micro": 0.9278878255063432,
      "eval_recall_weighted": 0.9278878255063432,
      "eval_runtime": 408.2523,
      "eval_samples_per_second": 11.005,
      "eval_steps_per_second": 0.688,
      "step": 6738
    }
  ],
  "logging_steps": 25,
  "max_steps": 6738,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 4.1760701843670835e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}