{
  "best_global_step": 60003,
  "best_metric": 0.8089721345660946,
  "best_model_checkpoint": "./doc_type_v2_primary_model_multilingual-e5-small/checkpoint-60003",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 60003,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.024998750062496876,
      "grad_norm": 17.081697463989258,
      "learning_rate": 4.9584187457293806e-05,
      "loss": 1.8537,
      "step": 500
    },
    {
      "epoch": 0.04999750012499375,
      "grad_norm": 16.341184616088867,
      "learning_rate": 4.9167541622918856e-05,
      "loss": 1.3289,
      "step": 1000
    },
    {
      "epoch": 0.07499625018749062,
      "grad_norm": 12.614828109741211,
      "learning_rate": 4.875089578854391e-05,
      "loss": 1.1698,
      "step": 1500
    },
    {
      "epoch": 0.0999950002499875,
      "grad_norm": 17.94846534729004,
      "learning_rate": 4.833424995416896e-05,
      "loss": 1.0996,
      "step": 2000
    },
    {
      "epoch": 0.12499375031248437,
      "grad_norm": 9.764547348022461,
      "learning_rate": 4.7917604119794014e-05,
      "loss": 1.0552,
      "step": 2500
    },
    {
      "epoch": 0.14999250037498124,
      "grad_norm": 5.973393440246582,
      "learning_rate": 4.7500958285419064e-05,
      "loss": 1.0462,
      "step": 3000
    },
    {
      "epoch": 0.17499125043747812,
      "grad_norm": 5.258781909942627,
      "learning_rate": 4.7084312451044115e-05,
      "loss": 1.0004,
      "step": 3500
    },
    {
      "epoch": 0.199990000499975,
      "grad_norm": 5.401681423187256,
      "learning_rate": 4.666766661666917e-05,
      "loss": 0.9812,
      "step": 4000
    },
    {
      "epoch": 0.2249887505624719,
      "grad_norm": 3.4015467166900635,
      "learning_rate": 4.625102078229422e-05,
      "loss": 0.9245,
      "step": 4500
    },
    {
      "epoch": 0.24998750062496874,
      "grad_norm": 11.498674392700195,
      "learning_rate": 4.583437494791927e-05,
      "loss": 0.9282,
      "step": 5000
    },
    {
      "epoch": 0.2749862506874656,
      "grad_norm": 6.841133117675781,
      "learning_rate": 4.541772911354433e-05,
      "loss": 0.9167,
      "step": 5500
    },
    {
      "epoch": 0.2999850007499625,
      "grad_norm": 5.397707939147949,
      "learning_rate": 4.500108327916937e-05,
      "loss": 0.8886,
      "step": 6000
    },
    {
      "epoch": 0.3249837508124594,
      "grad_norm": 7.148469924926758,
      "learning_rate": 4.458443744479443e-05,
      "loss": 0.8826,
      "step": 6500
    },
    {
      "epoch": 0.34998250087495625,
      "grad_norm": 3.2729530334472656,
      "learning_rate": 4.416779161041948e-05,
      "loss": 0.8443,
      "step": 7000
    },
    {
      "epoch": 0.3749812509374531,
      "grad_norm": 12.553752899169922,
      "learning_rate": 4.375114577604453e-05,
      "loss": 0.8374,
      "step": 7500
    },
    {
      "epoch": 0.39998000099995,
      "grad_norm": 9.571837425231934,
      "learning_rate": 4.333449994166959e-05,
      "loss": 0.8271,
      "step": 8000
    },
    {
      "epoch": 0.42497875106244687,
      "grad_norm": 11.265901565551758,
      "learning_rate": 4.291785410729464e-05,
      "loss": 0.8306,
      "step": 8500
    },
    {
      "epoch": 0.4499775011249438,
      "grad_norm": 18.747684478759766,
      "learning_rate": 4.250120827291969e-05,
      "loss": 0.8561,
      "step": 9000
    },
    {
      "epoch": 0.47497625118744063,
      "grad_norm": 7.2989726066589355,
      "learning_rate": 4.208456243854474e-05,
      "loss": 0.7851,
      "step": 9500
    },
    {
      "epoch": 0.4999750012499375,
      "grad_norm": 21.371959686279297,
      "learning_rate": 4.1667916604169796e-05,
      "loss": 0.7841,
      "step": 10000
    },
    {
      "epoch": 0.5249737513124344,
      "grad_norm": 19.508371353149414,
      "learning_rate": 4.1251270769794846e-05,
      "loss": 0.7678,
      "step": 10500
    },
    {
      "epoch": 0.5499725013749313,
      "grad_norm": 5.09838342666626,
      "learning_rate": 4.0834624935419896e-05,
      "loss": 0.7538,
      "step": 11000
    },
    {
      "epoch": 0.5749712514374281,
      "grad_norm": 6.288057804107666,
      "learning_rate": 4.041797910104495e-05,
      "loss": 0.735,
      "step": 11500
    },
    {
      "epoch": 0.599970001499925,
      "grad_norm": 2.406168222427368,
      "learning_rate": 4.000133326667e-05,
      "loss": 0.774,
      "step": 12000
    },
    {
      "epoch": 0.6249687515624219,
      "grad_norm": 11.135022163391113,
      "learning_rate": 3.9584687432295054e-05,
      "loss": 0.7368,
      "step": 12500
    },
    {
      "epoch": 0.6499675016249188,
      "grad_norm": 16.766277313232422,
      "learning_rate": 3.916804159792011e-05,
      "loss": 0.7435,
      "step": 13000
    },
    {
      "epoch": 0.6749662516874156,
      "grad_norm": 7.3794121742248535,
      "learning_rate": 3.8751395763545154e-05,
      "loss": 0.7035,
      "step": 13500
    },
    {
      "epoch": 0.6999650017499125,
      "grad_norm": 13.058135032653809,
      "learning_rate": 3.833474992917021e-05,
      "loss": 0.7552,
      "step": 14000
    },
    {
      "epoch": 0.7249637518124094,
      "grad_norm": 13.570932388305664,
      "learning_rate": 3.791810409479526e-05,
      "loss": 0.7443,
      "step": 14500
    },
    {
      "epoch": 0.7499625018749062,
      "grad_norm": 16.705114364624023,
      "learning_rate": 3.750145826042031e-05,
      "loss": 0.7461,
      "step": 15000
    },
    {
      "epoch": 0.7749612519374032,
      "grad_norm": 20.24770164489746,
      "learning_rate": 3.708481242604537e-05,
      "loss": 0.7352,
      "step": 15500
    },
    {
      "epoch": 0.7999600019999,
      "grad_norm": 10.8892183303833,
      "learning_rate": 3.666816659167042e-05,
      "loss": 0.6946,
      "step": 16000
    },
    {
      "epoch": 0.8249587520623969,
      "grad_norm": 24.564472198486328,
      "learning_rate": 3.625152075729547e-05,
      "loss": 0.6939,
      "step": 16500
    },
    {
      "epoch": 0.8499575021248937,
      "grad_norm": 14.484394073486328,
      "learning_rate": 3.583487492292053e-05,
      "loss": 0.7509,
      "step": 17000
    },
    {
      "epoch": 0.8749562521873906,
      "grad_norm": 11.327393531799316,
      "learning_rate": 3.541822908854558e-05,
      "loss": 0.6992,
      "step": 17500
    },
    {
      "epoch": 0.8999550022498876,
      "grad_norm": 12.824069023132324,
      "learning_rate": 3.500158325417063e-05,
      "loss": 0.7043,
      "step": 18000
    },
    {
      "epoch": 0.9249537523123844,
      "grad_norm": 1.3452341556549072,
      "learning_rate": 3.458493741979568e-05,
      "loss": 0.6977,
      "step": 18500
    },
    {
      "epoch": 0.9499525023748813,
      "grad_norm": 7.985979080200195,
      "learning_rate": 3.416829158542073e-05,
      "loss": 0.6952,
      "step": 19000
    },
    {
      "epoch": 0.9749512524373781,
      "grad_norm": 6.591372489929199,
      "learning_rate": 3.3751645751045785e-05,
      "loss": 0.708,
      "step": 19500
    },
    {
      "epoch": 0.999950002499875,
      "grad_norm": 4.785042762756348,
      "learning_rate": 3.3334999916670835e-05,
      "loss": 0.6695,
      "step": 20000
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.7876339482882986,
      "eval_loss": 0.6957715749740601,
      "eval_runtime": 12.0347,
      "eval_samples_per_second": 1662.524,
      "eval_steps_per_second": 207.815,
      "step": 20001
    },
    {
      "epoch": 1.024948752562372,
      "grad_norm": 15.502031326293945,
      "learning_rate": 3.2918354082295885e-05,
      "loss": 0.5363,
      "step": 20500
    },
    {
      "epoch": 1.0499475026248688,
      "grad_norm": 0.9488680362701416,
      "learning_rate": 3.2501708247920936e-05,
      "loss": 0.547,
      "step": 21000
    },
    {
      "epoch": 1.0749462526873657,
      "grad_norm": 4.085986614227295,
      "learning_rate": 3.208506241354599e-05,
      "loss": 0.5733,
      "step": 21500
    },
    {
      "epoch": 1.0999450027498625,
      "grad_norm": 15.25266170501709,
      "learning_rate": 3.166841657917104e-05,
      "loss": 0.5454,
      "step": 22000
    },
    {
      "epoch": 1.1249437528123594,
      "grad_norm": 11.815897941589355,
      "learning_rate": 3.125177074479609e-05,
      "loss": 0.5235,
      "step": 22500
    },
    {
      "epoch": 1.1499425028748562,
      "grad_norm": 17.311704635620117,
      "learning_rate": 3.083512491042115e-05,
      "loss": 0.5291,
      "step": 23000
    },
    {
      "epoch": 1.174941252937353,
      "grad_norm": 7.48703145980835,
      "learning_rate": 3.0418479076046197e-05,
      "loss": 0.5537,
      "step": 23500
    },
    {
      "epoch": 1.19994000299985,
      "grad_norm": 0.3721858263015747,
      "learning_rate": 3.000183324167125e-05,
      "loss": 0.555,
      "step": 24000
    },
    {
      "epoch": 1.2249387530623468,
      "grad_norm": 22.23200035095215,
      "learning_rate": 2.9585187407296305e-05,
      "loss": 0.5338,
      "step": 24500
    },
    {
      "epoch": 1.2499375031248436,
      "grad_norm": 2.753875255584717,
      "learning_rate": 2.9168541572921355e-05,
      "loss": 0.5615,
      "step": 25000
    },
    {
      "epoch": 1.2749362531873407,
      "grad_norm": 23.020252227783203,
      "learning_rate": 2.875189573854641e-05,
      "loss": 0.5155,
      "step": 25500
    },
    {
      "epoch": 1.2999350032498376,
      "grad_norm": 31.79548454284668,
      "learning_rate": 2.8335249904171456e-05,
      "loss": 0.5353,
      "step": 26000
    },
    {
      "epoch": 1.3249337533123344,
      "grad_norm": 0.2923097312450409,
      "learning_rate": 2.7918604069796513e-05,
      "loss": 0.5317,
      "step": 26500
    },
    {
      "epoch": 1.3499325033748313,
      "grad_norm": 9.347312927246094,
      "learning_rate": 2.7501958235421566e-05,
      "loss": 0.5429,
      "step": 27000
    },
    {
      "epoch": 1.3749312534373281,
      "grad_norm": 13.638419151306152,
      "learning_rate": 2.7085312401046613e-05,
      "loss": 0.5311,
      "step": 27500
    },
    {
      "epoch": 1.399930003499825,
      "grad_norm": 19.09702491760254,
      "learning_rate": 2.6668666566671667e-05,
      "loss": 0.5345,
      "step": 28000
    },
    {
      "epoch": 1.4249287535623218,
      "grad_norm": 0.6322915554046631,
      "learning_rate": 2.6252020732296717e-05,
      "loss": 0.5287,
      "step": 28500
    },
    {
      "epoch": 1.4499275036248187,
      "grad_norm": 19.159151077270508,
      "learning_rate": 2.583537489792177e-05,
      "loss": 0.5204,
      "step": 29000
    },
    {
      "epoch": 1.4749262536873156,
      "grad_norm": 0.7778434753417969,
      "learning_rate": 2.5418729063546824e-05,
      "loss": 0.5121,
      "step": 29500
    },
    {
      "epoch": 1.4999250037498126,
      "grad_norm": 20.512577056884766,
      "learning_rate": 2.5002083229171875e-05,
      "loss": 0.52,
      "step": 30000
    },
    {
      "epoch": 1.5249237538123093,
      "grad_norm": 8.87389087677002,
      "learning_rate": 2.458543739479693e-05,
      "loss": 0.5094,
      "step": 30500
    },
    {
      "epoch": 1.5499225038748063,
      "grad_norm": 21.17337989807129,
      "learning_rate": 2.416879156042198e-05,
      "loss": 0.5169,
      "step": 31000
    },
    {
      "epoch": 1.574921253937303,
      "grad_norm": 8.69658374786377,
      "learning_rate": 2.3752145726047032e-05,
      "loss": 0.5226,
      "step": 31500
    },
    {
      "epoch": 1.5999200039998,
      "grad_norm": 1.2267570495605469,
      "learning_rate": 2.3335499891672083e-05,
      "loss": 0.5281,
      "step": 32000
    },
    {
      "epoch": 1.624918754062297,
      "grad_norm": 14.757322311401367,
      "learning_rate": 2.2918854057297136e-05,
      "loss": 0.5246,
      "step": 32500
    },
    {
      "epoch": 1.6499175041247938,
      "grad_norm": 6.141539096832275,
      "learning_rate": 2.250220822292219e-05,
      "loss": 0.532,
      "step": 33000
    },
    {
      "epoch": 1.6749162541872906,
      "grad_norm": 15.90838623046875,
      "learning_rate": 2.208556238854724e-05,
      "loss": 0.5068,
      "step": 33500
    },
    {
      "epoch": 1.6999150042497875,
      "grad_norm": 3.071305751800537,
      "learning_rate": 2.166891655417229e-05,
      "loss": 0.4971,
      "step": 34000
    },
    {
      "epoch": 1.7249137543122843,
      "grad_norm": 5.962382793426514,
      "learning_rate": 2.1252270719797344e-05,
      "loss": 0.5122,
      "step": 34500
    },
    {
      "epoch": 1.7499125043747812,
      "grad_norm": 5.9214911460876465,
      "learning_rate": 2.0835624885422398e-05,
      "loss": 0.489,
      "step": 35000
    },
    {
      "epoch": 1.7749112544372783,
      "grad_norm": 8.897248268127441,
      "learning_rate": 2.0418979051047448e-05,
      "loss": 0.479,
      "step": 35500
    },
    {
      "epoch": 1.799910004499775,
      "grad_norm": 16.03746223449707,
      "learning_rate": 2.0002333216672502e-05,
      "loss": 0.4919,
      "step": 36000
    },
    {
      "epoch": 1.824908754562272,
      "grad_norm": 21.669597625732422,
      "learning_rate": 1.9585687382297552e-05,
      "loss": 0.4974,
      "step": 36500
    },
    {
      "epoch": 1.8499075046247686,
      "grad_norm": 3.668883800506592,
      "learning_rate": 1.9169041547922606e-05,
      "loss": 0.5045,
      "step": 37000
    },
    {
      "epoch": 1.8749062546872657,
      "grad_norm": 4.8963518142700195,
      "learning_rate": 1.8752395713547656e-05,
      "loss": 0.525,
      "step": 37500
    },
    {
      "epoch": 1.8999050047497625,
      "grad_norm": 19.771133422851562,
      "learning_rate": 1.833574987917271e-05,
      "loss": 0.4748,
      "step": 38000
    },
    {
      "epoch": 1.9249037548122594,
      "grad_norm": 20.69668960571289,
      "learning_rate": 1.791910404479776e-05,
      "loss": 0.4831,
      "step": 38500
    },
    {
      "epoch": 1.9499025048747562,
      "grad_norm": 3.1742944717407227,
      "learning_rate": 1.750245821042281e-05,
      "loss": 0.5091,
      "step": 39000
    },
    {
      "epoch": 1.974901254937253,
      "grad_norm": 0.3630174696445465,
      "learning_rate": 1.7085812376047867e-05,
      "loss": 0.4821,
      "step": 39500
    },
    {
      "epoch": 1.99990000499975,
      "grad_norm": 10.60681438446045,
      "learning_rate": 1.6669166541672918e-05,
      "loss": 0.4862,
      "step": 40000
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.7969511995029,
      "eval_loss": 0.7491226196289062,
      "eval_runtime": 12.147,
      "eval_samples_per_second": 1647.158,
      "eval_steps_per_second": 205.895,
      "step": 40002
    },
    {
      "epoch": 2.024898755062247,
      "grad_norm": 18.80621910095215,
      "learning_rate": 1.6252520707297968e-05,
      "loss": 0.357,
      "step": 40500
    },
    {
      "epoch": 2.049897505124744,
      "grad_norm": 3.8872764110565186,
      "learning_rate": 1.5835874872923022e-05,
      "loss": 0.333,
      "step": 41000
    },
    {
      "epoch": 2.0748962551872405,
      "grad_norm": 19.08934211730957,
      "learning_rate": 1.5419229038548072e-05,
      "loss": 0.374,
      "step": 41500
    },
    {
      "epoch": 2.0998950052497376,
      "grad_norm": 10.449114799499512,
      "learning_rate": 1.5002583204173126e-05,
      "loss": 0.3698,
      "step": 42000
    },
    {
      "epoch": 2.1248937553122342,
      "grad_norm": 6.660628318786621,
      "learning_rate": 1.4585937369798178e-05,
      "loss": 0.3759,
      "step": 42500
    },
    {
      "epoch": 2.1498925053747313,
      "grad_norm": 9.793807983398438,
      "learning_rate": 1.416929153542323e-05,
      "loss": 0.3543,
      "step": 43000
    },
    {
      "epoch": 2.174891255437228,
      "grad_norm": 20.215002059936523,
      "learning_rate": 1.375264570104828e-05,
      "loss": 0.3695,
      "step": 43500
    },
    {
      "epoch": 2.199890005499725,
      "grad_norm": 20.272212982177734,
      "learning_rate": 1.3335999866673335e-05,
      "loss": 0.3385,
      "step": 44000
    },
    {
      "epoch": 2.2248887555622217,
      "grad_norm": 12.721766471862793,
      "learning_rate": 1.2919354032298386e-05,
      "loss": 0.3583,
      "step": 44500
    },
    {
      "epoch": 2.2498875056247187,
      "grad_norm": 11.291624069213867,
      "learning_rate": 1.2502708197923438e-05,
      "loss": 0.3445,
      "step": 45000
    },
    {
      "epoch": 2.274886255687216,
      "grad_norm": 14.476861000061035,
      "learning_rate": 1.208606236354849e-05,
      "loss": 0.3575,
      "step": 45500
    },
    {
      "epoch": 2.2998850057497124,
      "grad_norm": 8.20272159576416,
      "learning_rate": 1.1669416529173542e-05,
      "loss": 0.3382,
      "step": 46000
    },
    {
      "epoch": 2.3248837558122095,
      "grad_norm": 2.5329763889312744,
      "learning_rate": 1.1252770694798594e-05,
      "loss": 0.3732,
      "step": 46500
    },
    {
      "epoch": 2.349882505874706,
      "grad_norm": 0.9955561757087708,
      "learning_rate": 1.0836124860423647e-05,
      "loss": 0.3454,
      "step": 47000
    },
    {
      "epoch": 2.3748812559372032,
      "grad_norm": 6.986231803894043,
      "learning_rate": 1.0419479026048697e-05,
      "loss": 0.3563,
      "step": 47500
    },
    {
      "epoch": 2.3998800059997,
      "grad_norm": 21.110620498657227,
      "learning_rate": 1.000283319167375e-05,
      "loss": 0.3302,
      "step": 48000
    },
    {
      "epoch": 2.424878756062197,
      "grad_norm": 0.08908458799123764,
      "learning_rate": 9.586187357298801e-06,
      "loss": 0.3421,
      "step": 48500
    },
    {
      "epoch": 2.4498775061246936,
      "grad_norm": 13.181462287902832,
      "learning_rate": 9.169541522923853e-06,
      "loss": 0.3119,
      "step": 49000
    },
    {
      "epoch": 2.4748762561871906,
      "grad_norm": 12.58914852142334,
      "learning_rate": 8.752895688548907e-06,
      "loss": 0.3578,
      "step": 49500
    },
    {
      "epoch": 2.4998750062496873,
      "grad_norm": 39.47843551635742,
      "learning_rate": 8.336249854173957e-06,
      "loss": 0.3584,
      "step": 50000
    },
    {
      "epoch": 2.5248737563121844,
      "grad_norm": 4.305168628692627,
      "learning_rate": 7.919604019799011e-06,
      "loss": 0.3142,
      "step": 50500
    },
    {
      "epoch": 2.5498725063746814,
      "grad_norm": 0.7413849830627441,
      "learning_rate": 7.502958185424062e-06,
      "loss": 0.3124,
      "step": 51000
    },
    {
      "epoch": 2.574871256437178,
      "grad_norm": 1.338671326637268,
      "learning_rate": 7.086312351049114e-06,
      "loss": 0.3262,
      "step": 51500
    },
    {
      "epoch": 2.599870006499675,
      "grad_norm": 26.348230361938477,
      "learning_rate": 6.669666516674167e-06,
      "loss": 0.3072,
      "step": 52000
    },
    {
      "epoch": 2.624868756562172,
      "grad_norm": 38.16984558105469,
      "learning_rate": 6.253020682299218e-06,
      "loss": 0.3274,
      "step": 52500
    },
    {
      "epoch": 2.649867506624669,
      "grad_norm": 13.00293254852295,
      "learning_rate": 5.83637484792427e-06,
      "loss": 0.3131,
      "step": 53000
    },
    {
      "epoch": 2.6748662566871655,
      "grad_norm": 3.519160270690918,
      "learning_rate": 5.419729013549323e-06,
      "loss": 0.3281,
      "step": 53500
    },
    {
      "epoch": 2.6998650067496626,
      "grad_norm": 15.743597984313965,
      "learning_rate": 5.003083179174375e-06,
      "loss": 0.3108,
      "step": 54000
    },
    {
      "epoch": 2.7248637568121596,
      "grad_norm": 20.438329696655273,
      "learning_rate": 4.586437344799427e-06,
      "loss": 0.3189,
      "step": 54500
    },
    {
      "epoch": 2.7498625068746563,
      "grad_norm": 45.14103317260742,
      "learning_rate": 4.169791510424479e-06,
      "loss": 0.3367,
      "step": 55000
    },
    {
      "epoch": 2.774861256937153,
      "grad_norm": 3.860975980758667,
      "learning_rate": 3.7531456760495313e-06,
      "loss": 0.2969,
      "step": 55500
    },
    {
      "epoch": 2.79986000699965,
      "grad_norm": 0.40173372626304626,
      "learning_rate": 3.3364998416745833e-06,
      "loss": 0.3332,
      "step": 56000
    },
    {
      "epoch": 2.824858757062147,
      "grad_norm": 2.1133482456207275,
      "learning_rate": 2.9198540072996353e-06,
      "loss": 0.3197,
      "step": 56500
    },
    {
      "epoch": 2.8498575071246437,
      "grad_norm": 25.709867477416992,
      "learning_rate": 2.5032081729246873e-06,
      "loss": 0.312,
      "step": 57000
    },
    {
      "epoch": 2.8748562571871408,
      "grad_norm": 22.588973999023438,
      "learning_rate": 2.0865623385497392e-06,
      "loss": 0.3275,
      "step": 57500
    },
    {
      "epoch": 2.8998550072496374,
      "grad_norm": 2.185502529144287,
      "learning_rate": 1.6699165041747914e-06,
      "loss": 0.2933,
      "step": 58000
    },
    {
      "epoch": 2.9248537573121345,
      "grad_norm": 12.381799697875977,
      "learning_rate": 1.2532706697998434e-06,
      "loss": 0.3123,
      "step": 58500
    },
    {
      "epoch": 2.949852507374631,
      "grad_norm": 0.39924994111061096,
      "learning_rate": 8.366248354248955e-07,
      "loss": 0.3045,
      "step": 59000
    },
    {
      "epoch": 2.974851257437128,
      "grad_norm": 16.00220489501953,
      "learning_rate": 4.199790010499475e-07,
      "loss": 0.2928,
      "step": 59500
    },
    {
      "epoch": 2.9998500074996253,
      "grad_norm": 2.6532301902770996,
      "learning_rate": 3.3331666749995837e-09,
      "loss": 0.3199,
      "step": 60000
    },
    {
      "epoch": 3.0,
      "eval_f1": 0.8089721345660946,
      "eval_loss": 0.8624263405799866,
      "eval_runtime": 10.7103,
      "eval_samples_per_second": 1868.104,
      "eval_steps_per_second": 233.513,
      "step": 60003
    },
    {
      "epoch": 3.0,
      "step": 60003,
      "total_flos": 7908189620438016.0,
      "train_loss": 0.5708606184445773,
      "train_runtime": 1693.148,
      "train_samples_per_second": 283.503,
      "train_steps_per_second": 35.439
    }
  ],
  "logging_steps": 500,
  "max_steps": 60003,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7908189620438016.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}