{ "best_metric": 0.24899413187145658, "best_model_checkpoint": "esm2_t12_35M_lora_ptm_sites_2023-10-10_00-58-43/checkpoint-176106", "epoch": 1.0, "eval_steps": 500, "global_step": 176106, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0003701556393528675, "loss": 0.4195, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.0003701521172172271, "loss": 0.1652, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.00037014627443809477, "loss": 0.1381, "step": 600 }, { "epoch": 0.0, "learning_rate": 0.0003701380523685105, "loss": 0.0871, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.000370127474584151, "loss": 0.0919, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.000370114541219666, "loss": 0.0678, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.0003700992524396906, "loss": 0.0678, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.00037008160843884317, "loss": 0.0565, "step": 1600 }, { "epoch": 0.01, "learning_rate": 0.0003700616094417228, "loss": 0.0712, "step": 1800 }, { "epoch": 0.01, "learning_rate": 0.00037003925570290656, "loss": 0.0604, "step": 2000 }, { "epoch": 0.01, "learning_rate": 0.00037001454750694614, "loss": 0.0632, "step": 2200 }, { "epoch": 0.01, "learning_rate": 0.0003699874851683642, "loss": 0.0697, "step": 2400 }, { "epoch": 0.01, "learning_rate": 0.00036995806903165044, "loss": 0.046, "step": 2600 }, { "epoch": 0.02, "learning_rate": 0.0003699264641725442, "loss": 0.0479, "step": 2800 }, { "epoch": 0.02, "learning_rate": 0.00036989235335692274, "loss": 0.042, "step": 3000 }, { "epoch": 0.02, "learning_rate": 0.0003698558899541489, "loss": 0.0533, "step": 3200 }, { "epoch": 0.02, "learning_rate": 0.00036981707442838315, "loss": 0.0508, "step": 3400 }, { "epoch": 0.02, "learning_rate": 0.0003697759072737272, "loss": 0.0492, "step": 3600 }, { "epoch": 0.02, "learning_rate": 0.00036973238901421803, "loss": 0.0496, "step": 3800 }, { "epoch": 0.02, "learning_rate": 0.0003696865202038208, "loss": 0.0729, "step": 4000 }, { "epoch": 0.02, "learning_rate": 0.00036963830142642195, "loss": 0.0556, "step": 4200 }, { "epoch": 0.02, "learning_rate": 0.0003695877332958219, "loss": 0.0393, "step": 4400 }, { "epoch": 0.03, "learning_rate": 0.0003695348164557272, "loss": 0.042, "step": 4600 }, { "epoch": 0.03, "learning_rate": 0.00036947955157974214, "loss": 0.0388, "step": 4800 }, { "epoch": 0.03, "learning_rate": 0.0003694222332701908, "loss": 0.0632, "step": 5000 }, { "epoch": 0.03, "learning_rate": 0.00036936228619390763, "loss": 0.0372, "step": 5200 }, { "epoch": 0.03, "learning_rate": 0.0003692999932779562, "loss": 0.0309, "step": 5400 }, { "epoch": 0.03, "learning_rate": 0.00036923535531529345, "loss": 0.0336, "step": 5600 }, { "epoch": 0.03, "learning_rate": 0.0003691683731287276, "loss": 0.0553, "step": 5800 }, { "epoch": 0.03, "learning_rate": 0.0003690990475709077, "loss": 0.0466, "step": 6000 }, { "epoch": 0.04, "learning_rate": 0.0003690277436899897, "loss": 0.0248, "step": 6200 }, { "epoch": 0.04, "learning_rate": 0.00036895374577248087, "loss": 0.0422, "step": 6400 }, { "epoch": 0.04, "learning_rate": 0.00036887740721581455, "loss": 0.0472, "step": 6600 }, { "epoch": 0.04, "learning_rate": 0.0003687987289917415, "loss": 0.041, "step": 6800 }, { "epoch": 0.04, "learning_rate": 0.00036871771210179546, "loss": 0.0321, "step": 7000 }, { "epoch": 0.04, "learning_rate": 0.00036863435757727986, "loss": 0.036, "step": 7200 }, { "epoch": 0.04, "learning_rate": 0.00036854866647925537, "loss": 0.0355, "step": 7400 }, { "epoch": 0.04, "learning_rate": 0.0003684606398985257, "loss": 0.0423, "step": 7600 }, { "epoch": 0.04, "learning_rate": 0.00036837027895562436, "loss": 0.0391, "step": 7800 }, { "epoch": 0.05, "learning_rate": 0.0003682775848008, "loss": 0.0473, "step": 8000 }, { "epoch": 0.05, "learning_rate": 0.00036818255861400173, "loss": 0.0326, "step": 8200 }, { "epoch": 0.05, "learning_rate": 0.00036808520160486446, "loss": 0.0295, "step": 8400 }, { "epoch": 0.05, "learning_rate": 0.0003679855150126932, "loss": 0.0365, "step": 8600 }, { "epoch": 0.05, "learning_rate": 0.00036788350010644714, "loss": 0.0244, "step": 8800 }, { "epoch": 0.05, "learning_rate": 0.0003677791581847241, "loss": 0.0484, "step": 9000 }, { "epoch": 0.05, "learning_rate": 0.00036767249057574337, "loss": 0.0285, "step": 9200 }, { "epoch": 0.05, "learning_rate": 0.0003675634986373291, "loss": 0.0393, "step": 9400 }, { "epoch": 0.05, "learning_rate": 0.0003674521837568929, "loss": 0.0204, "step": 9600 }, { "epoch": 0.06, "learning_rate": 0.0003673385473514164, "loss": 0.0448, "step": 9800 }, { "epoch": 0.06, "learning_rate": 0.00036722259086743295, "loss": 0.0451, "step": 10000 }, { "epoch": 0.06, "learning_rate": 0.00036710431578100935, "loss": 0.0295, "step": 10200 }, { "epoch": 0.06, "learning_rate": 0.00036698372359772696, "loss": 0.0361, "step": 10400 }, { "epoch": 0.06, "learning_rate": 0.00036686081585266277, "loss": 0.0308, "step": 10600 }, { "epoch": 0.06, "learning_rate": 0.0003667355941103695, "loss": 0.0293, "step": 10800 }, { "epoch": 0.06, "learning_rate": 0.0003666087033850132, "loss": 0.0319, "step": 11000 }, { "epoch": 0.06, "learning_rate": 0.0003664788700095357, "loss": 0.0267, "step": 11200 }, { "epoch": 0.06, "learning_rate": 0.0003663467274988045, "loss": 0.0195, "step": 11400 }, { "epoch": 0.07, "learning_rate": 0.00036621227753492634, "loss": 0.0425, "step": 11600 }, { "epoch": 0.07, "learning_rate": 0.00036607552182938043, "loss": 0.0211, "step": 11800 }, { "epoch": 0.07, "learning_rate": 0.0003659364621229971, "loss": 0.0352, "step": 12000 }, { "epoch": 0.07, "learning_rate": 0.0003657951001859353, "loss": 0.0266, "step": 12200 }, { "epoch": 0.07, "learning_rate": 0.00036565216184881143, "loss": 0.0332, "step": 12400 }, { "epoch": 0.07, "learning_rate": 0.0003655062123664866, "loss": 0.0346, "step": 12600 }, { "epoch": 0.07, "learning_rate": 0.00036535796613034296, "loss": 0.042, "step": 12800 }, { "epoch": 0.07, "learning_rate": 0.00036520742502747924, "loss": 0.0372, "step": 13000 }, { "epoch": 0.07, "learning_rate": 0.0003650545909742067, "loss": 0.0246, "step": 13200 }, { "epoch": 0.08, "learning_rate": 0.0003648994659160249, "loss": 0.0283, "step": 13400 }, { "epoch": 0.08, "learning_rate": 0.00036474205182759645, "loss": 0.0299, "step": 13600 }, { "epoch": 0.08, "learning_rate": 0.0003645823507127223, "loss": 0.0292, "step": 13800 }, { "epoch": 0.08, "learning_rate": 0.000364420364604316, "loss": 0.0354, "step": 14000 }, { "epoch": 0.08, "learning_rate": 0.000364256095564378, "loss": 0.0215, "step": 14200 }, { "epoch": 0.08, "learning_rate": 0.00036408954568396915, "loss": 0.0288, "step": 14400 }, { "epoch": 0.08, "learning_rate": 0.0003639207170831844, "loss": 0.0214, "step": 14600 }, { "epoch": 0.08, "learning_rate": 0.0003637496119111255, "loss": 0.0336, "step": 14800 }, { "epoch": 0.09, "learning_rate": 0.0003635762323458739, "loss": 0.0193, "step": 15000 }, { "epoch": 0.09, "learning_rate": 0.0003634005805944629, "loss": 0.0507, "step": 15200 }, { "epoch": 0.09, "learning_rate": 0.0003632226588928494, "loss": 0.032, "step": 15400 }, { "epoch": 0.09, "learning_rate": 0.000363042469505886, "loss": 0.0303, "step": 15600 }, { "epoch": 0.09, "learning_rate": 0.0003628600147272912, "loss": 0.0271, "step": 15800 }, { "epoch": 0.09, "learning_rate": 0.00036267529687962144, "loss": 0.0275, "step": 16000 }, { "epoch": 0.09, "learning_rate": 0.00036248831831424026, "loss": 0.0352, "step": 16200 }, { "epoch": 0.09, "learning_rate": 0.0003622990814112894, "loss": 0.0302, "step": 16400 }, { "epoch": 0.09, "learning_rate": 0.00036210758857965785, "loss": 0.0175, "step": 16600 }, { "epoch": 0.1, "learning_rate": 0.00036191481659005633, "loss": 0.0525, "step": 16800 }, { "epoch": 0.1, "learning_rate": 0.0003617188304915086, "loss": 0.0259, "step": 17000 }, { "epoch": 0.1, "learning_rate": 0.0003615205958505779, "loss": 0.0265, "step": 17200 }, { "epoch": 0.1, "learning_rate": 0.00036132011519068993, "loss": 0.0304, "step": 17400 }, { "epoch": 0.1, "learning_rate": 0.00036111739106386103, "loss": 0.0351, "step": 17600 }, { "epoch": 0.1, "learning_rate": 0.0003609124260506658, "loss": 0.0564, "step": 17800 }, { "epoch": 0.1, "learning_rate": 0.0003607052227602041, "loss": 0.0235, "step": 18000 }, { "epoch": 0.1, "learning_rate": 0.0003604957838300679, "loss": 0.0427, "step": 18200 }, { "epoch": 0.1, "learning_rate": 0.00036028411192630784, "loss": 0.0259, "step": 18400 }, { "epoch": 0.11, "learning_rate": 0.00036007020974339896, "loss": 0.0226, "step": 18600 }, { "epoch": 0.11, "learning_rate": 0.00035985408000420693, "loss": 0.0361, "step": 18800 }, { "epoch": 0.11, "learning_rate": 0.00035963682276229046, "loss": 0.0396, "step": 19000 }, { "epoch": 0.11, "learning_rate": 0.0003594162572956826, "loss": 0.038, "step": 19200 }, { "epoch": 0.11, "learning_rate": 0.000359193472597272, "loss": 0.0263, "step": 19400 }, { "epoch": 0.11, "learning_rate": 0.00035896847150299397, "loss": 0.0275, "step": 19600 }, { "epoch": 0.11, "learning_rate": 0.0003587412568769976, "loss": 0.0368, "step": 19800 }, { "epoch": 0.11, "learning_rate": 0.0003585118316116088, "loss": 0.0267, "step": 20000 }, { "epoch": 0.11, "learning_rate": 0.000358280198627294, "loss": 0.0252, "step": 20200 }, { "epoch": 0.12, "learning_rate": 0.0003580463608726229, "loss": 0.0396, "step": 20400 }, { "epoch": 0.12, "learning_rate": 0.0003578103213242304, "loss": 0.0297, "step": 20600 }, { "epoch": 0.12, "learning_rate": 0.0003575720829867795, "loss": 0.0214, "step": 20800 }, { "epoch": 0.12, "learning_rate": 0.0003573316488929225, "loss": 0.0252, "step": 21000 }, { "epoch": 0.12, "learning_rate": 0.00035708902210326236, "loss": 0.025, "step": 21200 }, { "epoch": 0.12, "learning_rate": 0.0003568442057063144, "loss": 0.0262, "step": 21400 }, { "epoch": 0.12, "learning_rate": 0.000356597202818466, "loss": 0.022, "step": 21600 }, { "epoch": 0.12, "learning_rate": 0.0003563492679409498, "loss": 0.0458, "step": 21800 }, { "epoch": 0.12, "learning_rate": 0.0003560979124246924, "loss": 0.0289, "step": 22000 }, { "epoch": 0.13, "learning_rate": 0.00035584437991746675, "loss": 0.0247, "step": 22200 }, { "epoch": 0.13, "learning_rate": 0.0003555886736466121, "loss": 0.0198, "step": 22400 }, { "epoch": 0.13, "learning_rate": 0.0003553307968671388, "loss": 0.0259, "step": 22600 }, { "epoch": 0.13, "learning_rate": 0.0003550707528616864, "loss": 0.0134, "step": 22800 }, { "epoch": 0.13, "learning_rate": 0.0003548085449404821, "loss": 0.0344, "step": 23000 }, { "epoch": 0.13, "learning_rate": 0.0003545441764412988, "loss": 0.0308, "step": 23200 }, { "epoch": 0.13, "learning_rate": 0.0003542776507294125, "loss": 0.0295, "step": 23400 }, { "epoch": 0.13, "learning_rate": 0.0003540089711975591, "loss": 0.0237, "step": 23600 }, { "epoch": 0.14, "learning_rate": 0.00035373814126589203, "loss": 0.0229, "step": 23800 }, { "epoch": 0.14, "learning_rate": 0.0003534651643819378, "loss": 0.0203, "step": 24000 }, { "epoch": 0.14, "learning_rate": 0.00035319004402055264, "loss": 0.0297, "step": 24200 }, { "epoch": 0.14, "learning_rate": 0.000352914175302923, "loss": 0.0285, "step": 24400 }, { "epoch": 0.14, "learning_rate": 0.00035263478919374686, "loss": 0.0224, "step": 24600 }, { "epoch": 0.14, "learning_rate": 0.00035235327017739155, "loss": 0.0333, "step": 24800 }, { "epoch": 0.14, "learning_rate": 0.0003520696218374504, "loss": 0.0564, "step": 25000 }, { "epoch": 0.14, "learning_rate": 0.00035178384778462185, "loss": 0.0341, "step": 25200 }, { "epoch": 0.14, "learning_rate": 0.0003514959516566635, "loss": 0.0278, "step": 25400 }, { "epoch": 0.15, "learning_rate": 0.0003512059371183459, "loss": 0.0358, "step": 25600 }, { "epoch": 0.15, "learning_rate": 0.0003509138078614061, "loss": 0.0298, "step": 25800 }, { "epoch": 0.15, "learning_rate": 0.00035061956760450006, "loss": 0.0314, "step": 26000 }, { "epoch": 0.15, "learning_rate": 0.0003503232200931561, "loss": 0.0264, "step": 26200 }, { "epoch": 0.15, "learning_rate": 0.00035002476909972645, "loss": 0.0259, "step": 26400 }, { "epoch": 0.15, "learning_rate": 0.00034972421842333984, "loss": 0.0274, "step": 26600 }, { "epoch": 0.15, "learning_rate": 0.00034942309032960303, "loss": 0.0193, "step": 26800 }, { "epoch": 0.15, "learning_rate": 0.0003491183622419467, "loss": 0.0223, "step": 27000 }, { "epoch": 0.15, "learning_rate": 0.0003488115460094296, "loss": 0.0213, "step": 27200 }, { "epoch": 0.16, "learning_rate": 0.0003485026455376655, "loss": 0.0268, "step": 27400 }, { "epoch": 0.16, "learning_rate": 0.0003481916647587996, "loss": 0.0232, "step": 27600 }, { "epoch": 0.16, "learning_rate": 0.00034788017807543066, "loss": 0.0325, "step": 27800 }, { "epoch": 0.16, "learning_rate": 0.00034756505893653175, "loss": 0.0242, "step": 28000 }, { "epoch": 0.16, "learning_rate": 0.0003472478714255293, "loss": 0.0221, "step": 28200 }, { "epoch": 0.16, "learning_rate": 0.0003469286195800583, "loss": 0.0276, "step": 28400 }, { "epoch": 0.16, "learning_rate": 0.0003466073074640316, "loss": 0.0225, "step": 28600 }, { "epoch": 0.16, "learning_rate": 0.0003462839391675882, "loss": 0.0219, "step": 28800 }, { "epoch": 0.16, "learning_rate": 0.0003459585188070413, "loss": 0.0355, "step": 29000 }, { "epoch": 0.17, "learning_rate": 0.00034563105052482586, "loss": 0.0211, "step": 29200 }, { "epoch": 0.17, "learning_rate": 0.0003453015384894458, "loss": 0.0353, "step": 29400 }, { "epoch": 0.17, "learning_rate": 0.0003449699868954208, "loss": 0.0253, "step": 29600 }, { "epoch": 0.17, "learning_rate": 0.0003446363999632333, "loss": 0.0406, "step": 29800 }, { "epoch": 0.17, "learning_rate": 0.0003443007819392746, "loss": 0.0201, "step": 30000 }, { "epoch": 0.17, "learning_rate": 0.0003439631370957905, "loss": 0.0261, "step": 30200 }, { "epoch": 0.17, "learning_rate": 0.0003436234697308274, "loss": 0.0426, "step": 30400 }, { "epoch": 0.17, "learning_rate": 0.0003432817841681772, "loss": 0.0184, "step": 30600 }, { "epoch": 0.17, "learning_rate": 0.0003429380847573226, "loss": 0.0201, "step": 30800 }, { "epoch": 0.18, "learning_rate": 0.00034259237587338153, "loss": 0.0331, "step": 31000 }, { "epoch": 0.18, "learning_rate": 0.00034224466191705135, "loss": 0.0208, "step": 31200 }, { "epoch": 0.18, "learning_rate": 0.000341894947314553, "loss": 0.0297, "step": 31400 }, { "epoch": 0.18, "learning_rate": 0.0003415432365175747, "loss": 0.0248, "step": 31600 }, { "epoch": 0.18, "learning_rate": 0.0003411895340032153, "loss": 0.0207, "step": 31800 }, { "epoch": 0.18, "learning_rate": 0.00034083562765829117, "loss": 0.0334, "step": 32000 }, { "epoch": 0.18, "learning_rate": 0.0003404779651439537, "loss": 0.0153, "step": 32200 }, { "epoch": 0.18, "learning_rate": 0.0003401183244725952, "loss": 0.0315, "step": 32400 }, { "epoch": 0.19, "learning_rate": 0.0003397567102222577, "loss": 0.0234, "step": 32600 }, { "epoch": 0.19, "learning_rate": 0.00033939312699610597, "loss": 0.026, "step": 32800 }, { "epoch": 0.19, "learning_rate": 0.00033902757942236837, "loss": 0.018, "step": 33000 }, { "epoch": 0.19, "learning_rate": 0.00033866007215427904, "loss": 0.0318, "step": 33200 }, { "epoch": 0.19, "learning_rate": 0.00033829060987001754, "loss": 0.0249, "step": 33400 }, { "epoch": 0.19, "learning_rate": 0.00033791919727265, "loss": 0.0266, "step": 33600 }, { "epoch": 0.19, "learning_rate": 0.00033754583909006893, "loss": 0.0259, "step": 33800 }, { "epoch": 0.19, "learning_rate": 0.0003371705400749333, "loss": 0.03, "step": 34000 }, { "epoch": 0.19, "learning_rate": 0.0003367951959879635, "loss": 0.0283, "step": 34200 }, { "epoch": 0.2, "learning_rate": 0.000336416039308738, "loss": 0.0269, "step": 34400 }, { "epoch": 0.2, "learning_rate": 0.00033603495617873176, "loss": 0.034, "step": 34600 }, { "epoch": 0.2, "learning_rate": 0.00033565195144893837, "loss": 0.0319, "step": 34800 }, { "epoch": 0.2, "learning_rate": 0.0003352670299948123, "loss": 0.018, "step": 35000 }, { "epoch": 0.2, "learning_rate": 0.00033488019671620693, "loss": 0.0419, "step": 35200 }, { "epoch": 0.2, "learning_rate": 0.0003344914565373123, "loss": 0.0371, "step": 35400 }, { "epoch": 0.2, "learning_rate": 0.0003341008144065922, "loss": 0.0253, "step": 35600 }, { "epoch": 0.2, "learning_rate": 0.0003337082752967214, "loss": 0.0254, "step": 35800 }, { "epoch": 0.2, "learning_rate": 0.0003333138442045221, "loss": 0.0287, "step": 36000 }, { "epoch": 0.21, "learning_rate": 0.0003329175261509006, "loss": 0.0203, "step": 36200 }, { "epoch": 0.21, "learning_rate": 0.00033251932618078315, "loss": 0.0234, "step": 36400 }, { "epoch": 0.21, "learning_rate": 0.00033211924936305204, "loss": 0.0162, "step": 36600 }, { "epoch": 0.21, "learning_rate": 0.0003317193151808796, "loss": 0.0215, "step": 36800 }, { "epoch": 0.21, "learning_rate": 0.00033131550929049215, "loss": 0.0274, "step": 37000 }, { "epoch": 0.21, "learning_rate": 0.0003309098418764647, "loss": 0.0337, "step": 37200 }, { "epoch": 0.21, "learning_rate": 0.0003305023181027363, "loss": 0.0214, "step": 37400 }, { "epoch": 0.21, "learning_rate": 0.0003300929431568763, "loss": 0.0192, "step": 37600 }, { "epoch": 0.21, "learning_rate": 0.0003296817222500186, "loss": 0.0373, "step": 37800 }, { "epoch": 0.22, "learning_rate": 0.0003292686606167952, "loss": 0.0341, "step": 38000 }, { "epoch": 0.22, "learning_rate": 0.00032885376351526955, "loss": 0.0267, "step": 38200 }, { "epoch": 0.22, "learning_rate": 0.00032843703622686987, "loss": 0.0257, "step": 38400 }, { "epoch": 0.22, "learning_rate": 0.00032801848405632146, "loss": 0.0352, "step": 38600 }, { "epoch": 0.22, "learning_rate": 0.00032759811233157966, "loss": 0.0291, "step": 38800 }, { "epoch": 0.22, "learning_rate": 0.0003271780418373506, "loss": 0.0236, "step": 39000 }, { "epoch": 0.22, "learning_rate": 0.0003267540561114055, "loss": 0.02, "step": 39200 }, { "epoch": 0.22, "learning_rate": 0.00032632826692678864, "loss": 0.0162, "step": 39400 }, { "epoch": 0.22, "learning_rate": 0.00032590067970357875, "loss": 0.0248, "step": 39600 }, { "epoch": 0.23, "learning_rate": 0.0003254712998847426, "loss": 0.0229, "step": 39800 }, { "epoch": 0.23, "learning_rate": 0.00032504013293606604, "loss": 0.0239, "step": 40000 }, { "epoch": 0.23, "learning_rate": 0.0003246071843460839, "loss": 0.0208, "step": 40200 }, { "epoch": 0.23, "learning_rate": 0.0003241724596260105, "loss": 0.0248, "step": 40400 }, { "epoch": 0.23, "learning_rate": 0.00032373596430966946, "loss": 0.0132, "step": 40600 }, { "epoch": 0.23, "learning_rate": 0.000323297703953423, "loss": 0.0176, "step": 40800 }, { "epoch": 0.23, "learning_rate": 0.0003228598886025915, "loss": 0.0327, "step": 41000 }, { "epoch": 0.23, "learning_rate": 0.0003224203368588642, "loss": 0.045, "step": 41200 }, { "epoch": 0.24, "learning_rate": 0.0003219768323998541, "loss": 0.0251, "step": 41400 }, { "epoch": 0.24, "learning_rate": 0.0003215315852937889, "loss": 0.0296, "step": 41600 }, { "epoch": 0.24, "learning_rate": 0.0003210846012084369, "loss": 0.0327, "step": 41800 }, { "epoch": 0.24, "learning_rate": 0.0003206358858336769, "loss": 0.0261, "step": 42000 }, { "epoch": 0.24, "learning_rate": 0.0003201854448814265, "loss": 0.0321, "step": 42200 }, { "epoch": 0.24, "learning_rate": 0.00031973328408556876, "loss": 0.0228, "step": 42400 }, { "epoch": 0.24, "learning_rate": 0.0003192794092018796, "loss": 0.0234, "step": 42600 }, { "epoch": 0.24, "learning_rate": 0.0003188238260079543, "loss": 0.0227, "step": 42800 }, { "epoch": 0.24, "learning_rate": 0.00031836654030313415, "loss": 0.0187, "step": 43000 }, { "epoch": 0.25, "learning_rate": 0.00031790985703126633, "loss": 0.0381, "step": 43200 }, { "epoch": 0.25, "learning_rate": 0.0003174491922289618, "loss": 0.0221, "step": 43400 }, { "epoch": 0.25, "learning_rate": 0.000316986842414148, "loss": 0.016, "step": 43600 }, { "epoch": 0.25, "learning_rate": 0.0003165228134723018, "loss": 0.0217, "step": 43800 }, { "epoch": 0.25, "learning_rate": 0.0003160571113102746, "loss": 0.0356, "step": 44000 }, { "epoch": 0.25, "learning_rate": 0.00031558974185621694, "loss": 0.0281, "step": 44200 }, { "epoch": 0.25, "learning_rate": 0.000315120711059503, "loss": 0.0231, "step": 44400 }, { "epoch": 0.25, "learning_rate": 0.0003146500248906554, "loss": 0.0322, "step": 44600 }, { "epoch": 0.25, "learning_rate": 0.0003141776893412682, "loss": 0.0247, "step": 44800 }, { "epoch": 0.26, "learning_rate": 0.00031370371042393195, "loss": 0.0271, "step": 45000 }, { "epoch": 0.26, "learning_rate": 0.00031322809417215584, "loss": 0.0184, "step": 45200 }, { "epoch": 0.26, "learning_rate": 0.0003127508466402918, "loss": 0.0232, "step": 45400 }, { "epoch": 0.26, "learning_rate": 0.0003122719739034571, "loss": 0.0475, "step": 45600 }, { "epoch": 0.26, "learning_rate": 0.0003117914820574569, "loss": 0.0303, "step": 45800 }, { "epoch": 0.26, "learning_rate": 0.0003113093772187068, "loss": 0.0394, "step": 46000 }, { "epoch": 0.26, "learning_rate": 0.00031082566552415524, "loss": 0.0267, "step": 46200 }, { "epoch": 0.26, "learning_rate": 0.0003103403531312047, "loss": 0.0316, "step": 46400 }, { "epoch": 0.26, "learning_rate": 0.00030985344621763415, "loss": 0.0216, "step": 46600 }, { "epoch": 0.27, "learning_rate": 0.0003093649509815197, "loss": 0.0199, "step": 46800 }, { "epoch": 0.27, "learning_rate": 0.00030887487364115605, "loss": 0.0231, "step": 47000 }, { "epoch": 0.27, "learning_rate": 0.00030838322043497736, "loss": 0.0202, "step": 47200 }, { "epoch": 0.27, "learning_rate": 0.00030789246762956597, "loss": 0.0283, "step": 47400 }, { "epoch": 0.27, "learning_rate": 0.00030739768928821336, "loss": 0.0394, "step": 47600 }, { "epoch": 0.27, "learning_rate": 0.0003069013538848477, "loss": 0.0264, "step": 47800 }, { "epoch": 0.27, "learning_rate": 0.00030640346773756506, "loss": 0.0411, "step": 48000 }, { "epoch": 0.27, "learning_rate": 0.00030590403718420164, "loss": 0.036, "step": 48200 }, { "epoch": 0.27, "learning_rate": 0.00030540306858225326, "loss": 0.0421, "step": 48400 }, { "epoch": 0.28, "learning_rate": 0.0003049005683087941, "loss": 0.0398, "step": 48600 }, { "epoch": 0.28, "learning_rate": 0.000304396542760396, "loss": 0.0173, "step": 48800 }, { "epoch": 0.28, "learning_rate": 0.0003038909983530467, "loss": 0.0317, "step": 49000 }, { "epoch": 0.28, "learning_rate": 0.0003033839415220679, "loss": 0.048, "step": 49200 }, { "epoch": 0.28, "learning_rate": 0.00030287537872203423, "loss": 0.0345, "step": 49400 }, { "epoch": 0.28, "learning_rate": 0.0003023678704574289, "loss": 0.0316, "step": 49600 }, { "epoch": 0.28, "learning_rate": 0.00030185632260843674, "loss": 0.0247, "step": 49800 }, { "epoch": 0.28, "learning_rate": 0.0003013432882361978, "loss": 0.0165, "step": 50000 }, { "epoch": 0.29, "learning_rate": 0.0003008287738713774, "loss": 0.0267, "step": 50200 }, { "epoch": 0.29, "learning_rate": 0.0003003127860634806, "loss": 0.0236, "step": 50400 }, { "epoch": 0.29, "learning_rate": 0.00029979792229214487, "loss": 0.0526, "step": 50600 }, { "epoch": 0.29, "learning_rate": 0.00029927901460657474, "loss": 0.0176, "step": 50800 }, { "epoch": 0.29, "learning_rate": 0.0002987612586177442, "loss": 0.0237, "step": 51000 }, { "epoch": 0.29, "learning_rate": 0.000298239457344193, "loss": 0.0169, "step": 51200 }, { "epoch": 0.29, "learning_rate": 0.00029771621558824144, "loss": 0.0421, "step": 51400 }, { "epoch": 0.29, "learning_rate": 0.00029719154001048997, "loss": 0.0271, "step": 51600 }, { "epoch": 0.29, "learning_rate": 0.0002966654372897905, "loss": 0.0225, "step": 51800 }, { "epoch": 0.3, "learning_rate": 0.00029613791412316185, "loss": 0.0283, "step": 52000 }, { "epoch": 0.3, "learning_rate": 0.00029560897722570427, "loss": 0.017, "step": 52200 }, { "epoch": 0.3, "learning_rate": 0.00029507863333051433, "loss": 0.0216, "step": 52400 }, { "epoch": 0.3, "learning_rate": 0.00029454688918859875, "loss": 0.0212, "step": 52600 }, { "epoch": 0.3, "learning_rate": 0.00029401375156878874, "loss": 0.0356, "step": 52800 }, { "epoch": 0.3, "learning_rate": 0.00029347922725765375, "loss": 0.0223, "step": 53000 }, { "epoch": 0.3, "learning_rate": 0.0002929433230594152, "loss": 0.0231, "step": 53200 }, { "epoch": 0.3, "learning_rate": 0.0002924060457958596, "loss": 0.0385, "step": 53400 }, { "epoch": 0.3, "learning_rate": 0.000291867402306252, "loss": 0.0194, "step": 53600 }, { "epoch": 0.31, "learning_rate": 0.00029132739944724874, "loss": 0.021, "step": 53800 }, { "epoch": 0.31, "learning_rate": 0.0002907860440928105, "loss": 0.0384, "step": 54000 }, { "epoch": 0.31, "learning_rate": 0.00029024334313411393, "loss": 0.0326, "step": 54200 }, { "epoch": 0.31, "learning_rate": 0.00028969930347946533, "loss": 0.0309, "step": 54400 }, { "epoch": 0.31, "learning_rate": 0.00028915393205421116, "loss": 0.0228, "step": 54600 }, { "epoch": 0.31, "learning_rate": 0.00028860723580065116, "loss": 0.0164, "step": 54800 }, { "epoch": 0.31, "learning_rate": 0.0002880592216779493, "loss": 0.0292, "step": 55000 }, { "epoch": 0.31, "learning_rate": 0.0002875098966620452, "loss": 0.0184, "step": 55200 }, { "epoch": 0.31, "learning_rate": 0.0002869592677455658, "loss": 0.0171, "step": 55400 }, { "epoch": 0.32, "learning_rate": 0.00028640734193773564, "loss": 0.0272, "step": 55600 }, { "epoch": 0.32, "learning_rate": 0.0002858541262642884, "loss": 0.0202, "step": 55800 }, { "epoch": 0.32, "learning_rate": 0.00028529962776737674, "loss": 0.025, "step": 56000 }, { "epoch": 0.32, "learning_rate": 0.00028474385350548337, "loss": 0.0232, "step": 56200 }, { "epoch": 0.32, "learning_rate": 0.0002841868105533304, "loss": 0.0286, "step": 56400 }, { "epoch": 0.32, "learning_rate": 0.00028362850600179034, "loss": 0.0246, "step": 56600 }, { "epoch": 0.32, "learning_rate": 0.0002830689469577944, "loss": 0.0271, "step": 56800 }, { "epoch": 0.32, "learning_rate": 0.00028250814054424367, "loss": 0.0216, "step": 57000 }, { "epoch": 0.32, "learning_rate": 0.00028194890720638425, "loss": 0.0261, "step": 57200 }, { "epoch": 0.33, "learning_rate": 0.0002813856336334063, "loss": 0.0328, "step": 57400 }, { "epoch": 0.33, "learning_rate": 0.00028082113411859194, "loss": 0.0209, "step": 57600 }, { "epoch": 0.33, "learning_rate": 0.0002802554158477314, "loss": 0.0266, "step": 57800 }, { "epoch": 0.33, "learning_rate": 0.0002796884860221292, "loss": 0.0163, "step": 58000 }, { "epoch": 0.33, "learning_rate": 0.0002791203518585125, "loss": 0.0302, "step": 58200 }, { "epoch": 0.33, "learning_rate": 0.00027855102058893863, "loss": 0.0205, "step": 58400 }, { "epoch": 0.33, "learning_rate": 0.0002779804994607039, "loss": 0.0227, "step": 58600 }, { "epoch": 0.33, "learning_rate": 0.00027740879573625075, "loss": 0.035, "step": 58800 }, { "epoch": 0.34, "learning_rate": 0.0002768359166930753, "loss": 0.0255, "step": 59000 }, { "epoch": 0.34, "learning_rate": 0.00027626186962363523, "loss": 0.0261, "step": 59200 }, { "epoch": 0.34, "learning_rate": 0.0002756866618352563, "loss": 0.0306, "step": 59400 }, { "epoch": 0.34, "learning_rate": 0.0002751103006500397, "loss": 0.0106, "step": 59600 }, { "epoch": 0.34, "learning_rate": 0.00027453279340476877, "loss": 0.0199, "step": 59800 }, { "epoch": 0.34, "learning_rate": 0.000273957043500945, "loss": 0.0366, "step": 60000 }, { "epoch": 0.34, "learning_rate": 0.0002733772718425448, "loss": 0.0181, "step": 60200 }, { "epoch": 0.34, "learning_rate": 0.0002727992834466737, "loss": 0.0211, "step": 60400 }, { "epoch": 0.34, "learning_rate": 0.0002722172767484197, "loss": 0.0309, "step": 60600 }, { "epoch": 0.35, "learning_rate": 0.00027163416081682745, "loss": 0.0205, "step": 60800 }, { "epoch": 0.35, "learning_rate": 0.00027104994307466473, "loss": 0.0422, "step": 61000 }, { "epoch": 0.35, "learning_rate": 0.0002704646309587249, "loss": 0.0197, "step": 61200 }, { "epoch": 0.35, "learning_rate": 0.0002698782319197321, "loss": 0.02, "step": 61400 }, { "epoch": 0.35, "learning_rate": 0.00026929075342224635, "loss": 0.0292, "step": 61600 }, { "epoch": 0.35, "learning_rate": 0.00026870220294456887, "loss": 0.0174, "step": 61800 }, { "epoch": 0.35, "learning_rate": 0.00026811258797864644, "loss": 0.0211, "step": 62000 }, { "epoch": 0.35, "learning_rate": 0.00026752191602997627, "loss": 0.0179, "step": 62200 }, { "epoch": 0.35, "learning_rate": 0.0002669331558226597, "loss": 0.0243, "step": 62400 }, { "epoch": 0.36, "learning_rate": 0.00026634039766960824, "loss": 0.0305, "step": 62600 }, { "epoch": 0.36, "learning_rate": 0.0002657466050928862, "loss": 0.0271, "step": 62800 }, { "epoch": 0.36, "learning_rate": 0.0002651517856511696, "loss": 0.045, "step": 63000 }, { "epoch": 0.36, "learning_rate": 0.0002645559469162059, "loss": 0.0267, "step": 63200 }, { "epoch": 0.36, "learning_rate": 0.0002639590964727178, "loss": 0.03, "step": 63400 }, { "epoch": 0.36, "learning_rate": 0.00026336124191830645, "loss": 0.018, "step": 63600 }, { "epoch": 0.36, "learning_rate": 0.00026276239086335485, "loss": 0.0297, "step": 63800 }, { "epoch": 0.36, "learning_rate": 0.00026216255093093095, "loss": 0.0159, "step": 64000 }, { "epoch": 0.36, "learning_rate": 0.00026156172975669046, "loss": 0.0184, "step": 64200 }, { "epoch": 0.37, "learning_rate": 0.00026095993498878, "loss": 0.0272, "step": 64400 }, { "epoch": 0.37, "learning_rate": 0.0002603571742877395, "loss": 0.0142, "step": 64600 }, { "epoch": 0.37, "learning_rate": 0.00025975345532640456, "loss": 0.0188, "step": 64800 }, { "epoch": 0.37, "learning_rate": 0.00025915181148932056, "loss": 0.0468, "step": 65000 }, { "epoch": 0.37, "learning_rate": 0.00025854620376982297, "loss": 0.0152, "step": 65200 }, { "epoch": 0.37, "learning_rate": 0.00025793966084276023, "loss": 0.0351, "step": 65400 }, { "epoch": 0.37, "learning_rate": 0.00025733219042911403, "loss": 0.0259, "step": 65600 }, { "epoch": 0.37, "learning_rate": 0.0002567238002616722, "loss": 0.0349, "step": 65800 }, { "epoch": 0.37, "learning_rate": 0.00025611449808493066, "loss": 0.0122, "step": 66000 }, { "epoch": 0.38, "learning_rate": 0.0002555042916549949, "loss": 0.0207, "step": 66200 }, { "epoch": 0.38, "learning_rate": 0.00025489318873948087, "loss": 0.0338, "step": 66400 }, { "epoch": 0.38, "learning_rate": 0.00025428119711741644, "loss": 0.0264, "step": 66600 }, { "epoch": 0.38, "learning_rate": 0.00025366832457914223, "loss": 0.0211, "step": 66800 }, { "epoch": 0.38, "learning_rate": 0.0002530545789262125, "loss": 0.026, "step": 67000 }, { "epoch": 0.38, "learning_rate": 0.00025243996797129576, "loss": 0.0384, "step": 67200 }, { "epoch": 0.38, "learning_rate": 0.0002518244995380754, "loss": 0.0242, "step": 67400 }, { "epoch": 0.38, "learning_rate": 0.00025120818146115014, "loss": 0.0185, "step": 67600 }, { "epoch": 0.38, "learning_rate": 0.00025059102158593404, "loss": 0.0151, "step": 67800 }, { "epoch": 0.39, "learning_rate": 0.00024997302776855716, "loss": 0.016, "step": 68000 }, { "epoch": 0.39, "learning_rate": 0.0002493542078757648, "loss": 0.0322, "step": 68200 }, { "epoch": 0.39, "learning_rate": 0.00024873456978481814, "loss": 0.0201, "step": 68400 }, { "epoch": 0.39, "learning_rate": 0.00024811412138339326, "loss": 0.0296, "step": 68600 }, { "epoch": 0.39, "learning_rate": 0.00024749287056948145, "loss": 0.0329, "step": 68800 }, { "epoch": 0.39, "learning_rate": 0.000246870825251288, "loss": 0.0136, "step": 69000 }, { "epoch": 0.39, "learning_rate": 0.00024624799334713204, "loss": 0.0267, "step": 69200 }, { "epoch": 0.39, "learning_rate": 0.00024562438278534536, "loss": 0.02, "step": 69400 }, { "epoch": 0.4, "learning_rate": 0.00024500000150417183, "loss": 0.027, "step": 69600 }, { "epoch": 0.4, "learning_rate": 0.00024437485745166604, "loss": 0.0287, "step": 69800 }, { "epoch": 0.4, "learning_rate": 0.0002437489585855924, "loss": 0.0213, "step": 70000 }, { "epoch": 0.4, "learning_rate": 0.0002431223128733236, "loss": 0.027, "step": 70200 }, { "epoch": 0.4, "learning_rate": 0.00024249492829173943, "loss": 0.0204, "step": 70400 }, { "epoch": 0.4, "learning_rate": 0.00024186681282712484, "loss": 0.0226, "step": 70600 }, { "epoch": 0.4, "learning_rate": 0.00024123797447506894, "loss": 0.0235, "step": 70800 }, { "epoch": 0.4, "learning_rate": 0.00024060842124036243, "loss": 0.022, "step": 71000 }, { "epoch": 0.4, "learning_rate": 0.0002399781611368965, "loss": 0.015, "step": 71200 }, { "epoch": 0.41, "learning_rate": 0.0002393503587074034, "loss": 0.02, "step": 71400 }, { "epoch": 0.41, "learning_rate": 0.0002387187123780583, "loss": 0.0172, "step": 71600 }, { "epoch": 0.41, "learning_rate": 0.00023808638323498182, "loss": 0.0184, "step": 71800 }, { "epoch": 0.41, "learning_rate": 0.0002374533793274009, "loss": 0.022, "step": 72000 }, { "epoch": 0.41, "learning_rate": 0.00023681970871313178, "loss": 0.0163, "step": 72200 }, { "epoch": 0.41, "learning_rate": 0.00023618537945847764, "loss": 0.042, "step": 72400 }, { "epoch": 0.41, "learning_rate": 0.0002355503996381257, "loss": 0.0249, "step": 72600 }, { "epoch": 0.41, "learning_rate": 0.00023491477733504463, "loss": 0.0392, "step": 72800 }, { "epoch": 0.41, "learning_rate": 0.00023427852064038156, "loss": 0.0206, "step": 73000 }, { "epoch": 0.42, "learning_rate": 0.00023364482361278884, "loss": 0.0196, "step": 73200 }, { "epoch": 0.42, "learning_rate": 0.00023300732551134807, "loss": 0.0273, "step": 73400 }, { "epoch": 0.42, "learning_rate": 0.00023236921729921222, "loss": 0.0197, "step": 73600 }, { "epoch": 0.42, "learning_rate": 0.00023173370213417187, "loss": 0.0247, "step": 73800 }, { "epoch": 0.42, "learning_rate": 0.00023109440102573372, "loss": 0.0279, "step": 74000 }, { "epoch": 0.42, "learning_rate": 0.00023045451415715175, "loss": 0.0236, "step": 74200 }, { "epoch": 0.42, "learning_rate": 0.00022981404967385886, "loss": 0.0183, "step": 74400 }, { "epoch": 0.42, "learning_rate": 0.00022917301572864066, "loss": 0.0285, "step": 74600 }, { "epoch": 0.42, "learning_rate": 0.00022853142048153175, "loss": 0.0298, "step": 74800 }, { "epoch": 0.43, "learning_rate": 0.00022788927209971169, "loss": 0.0202, "step": 75000 }, { "epoch": 0.43, "learning_rate": 0.00022724657875740128, "loss": 0.0198, "step": 75200 }, { "epoch": 0.43, "learning_rate": 0.00022660334863575842, "loss": 0.0297, "step": 75400 }, { "epoch": 0.43, "learning_rate": 0.00022595958992277377, "loss": 0.0167, "step": 75600 }, { "epoch": 0.43, "learning_rate": 0.00022531531081316684, "loss": 0.0184, "step": 75800 }, { "epoch": 0.43, "learning_rate": 0.00022467051950828147, "loss": 0.0282, "step": 76000 }, { "epoch": 0.43, "learning_rate": 0.00022402845193252637, "loss": 0.0345, "step": 76200 }, { "epoch": 0.43, "learning_rate": 0.00022338266332551338, "loss": 0.0173, "step": 76400 }, { "epoch": 0.43, "learning_rate": 0.0002227363871248368, "loss": 0.0243, "step": 76600 }, { "epoch": 0.44, "learning_rate": 0.0002220896315572626, "loss": 0.0166, "step": 76800 }, { "epoch": 0.44, "learning_rate": 0.0002214424048556585, "loss": 0.0257, "step": 77000 }, { "epoch": 0.44, "learning_rate": 0.00022079471525888992, "loss": 0.0477, "step": 77200 }, { "epoch": 0.44, "learning_rate": 0.0002201465710117142, "loss": 0.0205, "step": 77400 }, { "epoch": 0.44, "learning_rate": 0.00021949798036467665, "loss": 0.0275, "step": 77600 }, { "epoch": 0.44, "learning_rate": 0.00021884895157400457, "loss": 0.0196, "step": 77800 }, { "epoch": 0.44, "learning_rate": 0.000218199492901503, "loss": 0.0342, "step": 78000 }, { "epoch": 0.44, "learning_rate": 0.00021754961261444885, "loss": 0.0219, "step": 78200 }, { "epoch": 0.45, "learning_rate": 0.00021689931898548614, "loss": 0.017, "step": 78400 }, { "epoch": 0.45, "learning_rate": 0.00021624862029252044, "loss": 0.0263, "step": 78600 }, { "epoch": 0.45, "learning_rate": 0.0002155975248186137, "loss": 0.025, "step": 78800 }, { "epoch": 0.45, "learning_rate": 0.00021494604085187845, "loss": 0.0171, "step": 79000 }, { "epoch": 0.45, "learning_rate": 0.0002142941766853728, "loss": 0.0205, "step": 79200 }, { "epoch": 0.45, "learning_rate": 0.00021364194061699446, "loss": 0.0162, "step": 79400 }, { "epoch": 0.45, "learning_rate": 0.00021298934094937536, "loss": 0.0238, "step": 79600 }, { "epoch": 0.45, "learning_rate": 0.0002123363859897756, "loss": 0.0239, "step": 79800 }, { "epoch": 0.45, "learning_rate": 0.00021168308404997838, "loss": 0.0139, "step": 80000 }, { "epoch": 0.46, "learning_rate": 0.00021102944344618345, "loss": 0.0305, "step": 80200 }, { "epoch": 0.46, "learning_rate": 0.0002103754724989017, "loss": 0.0228, "step": 80400 }, { "epoch": 0.46, "learning_rate": 0.00020972117953284915, "loss": 0.0379, "step": 80600 }, { "epoch": 0.46, "learning_rate": 0.0002090665728768409, "loss": 0.0176, "step": 80800 }, { "epoch": 0.46, "learning_rate": 0.0002084116608636852, "loss": 0.0186, "step": 81000 }, { "epoch": 0.46, "learning_rate": 0.00020775645183007728, "loss": 0.019, "step": 81200 }, { "epoch": 0.46, "learning_rate": 0.00020710095411649338, "loss": 0.0251, "step": 81400 }, { "epoch": 0.46, "learning_rate": 0.0002064451760670844, "loss": 0.0229, "step": 81600 }, { "epoch": 0.46, "learning_rate": 0.00020578912602956987, "loss": 0.0303, "step": 81800 }, { "epoch": 0.47, "learning_rate": 0.0002051360945654819, "loss": 0.017, "step": 82000 }, { "epoch": 0.47, "learning_rate": 0.00020447952686428194, "loss": 0.0196, "step": 82200 }, { "epoch": 0.47, "learning_rate": 0.00020382271219668632, "loss": 0.0212, "step": 82400 }, { "epoch": 0.47, "learning_rate": 0.00020316565892361012, "loss": 0.0194, "step": 82600 }, { "epoch": 0.47, "learning_rate": 0.00020250837540900578, "loss": 0.0299, "step": 82800 }, { "epoch": 0.47, "learning_rate": 0.00020185087001975654, "loss": 0.0184, "step": 83000 }, { "epoch": 0.47, "learning_rate": 0.00020119315112557005, "loss": 0.033, "step": 83200 }, { "epoch": 0.47, "learning_rate": 0.00020053522709887175, "loss": 0.0207, "step": 83400 }, { "epoch": 0.47, "learning_rate": 0.00019987710631469828, "loss": 0.0141, "step": 83600 }, { "epoch": 0.48, "learning_rate": 0.00019921879715059093, "loss": 0.0225, "step": 83800 }, { "epoch": 0.48, "learning_rate": 0.000198560307986489, "loss": 0.0263, "step": 84000 }, { "epoch": 0.48, "learning_rate": 0.00019790164720462304, "loss": 0.0284, "step": 84200 }, { "epoch": 0.48, "learning_rate": 0.00019724282318940825, "loss": 0.016, "step": 84400 }, { "epoch": 0.48, "learning_rate": 0.00019658384432733769, "loss": 0.0394, "step": 84600 }, { "epoch": 0.48, "learning_rate": 0.0001959247190068755, "loss": 0.0208, "step": 84800 }, { "epoch": 0.48, "learning_rate": 0.00019526545561835023, "loss": 0.0199, "step": 85000 }, { "epoch": 0.48, "learning_rate": 0.00019460606255384803, "loss": 0.0188, "step": 85200 }, { "epoch": 0.48, "learning_rate": 0.00019394654820710546, "loss": 0.0246, "step": 85400 }, { "epoch": 0.49, "learning_rate": 0.0001932869209734034, "loss": 0.0386, "step": 85600 }, { "epoch": 0.49, "learning_rate": 0.00019262718924945921, "loss": 0.0336, "step": 85800 }, { "epoch": 0.49, "learning_rate": 0.00019197066079753742, "loss": 0.0226, "step": 86000 }, { "epoch": 0.49, "learning_rate": 0.0001913107457060452, "loss": 0.014, "step": 86200 }, { "epoch": 0.49, "learning_rate": 0.00019065075128001235, "loss": 0.0251, "step": 86400 }, { "epoch": 0.49, "learning_rate": 0.00018999068592083065, "loss": 0.0188, "step": 86600 }, { "epoch": 0.49, "learning_rate": 0.00018933055803079484, "loss": 0.0139, "step": 86800 }, { "epoch": 0.49, "learning_rate": 0.0001886703760129956, "loss": 0.0213, "step": 87000 }, { "epoch": 0.5, "learning_rate": 0.0001880101482712127, "loss": 0.0322, "step": 87200 }, { "epoch": 0.5, "learning_rate": 0.00018734988320980793, "loss": 0.0223, "step": 87400 }, { "epoch": 0.5, "learning_rate": 0.00018668958923361806, "loss": 0.0261, "step": 87600 }, { "epoch": 0.5, "learning_rate": 0.00018602927474784813, "loss": 0.019, "step": 87800 }, { "epoch": 0.5, "learning_rate": 0.00018536894815796403, "loss": 0.0222, "step": 88000 }, { "epoch": 0.5, "learning_rate": 0.0001847119195163232, "loss": 0.0147, "step": 88200 }, { "epoch": 0.5, "learning_rate": 0.0001840515938906732, "loss": 0.0206, "step": 88400 }, { "epoch": 0.5, "learning_rate": 0.0001833912813357758, "loss": 0.0189, "step": 88600 }, { "epoch": 0.5, "learning_rate": 0.0001827309902570724, "loss": 0.0224, "step": 88800 }, { "epoch": 0.51, "learning_rate": 0.00018207072905973099, "loss": 0.0197, "step": 89000 }, { "epoch": 0.51, "learning_rate": 0.00018141050614853935, "loss": 0.0323, "step": 89200 }, { "epoch": 0.51, "learning_rate": 0.00018075032992779762, "loss": 0.0138, "step": 89400 }, { "epoch": 0.51, "learning_rate": 0.0001800902088012118, "loss": 0.0197, "step": 89600 }, { "epoch": 0.51, "learning_rate": 0.0001794334512880854, "loss": 0.0307, "step": 89800 }, { "epoch": 0.51, "learning_rate": 0.00017877346517762124, "loss": 0.026, "step": 90000 }, { "epoch": 0.51, "learning_rate": 0.00017811355932579115, "loss": 0.0236, "step": 90200 }, { "epoch": 0.51, "learning_rate": 0.00017745374213285934, "loss": 0.0359, "step": 90400 }, { "epoch": 0.51, "learning_rate": 0.0001767940219979617, "loss": 0.0245, "step": 90600 }, { "epoch": 0.52, "learning_rate": 0.00017613440731899813, "loss": 0.0333, "step": 90800 }, { "epoch": 0.52, "learning_rate": 0.00017547490649252667, "loss": 0.0131, "step": 91000 }, { "epoch": 0.52, "learning_rate": 0.00017481552791365573, "loss": 0.0221, "step": 91200 }, { "epoch": 0.52, "learning_rate": 0.00017415627997593782, "loss": 0.0209, "step": 91400 }, { "epoch": 0.52, "learning_rate": 0.0001734971710712621, "loss": 0.0273, "step": 91600 }, { "epoch": 0.52, "learning_rate": 0.0001728382095897483, "loss": 0.0262, "step": 91800 }, { "epoch": 0.52, "learning_rate": 0.00017217940391963928, "loss": 0.0366, "step": 92000 }, { "epoch": 0.52, "learning_rate": 0.00017152076244719467, "loss": 0.0265, "step": 92200 }, { "epoch": 0.52, "learning_rate": 0.00017086229355658372, "loss": 0.023, "step": 92400 }, { "epoch": 0.53, "learning_rate": 0.00017020400562977906, "loss": 0.0304, "step": 92600 }, { "epoch": 0.53, "learning_rate": 0.00016954590704644948, "loss": 0.017, "step": 92800 }, { "epoch": 0.53, "learning_rate": 0.00016888800618385382, "loss": 0.0414, "step": 93000 }, { "epoch": 0.53, "learning_rate": 0.00016823031141673374, "loss": 0.0462, "step": 93200 }, { "epoch": 0.53, "learning_rate": 0.00016757611797137148, "loss": 0.0284, "step": 93400 }, { "epoch": 0.53, "learning_rate": 0.00016691885937382842, "loss": 0.0256, "step": 93600 }, { "epoch": 0.53, "learning_rate": 0.0001662618319379937, "loss": 0.0148, "step": 93800 }, { "epoch": 0.53, "learning_rate": 0.00016560504402749084, "loss": 0.0144, "step": 94000 }, { "epoch": 0.53, "learning_rate": 0.00016494850400289434, "loss": 0.0202, "step": 94200 }, { "epoch": 0.54, "learning_rate": 0.00016429222022162316, "loss": 0.0264, "step": 94400 }, { "epoch": 0.54, "learning_rate": 0.00016363620103783448, "loss": 0.0175, "step": 94600 }, { "epoch": 0.54, "learning_rate": 0.00016298045480231735, "loss": 0.0187, "step": 94800 }, { "epoch": 0.54, "learning_rate": 0.0001623249898623863, "loss": 0.0153, "step": 95000 }, { "epoch": 0.54, "learning_rate": 0.00016166981456177496, "loss": 0.0145, "step": 95200 }, { "epoch": 0.54, "learning_rate": 0.00016101493724053015, "loss": 0.0196, "step": 95400 }, { "epoch": 0.54, "learning_rate": 0.00016036036623490562, "loss": 0.0138, "step": 95600 }, { "epoch": 0.54, "learning_rate": 0.00015970610987725575, "loss": 0.0189, "step": 95800 }, { "epoch": 0.55, "learning_rate": 0.00015905217649592963, "loss": 0.0367, "step": 96000 }, { "epoch": 0.55, "learning_rate": 0.00015839857441516498, "loss": 0.0134, "step": 96200 }, { "epoch": 0.55, "learning_rate": 0.00015774857740871961, "loss": 0.0236, "step": 96400 }, { "epoch": 0.55, "learning_rate": 0.00015709566112445702, "loss": 0.0288, "step": 96600 }, { "epoch": 0.55, "learning_rate": 0.0001564431010461969, "loss": 0.0204, "step": 96800 }, { "epoch": 0.55, "learning_rate": 0.00015579090548069552, "loss": 0.0108, "step": 97000 }, { "epoch": 0.55, "learning_rate": 0.0001551390827300693, "loss": 0.0264, "step": 97200 }, { "epoch": 0.55, "learning_rate": 0.0001544876410916887, "loss": 0.0196, "step": 97400 }, { "epoch": 0.55, "learning_rate": 0.0001538365888580729, "loss": 0.0221, "step": 97600 }, { "epoch": 0.56, "learning_rate": 0.00015318593431678411, "loss": 0.0236, "step": 97800 }, { "epoch": 0.56, "learning_rate": 0.0001525356857503223, "loss": 0.0229, "step": 98000 }, { "epoch": 0.56, "learning_rate": 0.0001518858514360193, "loss": 0.0231, "step": 98200 }, { "epoch": 0.56, "learning_rate": 0.00015123643964593393, "loss": 0.0162, "step": 98400 }, { "epoch": 0.56, "learning_rate": 0.00015058745864674644, "loss": 0.0411, "step": 98600 }, { "epoch": 0.56, "learning_rate": 0.00014993891669965337, "loss": 0.0197, "step": 98800 }, { "epoch": 0.56, "learning_rate": 0.00014929082206026223, "loss": 0.0285, "step": 99000 }, { "epoch": 0.56, "learning_rate": 0.0001486431829784866, "loss": 0.0316, "step": 99200 }, { "epoch": 0.56, "learning_rate": 0.00014799600769844118, "loss": 0.0223, "step": 99400 }, { "epoch": 0.57, "learning_rate": 0.0001473493044583364, "loss": 0.0255, "step": 99600 }, { "epoch": 0.57, "learning_rate": 0.00014670308149037416, "loss": 0.0137, "step": 99800 }, { "epoch": 0.57, "learning_rate": 0.0001460573470206426, "loss": 0.0174, "step": 100000 }, { "epoch": 0.57, "learning_rate": 0.00014541533420859412, "loss": 0.0322, "step": 100200 }, { "epoch": 0.57, "learning_rate": 0.0001447705988435316, "loss": 0.0253, "step": 100400 }, { "epoch": 0.57, "learning_rate": 0.00014412637657621645, "loss": 0.0251, "step": 100600 }, { "epoch": 0.57, "learning_rate": 0.0001434826756072689, "loss": 0.0185, "step": 100800 }, { "epoch": 0.57, "learning_rate": 0.00014283950413067326, "loss": 0.0184, "step": 101000 }, { "epoch": 0.57, "learning_rate": 0.00014219687033367387, "loss": 0.0137, "step": 101200 }, { "epoch": 0.58, "learning_rate": 0.00014155478239667057, "loss": 0.0191, "step": 101400 }, { "epoch": 0.58, "learning_rate": 0.00014091324849311473, "loss": 0.0234, "step": 101600 }, { "epoch": 0.58, "learning_rate": 0.000140272276789405, "loss": 0.0199, "step": 101800 }, { "epoch": 0.58, "learning_rate": 0.00013963187544478376, "loss": 0.0141, "step": 102000 }, { "epoch": 0.58, "learning_rate": 0.00013899205261123283, "loss": 0.0207, "step": 102200 }, { "epoch": 0.58, "learning_rate": 0.00013835281643337, "loss": 0.0242, "step": 102400 }, { "epoch": 0.58, "learning_rate": 0.00013771417504834503, "loss": 0.031, "step": 102600 }, { "epoch": 0.58, "learning_rate": 0.00013707613658573656, "loss": 0.0239, "step": 102800 }, { "epoch": 0.58, "learning_rate": 0.00013643870916744814, "loss": 0.0238, "step": 103000 }, { "epoch": 0.59, "learning_rate": 0.00013580190090760512, "loss": 0.0349, "step": 103200 }, { "epoch": 0.59, "learning_rate": 0.0001351657199124511, "loss": 0.0177, "step": 103400 }, { "epoch": 0.59, "learning_rate": 0.0001345301742802452, "loss": 0.0139, "step": 103600 }, { "epoch": 0.59, "learning_rate": 0.0001338952721011585, "loss": 0.018, "step": 103800 }, { "epoch": 0.59, "learning_rate": 0.00013326102145717149, "loss": 0.0179, "step": 104000 }, { "epoch": 0.59, "learning_rate": 0.00013262743042197046, "loss": 0.0218, "step": 104200 }, { "epoch": 0.59, "learning_rate": 0.00013199450706084573, "loss": 0.0117, "step": 104400 }, { "epoch": 0.59, "learning_rate": 0.00013136225943058828, "loss": 0.0231, "step": 104600 }, { "epoch": 0.6, "learning_rate": 0.00013073069557938726, "loss": 0.031, "step": 104800 }, { "epoch": 0.6, "learning_rate": 0.0001300998235467278, "loss": 0.0206, "step": 105000 }, { "epoch": 0.6, "learning_rate": 0.0001294728004700576, "loss": 0.0213, "step": 105200 }, { "epoch": 0.6, "learning_rate": 0.0001288433325983142, "loss": 0.0407, "step": 105400 }, { "epoch": 0.6, "learning_rate": 0.00012821458057027873, "loss": 0.0225, "step": 105600 }, { "epoch": 0.6, "learning_rate": 0.00012758969071674774, "loss": 0.0235, "step": 105800 }, { "epoch": 0.6, "learning_rate": 0.00012696239069890963, "loss": 0.0212, "step": 106000 }, { "epoch": 0.6, "learning_rate": 0.00012633583046820873, "loss": 0.0328, "step": 106200 }, { "epoch": 0.6, "learning_rate": 0.00012571001800043652, "loss": 0.0278, "step": 106400 }, { "epoch": 0.61, "learning_rate": 0.000125084961261866, "loss": 0.0158, "step": 106600 }, { "epoch": 0.61, "learning_rate": 0.00012446066820914994, "loss": 0.0146, "step": 106800 }, { "epoch": 0.61, "learning_rate": 0.00012383714678922, "loss": 0.0284, "step": 107000 }, { "epoch": 0.61, "learning_rate": 0.00012321440493918523, "loss": 0.0149, "step": 107200 }, { "epoch": 0.61, "learning_rate": 0.00012259245058623115, "loss": 0.0138, "step": 107400 }, { "epoch": 0.61, "learning_rate": 0.00012197129164751876, "loss": 0.02, "step": 107600 }, { "epoch": 0.61, "learning_rate": 0.00012135093603008409, "loss": 0.0187, "step": 107800 }, { "epoch": 0.61, "learning_rate": 0.00012073139163073704, "loss": 0.0233, "step": 108000 }, { "epoch": 0.61, "learning_rate": 0.00012011266633596143, "loss": 0.0244, "step": 108200 }, { "epoch": 0.62, "learning_rate": 0.00011949476802181382, "loss": 0.0282, "step": 108400 }, { "epoch": 0.62, "learning_rate": 0.0001188777045538242, "loss": 0.0217, "step": 108600 }, { "epoch": 0.62, "learning_rate": 0.00011826456278153069, "loss": 0.0214, "step": 108800 }, { "epoch": 0.62, "learning_rate": 0.000117649188287617, "loss": 0.0245, "step": 109000 }, { "epoch": 0.62, "learning_rate": 0.00011703467213314812, "loss": 0.018, "step": 109200 }, { "epoch": 0.62, "learning_rate": 0.00011642102214060081, "loss": 0.0136, "step": 109400 }, { "epoch": 0.62, "learning_rate": 0.00011580824612142588, "loss": 0.0274, "step": 109600 }, { "epoch": 0.62, "learning_rate": 0.00011519635187594886, "loss": 0.0316, "step": 109800 }, { "epoch": 0.62, "learning_rate": 0.00011458534719327081, "loss": 0.0195, "step": 110000 }, { "epoch": 0.63, "learning_rate": 0.00011397523985116925, "loss": 0.0194, "step": 110200 }, { "epoch": 0.63, "learning_rate": 0.0001133660376159988, "loss": 0.0417, "step": 110400 }, { "epoch": 0.63, "learning_rate": 0.00011275774824259256, "loss": 0.0226, "step": 110600 }, { "epoch": 0.63, "learning_rate": 0.00011215037947416353, "loss": 0.0247, "step": 110800 }, { "epoch": 0.63, "learning_rate": 0.00011154393904220578, "loss": 0.0123, "step": 111000 }, { "epoch": 0.63, "learning_rate": 0.00011093843466639602, "loss": 0.0215, "step": 111200 }, { "epoch": 0.63, "learning_rate": 0.00011033387405449557, "loss": 0.0169, "step": 111400 }, { "epoch": 0.63, "learning_rate": 0.00010973026490225217, "loss": 0.0335, "step": 111600 }, { "epoch": 0.63, "learning_rate": 0.00010912761489330187, "loss": 0.021, "step": 111800 }, { "epoch": 0.64, "learning_rate": 0.00010852593169907127, "loss": 0.0212, "step": 112000 }, { "epoch": 0.64, "learning_rate": 0.00010792522297867997, "loss": 0.0251, "step": 112200 }, { "epoch": 0.64, "learning_rate": 0.00010732549637884315, "loss": 0.0195, "step": 112400 }, { "epoch": 0.64, "learning_rate": 0.00010672975074337141, "loss": 0.0154, "step": 112600 }, { "epoch": 0.64, "learning_rate": 0.00010613200626886399, "loss": 0.0161, "step": 112800 }, { "epoch": 0.64, "learning_rate": 0.00010553526674164345, "loss": 0.022, "step": 113000 }, { "epoch": 0.64, "learning_rate": 0.00010493953975789901, "loss": 0.0183, "step": 113200 }, { "epoch": 0.64, "learning_rate": 0.00010434483290093065, "loss": 0.0301, "step": 113400 }, { "epoch": 0.65, "learning_rate": 0.00010375115374105277, "loss": 0.0272, "step": 113600 }, { "epoch": 0.65, "learning_rate": 0.00010315850983549783, "loss": 0.0169, "step": 113800 }, { "epoch": 0.65, "learning_rate": 0.00010256690872831991, "loss": 0.0258, "step": 114000 }, { "epoch": 0.65, "learning_rate": 0.00010197635795029873, "loss": 0.0339, "step": 114200 }, { "epoch": 0.65, "learning_rate": 0.00010138686501884381, "loss": 0.0213, "step": 114400 }, { "epoch": 0.65, "learning_rate": 0.00010079843743789918, "loss": 0.0287, "step": 114600 }, { "epoch": 0.65, "learning_rate": 0.00010021401679048213, "loss": 0.0281, "step": 114800 }, { "epoch": 0.65, "learning_rate": 9.962773694788469e-05, "loss": 0.029, "step": 115000 }, { "epoch": 0.65, "learning_rate": 9.904254484859887e-05, "loss": 0.028, "step": 115200 }, { "epoch": 0.66, "learning_rate": 9.845844794182107e-05, "loss": 0.0131, "step": 115400 }, { "epoch": 0.66, "learning_rate": 9.787545366280647e-05, "loss": 0.0328, "step": 115600 }, { "epoch": 0.66, "learning_rate": 9.729356943277424e-05, "loss": 0.0271, "step": 115800 }, { "epoch": 0.66, "learning_rate": 9.67128026588135e-05, "loss": 0.0318, "step": 116000 }, { "epoch": 0.66, "learning_rate": 9.613316073378832e-05, "loss": 0.0234, "step": 116200 }, { "epoch": 0.66, "learning_rate": 9.555465103624428e-05, "loss": 0.025, "step": 116400 }, { "epoch": 0.66, "learning_rate": 9.497728093031412e-05, "loss": 0.0344, "step": 116600 }, { "epoch": 0.66, "learning_rate": 9.440105776562451e-05, "loss": 0.0164, "step": 116800 }, { "epoch": 0.66, "learning_rate": 9.382598887720169e-05, "loss": 0.0298, "step": 117000 }, { "epoch": 0.67, "learning_rate": 9.325208158537876e-05, "loss": 0.024, "step": 117200 }, { "epoch": 0.67, "learning_rate": 9.267934319570226e-05, "loss": 0.0153, "step": 117400 }, { "epoch": 0.67, "learning_rate": 9.210778099883943e-05, "loss": 0.0135, "step": 117600 }, { "epoch": 0.67, "learning_rate": 9.153740227048476e-05, "loss": 0.0168, "step": 117800 }, { "epoch": 0.67, "learning_rate": 9.097105723733374e-05, "loss": 0.0126, "step": 118000 }, { "epoch": 0.67, "learning_rate": 9.040589819318035e-05, "loss": 0.0195, "step": 118200 }, { "epoch": 0.67, "learning_rate": 8.983910128561952e-05, "loss": 0.0322, "step": 118400 }, { "epoch": 0.67, "learning_rate": 8.92735167257004e-05, "loss": 0.0205, "step": 118600 }, { "epoch": 0.67, "learning_rate": 8.870915171302544e-05, "loss": 0.0218, "step": 118800 }, { "epoch": 0.68, "learning_rate": 8.814601343167284e-05, "loss": 0.0164, "step": 119000 }, { "epoch": 0.68, "learning_rate": 8.758410905010516e-05, "loss": 0.0244, "step": 119200 }, { "epoch": 0.68, "learning_rate": 8.702344572107807e-05, "loss": 0.0099, "step": 119400 }, { "epoch": 0.68, "learning_rate": 8.646403058154925e-05, "loss": 0.0237, "step": 119600 }, { "epoch": 0.68, "learning_rate": 8.590587075258757e-05, "loss": 0.0274, "step": 119800 }, { "epoch": 0.68, "learning_rate": 8.534897333928242e-05, "loss": 0.0172, "step": 120000 }, { "epoch": 0.68, "learning_rate": 8.479334543065332e-05, "loss": 0.0254, "step": 120200 }, { "epoch": 0.68, "learning_rate": 8.423899409955962e-05, "loss": 0.0226, "step": 120400 }, { "epoch": 0.68, "learning_rate": 8.368592640261049e-05, "loss": 0.0171, "step": 120600 }, { "epoch": 0.69, "learning_rate": 8.313414938007512e-05, "loss": 0.0276, "step": 120800 }, { "epoch": 0.69, "learning_rate": 8.258367005579311e-05, "loss": 0.0146, "step": 121000 }, { "epoch": 0.69, "learning_rate": 8.203449543708476e-05, "loss": 0.0269, "step": 121200 }, { "epoch": 0.69, "learning_rate": 8.148663251466253e-05, "loss": 0.0174, "step": 121400 }, { "epoch": 0.69, "learning_rate": 8.094008826254145e-05, "loss": 0.0158, "step": 121600 }, { "epoch": 0.69, "learning_rate": 8.039486963795052e-05, "loss": 0.0353, "step": 121800 }, { "epoch": 0.69, "learning_rate": 7.985098358124426e-05, "loss": 0.0342, "step": 122000 }, { "epoch": 0.69, "learning_rate": 7.930843701581424e-05, "loss": 0.0251, "step": 122200 }, { "epoch": 0.7, "learning_rate": 7.8767236848001e-05, "loss": 0.0154, "step": 122400 }, { "epoch": 0.7, "learning_rate": 7.822738996700614e-05, "loss": 0.031, "step": 122600 }, { "epoch": 0.7, "learning_rate": 7.768890324480457e-05, "loss": 0.0205, "step": 122800 }, { "epoch": 0.7, "learning_rate": 7.715178353605712e-05, "loss": 0.012, "step": 123000 }, { "epoch": 0.7, "learning_rate": 7.661603767802323e-05, "loss": 0.012, "step": 123200 }, { "epoch": 0.7, "learning_rate": 7.60816724904739e-05, "loss": 0.0198, "step": 123400 }, { "epoch": 0.7, "learning_rate": 7.55486947756049e-05, "loss": 0.0189, "step": 123600 }, { "epoch": 0.7, "learning_rate": 7.501711131795021e-05, "loss": 0.0162, "step": 123800 }, { "epoch": 0.7, "learning_rate": 7.448692888429562e-05, "loss": 0.0146, "step": 124000 }, { "epoch": 0.71, "learning_rate": 7.395815422359255e-05, "loss": 0.016, "step": 124200 }, { "epoch": 0.71, "learning_rate": 7.343342733796512e-05, "loss": 0.0168, "step": 124400 }, { "epoch": 0.71, "learning_rate": 7.290748127549915e-05, "loss": 0.0294, "step": 124600 }, { "epoch": 0.71, "learning_rate": 7.238296309154575e-05, "loss": 0.0112, "step": 124800 }, { "epoch": 0.71, "learning_rate": 7.185987946295322e-05, "loss": 0.013, "step": 125000 }, { "epoch": 0.71, "learning_rate": 7.133823704830904e-05, "loss": 0.0237, "step": 125200 }, { "epoch": 0.71, "learning_rate": 7.081804248785451e-05, "loss": 0.0247, "step": 125400 }, { "epoch": 0.71, "learning_rate": 7.029930240340067e-05, "loss": 0.0225, "step": 125600 }, { "epoch": 0.71, "learning_rate": 6.978202339824351e-05, "loss": 0.028, "step": 125800 }, { "epoch": 0.72, "learning_rate": 6.926621205708063e-05, "loss": 0.0234, "step": 126000 }, { "epoch": 0.72, "learning_rate": 6.875187494592678e-05, "loss": 0.0175, "step": 126200 }, { "epoch": 0.72, "learning_rate": 6.82390186120306e-05, "loss": 0.0198, "step": 126400 }, { "epoch": 0.72, "learning_rate": 6.773020271848358e-05, "loss": 0.0233, "step": 126600 }, { "epoch": 0.72, "learning_rate": 6.722032002013071e-05, "loss": 0.0213, "step": 126800 }, { "epoch": 0.72, "learning_rate": 6.6711937594947e-05, "loss": 0.0209, "step": 127000 }, { "epoch": 0.72, "learning_rate": 6.620506191438099e-05, "loss": 0.0329, "step": 127200 }, { "epoch": 0.72, "learning_rate": 6.569969943070103e-05, "loss": 0.0224, "step": 127400 }, { "epoch": 0.72, "learning_rate": 6.51958565769135e-05, "loss": 0.0301, "step": 127600 }, { "epoch": 0.73, "learning_rate": 6.469604754411272e-05, "loss": 0.0303, "step": 127800 }, { "epoch": 0.73, "learning_rate": 6.41952554936078e-05, "loss": 0.0163, "step": 128000 }, { "epoch": 0.73, "learning_rate": 6.369600222379772e-05, "loss": 0.0162, "step": 128200 }, { "epoch": 0.73, "learning_rate": 6.319829408992151e-05, "loss": 0.0233, "step": 128400 }, { "epoch": 0.73, "learning_rate": 6.27021374275494e-05, "loss": 0.0273, "step": 128600 }, { "epoch": 0.73, "learning_rate": 6.220753855250208e-05, "loss": 0.0312, "step": 128800 }, { "epoch": 0.73, "learning_rate": 6.171450376077071e-05, "loss": 0.0216, "step": 129000 }, { "epoch": 0.73, "learning_rate": 6.122303932843605e-05, "loss": 0.0358, "step": 129200 }, { "epoch": 0.73, "learning_rate": 6.073315151158924e-05, "loss": 0.0106, "step": 129400 }, { "epoch": 0.74, "learning_rate": 6.0244846546251834e-05, "loss": 0.0101, "step": 129600 }, { "epoch": 0.74, "learning_rate": 5.9758130648296665e-05, "loss": 0.0247, "step": 129800 }, { "epoch": 0.74, "learning_rate": 5.927301001336826e-05, "loss": 0.0157, "step": 130000 }, { "epoch": 0.74, "learning_rate": 5.878949081680443e-05, "loss": 0.0219, "step": 130200 }, { "epoch": 0.74, "learning_rate": 5.830998476252924e-05, "loss": 0.0217, "step": 130400 }, { "epoch": 0.74, "learning_rate": 5.7829678803223054e-05, "loss": 0.0145, "step": 130600 }, { "epoch": 0.74, "learning_rate": 5.735099265515025e-05, "loss": 0.0225, "step": 130800 }, { "epoch": 0.74, "learning_rate": 5.687393241174086e-05, "loss": 0.0211, "step": 131000 }, { "epoch": 0.75, "learning_rate": 5.639850414572804e-05, "loss": 0.0275, "step": 131200 }, { "epoch": 0.75, "learning_rate": 5.5924713909070656e-05, "loss": 0.0171, "step": 131400 }, { "epoch": 0.75, "learning_rate": 5.545256773287633e-05, "loss": 0.0211, "step": 131600 }, { "epoch": 0.75, "learning_rate": 5.498207162732463e-05, "loss": 0.0194, "step": 131800 }, { "epoch": 0.75, "learning_rate": 5.451323158159054e-05, "loss": 0.0235, "step": 132000 }, { "epoch": 0.75, "learning_rate": 5.4046053563768266e-05, "loss": 0.0229, "step": 132200 }, { "epoch": 0.75, "learning_rate": 5.358054352079529e-05, "loss": 0.0073, "step": 132400 }, { "epoch": 0.75, "learning_rate": 5.311670737837655e-05, "loss": 0.0174, "step": 132600 }, { "epoch": 0.75, "learning_rate": 5.265455104090913e-05, "loss": 0.0173, "step": 132800 }, { "epoch": 0.76, "learning_rate": 5.2194080391407055e-05, "loss": 0.0173, "step": 133000 }, { "epoch": 0.76, "learning_rate": 5.173530129142639e-05, "loss": 0.0188, "step": 133200 }, { "epoch": 0.76, "learning_rate": 5.127821958099065e-05, "loss": 0.0264, "step": 133400 }, { "epoch": 0.76, "learning_rate": 5.082284107851646e-05, "loss": 0.0263, "step": 133600 }, { "epoch": 0.76, "learning_rate": 5.036917158073942e-05, "loss": 0.0139, "step": 133800 }, { "epoch": 0.76, "learning_rate": 4.991721686264047e-05, "loss": 0.0184, "step": 134000 }, { "epoch": 0.76, "learning_rate": 4.946698267737218e-05, "loss": 0.0207, "step": 134200 }, { "epoch": 0.76, "learning_rate": 4.901847475618568e-05, "loss": 0.0242, "step": 134400 }, { "epoch": 0.76, "learning_rate": 4.857169880835763e-05, "loss": 0.0192, "step": 134600 }, { "epoch": 0.77, "learning_rate": 4.812666052111755e-05, "loss": 0.0148, "step": 134800 }, { "epoch": 0.77, "learning_rate": 4.7687789861181634e-05, "loss": 0.0365, "step": 135000 }, { "epoch": 0.77, "learning_rate": 4.724622635071022e-05, "loss": 0.0186, "step": 135200 }, { "epoch": 0.77, "learning_rate": 4.6806417373413885e-05, "loss": 0.0225, "step": 135400 }, { "epoch": 0.77, "learning_rate": 4.6368368527836036e-05, "loss": 0.0176, "step": 135600 }, { "epoch": 0.77, "learning_rate": 4.5932085390114806e-05, "loss": 0.0263, "step": 135800 }, { "epoch": 0.77, "learning_rate": 4.549757351391151e-05, "loss": 0.0364, "step": 136000 }, { "epoch": 0.77, "learning_rate": 4.506483843034039e-05, "loss": 0.033, "step": 136200 }, { "epoch": 0.77, "learning_rate": 4.463388564789776e-05, "loss": 0.0167, "step": 136400 }, { "epoch": 0.78, "learning_rate": 4.420472065239248e-05, "loss": 0.0108, "step": 136600 }, { "epoch": 0.78, "learning_rate": 4.377734890687561e-05, "loss": 0.022, "step": 136800 }, { "epoch": 0.78, "learning_rate": 4.335177585157113e-05, "loss": 0.0164, "step": 137000 }, { "epoch": 0.78, "learning_rate": 4.2928006903806404e-05, "loss": 0.0185, "step": 137200 }, { "epoch": 0.78, "learning_rate": 4.2508152745142374e-05, "loss": 0.0273, "step": 137400 }, { "epoch": 0.78, "learning_rate": 4.208799908482074e-05, "loss": 0.0141, "step": 137600 }, { "epoch": 0.78, "learning_rate": 4.166966561927144e-05, "loss": 0.0173, "step": 137800 }, { "epoch": 0.78, "learning_rate": 4.1253157673665675e-05, "loss": 0.02, "step": 138000 }, { "epoch": 0.78, "learning_rate": 4.083848054993692e-05, "loss": 0.0276, "step": 138200 }, { "epoch": 0.79, "learning_rate": 4.042563952671287e-05, "loss": 0.0162, "step": 138400 }, { "epoch": 0.79, "learning_rate": 4.0014639859248885e-05, "loss": 0.0213, "step": 138600 }, { "epoch": 0.79, "learning_rate": 3.960548677936065e-05, "loss": 0.0228, "step": 138800 }, { "epoch": 0.79, "learning_rate": 3.9198185495357965e-05, "loss": 0.0224, "step": 139000 }, { "epoch": 0.79, "learning_rate": 3.879274119197787e-05, "loss": 0.0195, "step": 139200 }, { "epoch": 0.79, "learning_rate": 3.8389159030319236e-05, "loss": 0.0163, "step": 139400 }, { "epoch": 0.79, "learning_rate": 3.79874441477767e-05, "loss": 0.0194, "step": 139600 }, { "epoch": 0.79, "learning_rate": 3.758760165797558e-05, "loss": 0.0177, "step": 139800 }, { "epoch": 0.79, "learning_rate": 3.718963665070633e-05, "loss": 0.0197, "step": 140000 }, { "epoch": 0.8, "learning_rate": 3.6793554191860186e-05, "loss": 0.0202, "step": 140200 }, { "epoch": 0.8, "learning_rate": 3.639935932336438e-05, "loss": 0.0127, "step": 140400 }, { "epoch": 0.8, "learning_rate": 3.6007057063118326e-05, "loss": 0.0247, "step": 140600 }, { "epoch": 0.8, "learning_rate": 3.561665240492917e-05, "loss": 0.022, "step": 140800 }, { "epoch": 0.8, "learning_rate": 3.522815031844875e-05, "loss": 0.0198, "step": 141000 }, { "epoch": 0.8, "learning_rate": 3.4841555749110164e-05, "loss": 0.0164, "step": 141200 }, { "epoch": 0.8, "learning_rate": 3.4458792263419346e-05, "loss": 0.0249, "step": 141400 }, { "epoch": 0.8, "learning_rate": 3.4076017868658e-05, "loss": 0.0087, "step": 141600 }, { "epoch": 0.81, "learning_rate": 3.369706512869315e-05, "loss": 0.0293, "step": 141800 }, { "epoch": 0.81, "learning_rate": 3.3318130301208905e-05, "loss": 0.0273, "step": 142000 }, { "epoch": 0.81, "learning_rate": 3.2941127304445294e-05, "loss": 0.0147, "step": 142200 }, { "epoch": 0.81, "learning_rate": 3.256606093745782e-05, "loss": 0.0262, "step": 142400 }, { "epoch": 0.81, "learning_rate": 3.219293597464966e-05, "loss": 0.0278, "step": 142600 }, { "epoch": 0.81, "learning_rate": 3.182175716571092e-05, "loss": 0.025, "step": 142800 }, { "epoch": 0.81, "learning_rate": 3.1452529235558165e-05, "loss": 0.0212, "step": 143000 }, { "epoch": 0.81, "learning_rate": 3.108525688427432e-05, "loss": 0.0297, "step": 143200 }, { "epoch": 0.81, "learning_rate": 3.071994478704871e-05, "loss": 0.0141, "step": 143400 }, { "epoch": 0.82, "learning_rate": 3.035659759411763e-05, "loss": 0.0298, "step": 143600 }, { "epoch": 0.82, "learning_rate": 2.9995219930705253e-05, "loss": 0.0212, "step": 143800 }, { "epoch": 0.82, "learning_rate": 2.9637608496407227e-05, "loss": 0.0261, "step": 144000 }, { "epoch": 0.82, "learning_rate": 2.928017376249928e-05, "loss": 0.0436, "step": 144200 }, { "epoch": 0.82, "learning_rate": 2.8924722260435328e-05, "loss": 0.019, "step": 144400 }, { "epoch": 0.82, "learning_rate": 2.8571258514931404e-05, "loss": 0.0157, "step": 144600 }, { "epoch": 0.82, "learning_rate": 2.8219787025400236e-05, "loss": 0.0216, "step": 144800 }, { "epoch": 0.82, "learning_rate": 2.787031226589443e-05, "loss": 0.0246, "step": 145000 }, { "epoch": 0.82, "learning_rate": 2.752283868504904e-05, "loss": 0.0252, "step": 145200 }, { "epoch": 0.83, "learning_rate": 2.7177370706025224e-05, "loss": 0.0182, "step": 145400 }, { "epoch": 0.83, "learning_rate": 2.6833912726453738e-05, "loss": 0.0143, "step": 145600 }, { "epoch": 0.83, "learning_rate": 2.649246911837925e-05, "loss": 0.0198, "step": 145800 }, { "epoch": 0.83, "learning_rate": 2.6153044228204397e-05, "loss": 0.0123, "step": 146000 }, { "epoch": 0.83, "learning_rate": 2.5815642376634615e-05, "loss": 0.0385, "step": 146200 }, { "epoch": 0.83, "learning_rate": 2.5480267858622927e-05, "loss": 0.0177, "step": 146400 }, { "epoch": 0.83, "learning_rate": 2.5148586597250578e-05, "loss": 0.0278, "step": 146600 }, { "epoch": 0.83, "learning_rate": 2.4817269338190568e-05, "loss": 0.0252, "step": 146800 }, { "epoch": 0.83, "learning_rate": 2.448799212146731e-05, "loss": 0.0135, "step": 147000 }, { "epoch": 0.84, "learning_rate": 2.4162390211592713e-05, "loss": 0.0327, "step": 147200 }, { "epoch": 0.84, "learning_rate": 2.3837195375797726e-05, "loss": 0.0292, "step": 147400 }, { "epoch": 0.84, "learning_rate": 2.351405305818026e-05, "loss": 0.0288, "step": 147600 }, { "epoch": 0.84, "learning_rate": 2.319296737217692e-05, "loss": 0.0272, "step": 147800 }, { "epoch": 0.84, "learning_rate": 2.2873942405044402e-05, "loss": 0.0238, "step": 148000 }, { "epoch": 0.84, "learning_rate": 2.2556982217807548e-05, "loss": 0.0142, "step": 148200 }, { "epoch": 0.84, "learning_rate": 2.2242090845207555e-05, "loss": 0.0175, "step": 148400 }, { "epoch": 0.84, "learning_rate": 2.19292722956507e-05, "loss": 0.0151, "step": 148600 }, { "epoch": 0.84, "learning_rate": 2.1618530551157263e-05, "loss": 0.028, "step": 148800 }, { "epoch": 0.85, "learning_rate": 2.1309869567310876e-05, "loss": 0.0214, "step": 149000 }, { "epoch": 0.85, "learning_rate": 2.100329327320813e-05, "loss": 0.0218, "step": 149200 }, { "epoch": 0.85, "learning_rate": 2.0698805571408578e-05, "loss": 0.0188, "step": 149400 }, { "epoch": 0.85, "learning_rate": 2.039641033788514e-05, "loss": 0.0071, "step": 149600 }, { "epoch": 0.85, "learning_rate": 2.0096111421974547e-05, "loss": 0.0138, "step": 149800 }, { "epoch": 0.85, "learning_rate": 1.979791264632855e-05, "loss": 0.0284, "step": 150000 }, { "epoch": 0.85, "learning_rate": 1.9501817806865195e-05, "loss": 0.0154, "step": 150200 }, { "epoch": 0.85, "learning_rate": 1.9207830672720558e-05, "loss": 0.0229, "step": 150400 }, { "epoch": 0.86, "learning_rate": 1.8915954986200532e-05, "loss": 0.0259, "step": 150600 }, { "epoch": 0.86, "learning_rate": 1.8626194462733508e-05, "loss": 0.0187, "step": 150800 }, { "epoch": 0.86, "learning_rate": 1.8338552790822838e-05, "loss": 0.0171, "step": 151000 }, { "epoch": 0.86, "learning_rate": 1.8053033632000137e-05, "loss": 0.0266, "step": 151200 }, { "epoch": 0.86, "learning_rate": 1.7771052291063565e-05, "loss": 0.0356, "step": 151400 }, { "epoch": 0.86, "learning_rate": 1.7489778377186878e-05, "loss": 0.014, "step": 151600 }, { "epoch": 0.86, "learning_rate": 1.7210637780862658e-05, "loss": 0.0141, "step": 151800 }, { "epoch": 0.86, "learning_rate": 1.693363405540805e-05, "loss": 0.0244, "step": 152000 }, { "epoch": 0.86, "learning_rate": 1.665877072693892e-05, "loss": 0.0172, "step": 152200 }, { "epoch": 0.87, "learning_rate": 1.638605129432503e-05, "loss": 0.0271, "step": 152400 }, { "epoch": 0.87, "learning_rate": 1.611547922914535e-05, "loss": 0.0096, "step": 152600 }, { "epoch": 0.87, "learning_rate": 1.584705797564406e-05, "loss": 0.0308, "step": 152800 }, { "epoch": 0.87, "learning_rate": 1.5580790950686504e-05, "loss": 0.027, "step": 153000 }, { "epoch": 0.87, "learning_rate": 1.531668154371589e-05, "loss": 0.021, "step": 153200 }, { "epoch": 0.87, "learning_rate": 1.5054733116709978e-05, "loss": 0.0108, "step": 153400 }, { "epoch": 0.87, "learning_rate": 1.4794949004138424e-05, "loss": 0.0152, "step": 153600 }, { "epoch": 0.87, "learning_rate": 1.4537332512920213e-05, "loss": 0.0208, "step": 153800 }, { "epoch": 0.87, "learning_rate": 1.4281886922381655e-05, "loss": 0.0189, "step": 154000 }, { "epoch": 0.88, "learning_rate": 1.4028615484214573e-05, "loss": 0.0262, "step": 154200 }, { "epoch": 0.88, "learning_rate": 1.3778771471221268e-05, "loss": 0.0305, "step": 154400 }, { "epoch": 0.88, "learning_rate": 1.35298470713599e-05, "loss": 0.0378, "step": 154600 }, { "epoch": 0.88, "learning_rate": 1.3283106396952985e-05, "loss": 0.0212, "step": 154800 }, { "epoch": 0.88, "learning_rate": 1.3038552588883296e-05, "loss": 0.0139, "step": 155000 }, { "epoch": 0.88, "learning_rate": 1.2796188760195822e-05, "loss": 0.0268, "step": 155200 }, { "epoch": 0.88, "learning_rate": 1.2556017996058265e-05, "loss": 0.0164, "step": 155400 }, { "epoch": 0.88, "learning_rate": 1.2318043353721693e-05, "loss": 0.0118, "step": 155600 }, { "epoch": 0.88, "learning_rate": 1.2082267862481735e-05, "loss": 0.0203, "step": 155800 }, { "epoch": 0.89, "learning_rate": 1.1848694523639894e-05, "loss": 0.0221, "step": 156000 }, { "epoch": 0.89, "learning_rate": 1.1617326310465425e-05, "loss": 0.0155, "step": 156200 }, { "epoch": 0.89, "learning_rate": 1.1388166168157457e-05, "loss": 0.0217, "step": 156400 }, { "epoch": 0.89, "learning_rate": 1.1161217013807514e-05, "loss": 0.0277, "step": 156600 }, { "epoch": 0.89, "learning_rate": 1.0937599900986833e-05, "loss": 0.0159, "step": 156800 }, { "epoch": 0.89, "learning_rate": 1.0715070270453825e-05, "loss": 0.0238, "step": 157000 }, { "epoch": 0.89, "learning_rate": 1.0494760196045681e-05, "loss": 0.0184, "step": 157200 }, { "epoch": 0.89, "learning_rate": 1.0276672482197057e-05, "loss": 0.0125, "step": 157400 }, { "epoch": 0.89, "learning_rate": 1.0060809905053135e-05, "loss": 0.0379, "step": 157600 }, { "epoch": 0.9, "learning_rate": 9.84717521243414e-06, "loss": 0.0121, "step": 157800 }, { "epoch": 0.9, "learning_rate": 9.63577112380061e-06, "loss": 0.0301, "step": 158000 }, { "epoch": 0.9, "learning_rate": 9.426600330218556e-06, "loss": 0.0153, "step": 158200 }, { "epoch": 0.9, "learning_rate": 9.21966549432532e-06, "loss": 0.0139, "step": 158400 }, { "epoch": 0.9, "learning_rate": 9.014969250295535e-06, "loss": 0.0172, "step": 158600 }, { "epoch": 0.9, "learning_rate": 8.812514203807878e-06, "loss": 0.0237, "step": 158800 }, { "epoch": 0.9, "learning_rate": 8.612302932011596e-06, "loss": 0.041, "step": 159000 }, { "epoch": 0.9, "learning_rate": 8.414337983493915e-06, "loss": 0.0103, "step": 159200 }, { "epoch": 0.91, "learning_rate": 8.218621878247375e-06, "loss": 0.0257, "step": 159400 }, { "epoch": 0.91, "learning_rate": 8.025157107638079e-06, "loss": 0.0164, "step": 159600 }, { "epoch": 0.91, "learning_rate": 7.833946134373719e-06, "loss": 0.0135, "step": 159800 }, { "epoch": 0.91, "learning_rate": 7.644991392472185e-06, "loss": 0.0143, "step": 160000 }, { "epoch": 0.91, "learning_rate": 7.4582952872307985e-06, "loss": 0.0145, "step": 160200 }, { "epoch": 0.91, "learning_rate": 7.2738601951956405e-06, "loss": 0.0213, "step": 160400 }, { "epoch": 0.91, "learning_rate": 7.091688464131197e-06, "loss": 0.0215, "step": 160600 }, { "epoch": 0.91, "learning_rate": 6.911782412990403e-06, "loss": 0.0205, "step": 160800 }, { "epoch": 0.91, "learning_rate": 6.734144331885486e-06, "loss": 0.0221, "step": 161000 }, { "epoch": 0.92, "learning_rate": 6.558776482058375e-06, "loss": 0.0125, "step": 161200 }, { "epoch": 0.92, "learning_rate": 6.3856810958522425e-06, "loss": 0.009, "step": 161400 }, { "epoch": 0.92, "learning_rate": 6.215708818435388e-06, "loss": 0.0135, "step": 161600 }, { "epoch": 0.92, "learning_rate": 6.047153551194691e-06, "loss": 0.0271, "step": 161800 }, { "epoch": 0.92, "learning_rate": 5.8808772602734375e-06, "loss": 0.0242, "step": 162000 }, { "epoch": 0.92, "learning_rate": 5.7168820622837906e-06, "loss": 0.0153, "step": 162200 }, { "epoch": 0.92, "learning_rate": 5.555170044800826e-06, "loss": 0.0159, "step": 162400 }, { "epoch": 0.92, "learning_rate": 5.396534712327065e-06, "loss": 0.0279, "step": 162600 }, { "epoch": 0.92, "learning_rate": 5.239383760959439e-06, "loss": 0.0265, "step": 162800 }, { "epoch": 0.93, "learning_rate": 5.084522068407875e-06, "loss": 0.0251, "step": 163000 }, { "epoch": 0.93, "learning_rate": 4.931951605982607e-06, "loss": 0.0141, "step": 163200 }, { "epoch": 0.93, "learning_rate": 4.781674315827682e-06, "loss": 0.0143, "step": 163400 }, { "epoch": 0.93, "learning_rate": 4.633692110896181e-06, "loss": 0.0162, "step": 163600 }, { "epoch": 0.93, "learning_rate": 4.48800687492597e-06, "loss": 0.0108, "step": 163800 }, { "epoch": 0.93, "learning_rate": 4.3446204624156405e-06, "loss": 0.0173, "step": 164000 }, { "epoch": 0.93, "learning_rate": 4.203534698601004e-06, "loss": 0.014, "step": 164200 }, { "epoch": 0.93, "learning_rate": 4.064751379431683e-06, "loss": 0.0243, "step": 164400 }, { "epoch": 0.93, "learning_rate": 3.9282722715484335e-06, "loss": 0.0189, "step": 164600 }, { "epoch": 0.94, "learning_rate": 3.7940991122605757e-06, "loss": 0.016, "step": 164800 }, { "epoch": 0.94, "learning_rate": 3.662233609523829e-06, "loss": 0.0279, "step": 165000 }, { "epoch": 0.94, "learning_rate": 3.5326774419187126e-06, "loss": 0.019, "step": 165200 }, { "epoch": 0.94, "learning_rate": 3.405432258628993e-06, "loss": 0.0136, "step": 165400 }, { "epoch": 0.94, "learning_rate": 3.2804996794208474e-06, "loss": 0.0276, "step": 165600 }, { "epoch": 0.94, "learning_rate": 3.1578812946221724e-06, "loss": 0.024, "step": 165800 }, { "epoch": 0.94, "learning_rate": 3.038174415263218e-06, "loss": 0.0373, "step": 166000 }, { "epoch": 0.94, "learning_rate": 2.920177482219368e-06, "loss": 0.0204, "step": 166200 }, { "epoch": 0.94, "learning_rate": 2.804499330302412e-06, "loss": 0.016, "step": 166400 }, { "epoch": 0.95, "learning_rate": 2.691141432036144e-06, "loss": 0.023, "step": 166600 }, { "epoch": 0.95, "learning_rate": 2.5801052304086817e-06, "loss": 0.018, "step": 166800 }, { "epoch": 0.95, "learning_rate": 2.4713921388541356e-06, "loss": 0.0156, "step": 167000 }, { "epoch": 0.95, "learning_rate": 2.365003541234589e-06, "loss": 0.0249, "step": 167200 }, { "epoch": 0.95, "learning_rate": 2.2609407918225517e-06, "loss": 0.0186, "step": 167400 }, { "epoch": 0.95, "learning_rate": 2.1592052152836374e-06, "loss": 0.0104, "step": 167600 }, { "epoch": 0.95, "learning_rate": 2.059798106659755e-06, "loss": 0.0189, "step": 167800 }, { "epoch": 0.95, "learning_rate": 1.9627207313525887e-06, "loss": 0.028, "step": 168000 }, { "epoch": 0.96, "learning_rate": 1.8679743251075292e-06, "loss": 0.0186, "step": 168200 }, { "epoch": 0.96, "learning_rate": 1.7755600939978937e-06, "loss": 0.0279, "step": 168400 }, { "epoch": 0.96, "learning_rate": 1.6854792144096584e-06, "loss": 0.0143, "step": 168600 }, { "epoch": 0.96, "learning_rate": 1.5977328330263757e-06, "loss": 0.044, "step": 168800 }, { "epoch": 0.96, "learning_rate": 1.5123220668146281e-06, "loss": 0.0219, "step": 169000 }, { "epoch": 0.96, "learning_rate": 1.429248003009848e-06, "loss": 0.0186, "step": 169200 }, { "epoch": 0.96, "learning_rate": 1.3485116991023881e-06, "loss": 0.0167, "step": 169400 }, { "epoch": 0.96, "learning_rate": 1.2701141828241649e-06, "loss": 0.0204, "step": 169600 }, { "epoch": 0.96, "learning_rate": 1.1940564521355066e-06, "loss": 0.0139, "step": 169800 }, { "epoch": 0.97, "learning_rate": 1.1203394752124363e-06, "loss": 0.028, "step": 170000 }, { "epoch": 0.97, "learning_rate": 1.049315240383742e-06, "loss": 0.022, "step": 170200 }, { "epoch": 0.97, "learning_rate": 9.806102344292969e-07, "loss": 0.0216, "step": 170400 }, { "epoch": 0.97, "learning_rate": 9.138975907874411e-07, "loss": 0.0164, "step": 170600 }, { "epoch": 0.97, "learning_rate": 8.495292671902981e-07, "loss": 0.0085, "step": 170800 }, { "epoch": 0.97, "learning_rate": 7.875060830137766e-07, "loss": 0.0185, "step": 171000 }, { "epoch": 0.97, "learning_rate": 7.278288277813219e-07, "loss": 0.0248, "step": 171200 }, { "epoch": 0.97, "learning_rate": 6.704982611538892e-07, "loss": 0.0329, "step": 171400 }, { "epoch": 0.97, "learning_rate": 6.15515112920264e-07, "loss": 0.0138, "step": 171600 }, { "epoch": 0.98, "learning_rate": 5.62880082987879e-07, "loss": 0.0128, "step": 171800 }, { "epoch": 0.98, "learning_rate": 5.125938413737512e-07, "loss": 0.0163, "step": 172000 }, { "epoch": 0.98, "learning_rate": 4.646570281960375e-07, "loss": 0.0347, "step": 172200 }, { "epoch": 0.98, "learning_rate": 4.190702536658564e-07, "loss": 0.0232, "step": 172400 }, { "epoch": 0.98, "learning_rate": 3.7583409807958233e-07, "loss": 0.0225, "step": 172600 }, { "epoch": 0.98, "learning_rate": 3.3494911181136674e-07, "loss": 0.019, "step": 172800 }, { "epoch": 0.98, "learning_rate": 2.964158153061924e-07, "loss": 0.0155, "step": 173000 }, { "epoch": 0.98, "learning_rate": 2.60234699073237e-07, "loss": 0.0186, "step": 173200 }, { "epoch": 0.98, "learning_rate": 2.265695131377354e-07, "loss": 0.0216, "step": 173400 }, { "epoch": 0.99, "learning_rate": 1.9508234282376272e-07, "loss": 0.0087, "step": 173600 }, { "epoch": 0.99, "learning_rate": 1.6594864270533008e-07, "loss": 0.0149, "step": 173800 }, { "epoch": 0.99, "learning_rate": 1.3916878363954376e-07, "loss": 0.0171, "step": 174000 }, { "epoch": 0.99, "learning_rate": 1.1474310652035549e-07, "loss": 0.025, "step": 174200 }, { "epoch": 0.99, "learning_rate": 9.26719222741447e-08, "loss": 0.0136, "step": 174400 }, { "epoch": 0.99, "learning_rate": 7.295551185577334e-08, "loss": 0.0196, "step": 174600 }, { "epoch": 0.99, "learning_rate": 5.559412624511325e-08, "loss": 0.0167, "step": 174800 }, { "epoch": 0.99, "learning_rate": 4.058798644371748e-08, "loss": 0.0247, "step": 175000 }, { "epoch": 0.99, "learning_rate": 2.793728347208737e-08, "loss": 0.0182, "step": 175200 }, { "epoch": 1.0, "learning_rate": 1.7642178367165738e-08, "loss": 0.0112, "step": 175400 }, { "epoch": 1.0, "learning_rate": 9.702802180446488e-09, "loss": 0.0229, "step": 175600 }, { "epoch": 1.0, "learning_rate": 4.11925597606365e-09, "loss": 0.0155, "step": 175800 }, { "epoch": 1.0, "learning_rate": 8.916108297639875e-10, "loss": 0.0201, "step": 176000 }, { "epoch": 1.0, "eval_accuracy": 0.9876599555715365, "eval_auc": 0.8673592596422711, "eval_f1": 0.24899413187145658, "eval_loss": 0.4661065936088562, "eval_mcc": 0.3305508498121041, "eval_precision": 0.14941997670219148, "eval_recall": 0.7463955099754822, "eval_runtime": 10361.3775, "eval_samples_per_second": 43.805, "eval_steps_per_second": 4.38, "step": 176106 } ], "logging_steps": 200, "max_steps": 176106, "num_train_epochs": 1, "save_steps": 500, "total_flos": 3.528456960194662e+17, "trial_name": null, "trial_params": null }