diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5313 @@ +{ + "best_metric": 0.24899413187145658, + "best_model_checkpoint": "esm2_t12_35M_lora_ptm_sites_2023-10-10_00-58-43/checkpoint-176106", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 176106, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0003701556393528675, + "loss": 0.4195, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003701521172172271, + "loss": 0.1652, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 0.00037014627443809477, + "loss": 0.1381, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003701380523685105, + "loss": 0.0871, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 0.000370127474584151, + "loss": 0.0919, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000370114541219666, + "loss": 0.0678, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003700992524396906, + "loss": 0.0678, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037008160843884317, + "loss": 0.0565, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003700616094417228, + "loss": 0.0712, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037003925570290656, + "loss": 0.0604, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037001454750694614, + "loss": 0.0632, + "step": 2200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003699874851683642, + "loss": 0.0697, + "step": 2400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036995806903165044, + "loss": 0.046, + "step": 2600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003699264641725442, + "loss": 0.0479, + "step": 2800 + }, + { + "epoch": 0.02, + "learning_rate": 0.00036989235335692274, + "loss": 0.042, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003698558899541489, + "loss": 0.0533, + "step": 3200 + }, + { + "epoch": 0.02, + "learning_rate": 0.00036981707442838315, + "loss": 0.0508, + "step": 3400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003697759072737272, + "loss": 0.0492, + "step": 3600 + }, + { + "epoch": 0.02, + "learning_rate": 0.00036973238901421803, + "loss": 0.0496, + "step": 3800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003696865202038208, + "loss": 0.0729, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00036963830142642195, + "loss": 0.0556, + "step": 4200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003695877332958219, + "loss": 0.0393, + "step": 4400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003695348164557272, + "loss": 0.042, + "step": 4600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00036947955157974214, + "loss": 0.0388, + "step": 4800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003694222332701908, + "loss": 0.0632, + "step": 5000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00036936228619390763, + "loss": 0.0372, + "step": 5200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003692999932779562, + "loss": 0.0309, + "step": 5400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00036923535531529345, + "loss": 0.0336, + "step": 5600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003691683731287276, + "loss": 0.0553, + "step": 5800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003690990475709077, + "loss": 0.0466, + "step": 6000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003690277436899897, + "loss": 0.0248, + "step": 6200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00036895374577248087, + "loss": 0.0422, + "step": 6400 + }, + { + "epoch": 0.04, + "learning_rate": 0.00036887740721581455, + "loss": 0.0472, + "step": 6600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003687987289917415, + "loss": 0.041, + "step": 6800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00036871771210179546, + "loss": 0.0321, + "step": 7000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00036863435757727986, + "loss": 0.036, + "step": 7200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00036854866647925537, + "loss": 0.0355, + "step": 7400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003684606398985257, + "loss": 0.0423, + "step": 7600 + }, + { + "epoch": 0.04, + "learning_rate": 0.00036837027895562436, + "loss": 0.0391, + "step": 7800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003682775848008, + "loss": 0.0473, + "step": 8000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00036818255861400173, + "loss": 0.0326, + "step": 8200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00036808520160486446, + "loss": 0.0295, + "step": 8400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003679855150126932, + "loss": 0.0365, + "step": 8600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00036788350010644714, + "loss": 0.0244, + "step": 8800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003677791581847241, + "loss": 0.0484, + "step": 9000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00036767249057574337, + "loss": 0.0285, + "step": 9200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003675634986373291, + "loss": 0.0393, + "step": 9400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003674521837568929, + "loss": 0.0204, + "step": 9600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003673385473514164, + "loss": 0.0448, + "step": 9800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036722259086743295, + "loss": 0.0451, + "step": 10000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036710431578100935, + "loss": 0.0295, + "step": 10200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036698372359772696, + "loss": 0.0361, + "step": 10400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036686081585266277, + "loss": 0.0308, + "step": 10600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003667355941103695, + "loss": 0.0293, + "step": 10800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003666087033850132, + "loss": 0.0319, + "step": 11000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003664788700095357, + "loss": 0.0267, + "step": 11200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003663467274988045, + "loss": 0.0195, + "step": 11400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00036621227753492634, + "loss": 0.0425, + "step": 11600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00036607552182938043, + "loss": 0.0211, + "step": 11800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003659364621229971, + "loss": 0.0352, + "step": 12000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003657951001859353, + "loss": 0.0266, + "step": 12200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00036565216184881143, + "loss": 0.0332, + "step": 12400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003655062123664866, + "loss": 0.0346, + "step": 12600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00036535796613034296, + "loss": 0.042, + "step": 12800 + }, + { + "epoch": 0.07, + "learning_rate": 0.00036520742502747924, + "loss": 0.0372, + "step": 13000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003650545909742067, + "loss": 0.0246, + "step": 13200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003648994659160249, + "loss": 0.0283, + "step": 13400 + }, + { + "epoch": 0.08, + "learning_rate": 0.00036474205182759645, + "loss": 0.0299, + "step": 13600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003645823507127223, + "loss": 0.0292, + "step": 13800 + }, + { + "epoch": 0.08, + "learning_rate": 0.000364420364604316, + "loss": 0.0354, + "step": 14000 + }, + { + "epoch": 0.08, + "learning_rate": 0.000364256095564378, + "loss": 0.0215, + "step": 14200 + }, + { + "epoch": 0.08, + "learning_rate": 0.00036408954568396915, + "loss": 0.0288, + "step": 14400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003639207170831844, + "loss": 0.0214, + "step": 14600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003637496119111255, + "loss": 0.0336, + "step": 14800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003635762323458739, + "loss": 0.0193, + "step": 15000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003634005805944629, + "loss": 0.0507, + "step": 15200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003632226588928494, + "loss": 0.032, + "step": 15400 + }, + { + "epoch": 0.09, + "learning_rate": 0.000363042469505886, + "loss": 0.0303, + "step": 15600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003628600147272912, + "loss": 0.0271, + "step": 15800 + }, + { + "epoch": 0.09, + "learning_rate": 0.00036267529687962144, + "loss": 0.0275, + "step": 16000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00036248831831424026, + "loss": 0.0352, + "step": 16200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003622990814112894, + "loss": 0.0302, + "step": 16400 + }, + { + "epoch": 0.09, + "learning_rate": 0.00036210758857965785, + "loss": 0.0175, + "step": 16600 + }, + { + "epoch": 0.1, + "learning_rate": 0.00036191481659005633, + "loss": 0.0525, + "step": 16800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003617188304915086, + "loss": 0.0259, + "step": 17000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003615205958505779, + "loss": 0.0265, + "step": 17200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00036132011519068993, + "loss": 0.0304, + "step": 17400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00036111739106386103, + "loss": 0.0351, + "step": 17600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003609124260506658, + "loss": 0.0564, + "step": 17800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003607052227602041, + "loss": 0.0235, + "step": 18000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003604957838300679, + "loss": 0.0427, + "step": 18200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00036028411192630784, + "loss": 0.0259, + "step": 18400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00036007020974339896, + "loss": 0.0226, + "step": 18600 + }, + { + "epoch": 0.11, + "learning_rate": 0.00035985408000420693, + "loss": 0.0361, + "step": 18800 + }, + { + "epoch": 0.11, + "learning_rate": 0.00035963682276229046, + "loss": 0.0396, + "step": 19000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003594162572956826, + "loss": 0.038, + "step": 19200 + }, + { + "epoch": 0.11, + "learning_rate": 0.000359193472597272, + "loss": 0.0263, + "step": 19400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00035896847150299397, + "loss": 0.0275, + "step": 19600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003587412568769976, + "loss": 0.0368, + "step": 19800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003585118316116088, + "loss": 0.0267, + "step": 20000 + }, + { + "epoch": 0.11, + "learning_rate": 0.000358280198627294, + "loss": 0.0252, + "step": 20200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003580463608726229, + "loss": 0.0396, + "step": 20400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003578103213242304, + "loss": 0.0297, + "step": 20600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003575720829867795, + "loss": 0.0214, + "step": 20800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003573316488929225, + "loss": 0.0252, + "step": 21000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00035708902210326236, + "loss": 0.025, + "step": 21200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003568442057063144, + "loss": 0.0262, + "step": 21400 + }, + { + "epoch": 0.12, + "learning_rate": 0.000356597202818466, + "loss": 0.022, + "step": 21600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003563492679409498, + "loss": 0.0458, + "step": 21800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003560979124246924, + "loss": 0.0289, + "step": 22000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00035584437991746675, + "loss": 0.0247, + "step": 22200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003555886736466121, + "loss": 0.0198, + "step": 22400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003553307968671388, + "loss": 0.0259, + "step": 22600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003550707528616864, + "loss": 0.0134, + "step": 22800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003548085449404821, + "loss": 0.0344, + "step": 23000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003545441764412988, + "loss": 0.0308, + "step": 23200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003542776507294125, + "loss": 0.0295, + "step": 23400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003540089711975591, + "loss": 0.0237, + "step": 23600 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035373814126589203, + "loss": 0.0229, + "step": 23800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003534651643819378, + "loss": 0.0203, + "step": 24000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035319004402055264, + "loss": 0.0297, + "step": 24200 + }, + { + "epoch": 0.14, + "learning_rate": 0.000352914175302923, + "loss": 0.0285, + "step": 24400 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035263478919374686, + "loss": 0.0224, + "step": 24600 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035235327017739155, + "loss": 0.0333, + "step": 24800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003520696218374504, + "loss": 0.0564, + "step": 25000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035178384778462185, + "loss": 0.0341, + "step": 25200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003514959516566635, + "loss": 0.0278, + "step": 25400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003512059371183459, + "loss": 0.0358, + "step": 25600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003509138078614061, + "loss": 0.0298, + "step": 25800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00035061956760450006, + "loss": 0.0314, + "step": 26000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003503232200931561, + "loss": 0.0264, + "step": 26200 + }, + { + "epoch": 0.15, + "learning_rate": 0.00035002476909972645, + "loss": 0.0259, + "step": 26400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00034972421842333984, + "loss": 0.0274, + "step": 26600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00034942309032960303, + "loss": 0.0193, + "step": 26800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003491183622419467, + "loss": 0.0223, + "step": 27000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003488115460094296, + "loss": 0.0213, + "step": 27200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003485026455376655, + "loss": 0.0268, + "step": 27400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003481916647587996, + "loss": 0.0232, + "step": 27600 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034788017807543066, + "loss": 0.0325, + "step": 27800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00034756505893653175, + "loss": 0.0242, + "step": 28000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003472478714255293, + "loss": 0.0221, + "step": 28200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003469286195800583, + "loss": 0.0276, + "step": 28400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003466073074640316, + "loss": 0.0225, + "step": 28600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003462839391675882, + "loss": 0.0219, + "step": 28800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003459585188070413, + "loss": 0.0355, + "step": 29000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00034563105052482586, + "loss": 0.0211, + "step": 29200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003453015384894458, + "loss": 0.0353, + "step": 29400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003449699868954208, + "loss": 0.0253, + "step": 29600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003446363999632333, + "loss": 0.0406, + "step": 29800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003443007819392746, + "loss": 0.0201, + "step": 30000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003439631370957905, + "loss": 0.0261, + "step": 30200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003436234697308274, + "loss": 0.0426, + "step": 30400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003432817841681772, + "loss": 0.0184, + "step": 30600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003429380847573226, + "loss": 0.0201, + "step": 30800 + }, + { + "epoch": 0.18, + "learning_rate": 0.00034259237587338153, + "loss": 0.0331, + "step": 31000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00034224466191705135, + "loss": 0.0208, + "step": 31200 + }, + { + "epoch": 0.18, + "learning_rate": 0.000341894947314553, + "loss": 0.0297, + "step": 31400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003415432365175747, + "loss": 0.0248, + "step": 31600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003411895340032153, + "loss": 0.0207, + "step": 31800 + }, + { + "epoch": 0.18, + "learning_rate": 0.00034083562765829117, + "loss": 0.0334, + "step": 32000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003404779651439537, + "loss": 0.0153, + "step": 32200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003401183244725952, + "loss": 0.0315, + "step": 32400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003397567102222577, + "loss": 0.0234, + "step": 32600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00033939312699610597, + "loss": 0.026, + "step": 32800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00033902757942236837, + "loss": 0.018, + "step": 33000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00033866007215427904, + "loss": 0.0318, + "step": 33200 + }, + { + "epoch": 0.19, + "learning_rate": 0.00033829060987001754, + "loss": 0.0249, + "step": 33400 + }, + { + "epoch": 0.19, + "learning_rate": 0.00033791919727265, + "loss": 0.0266, + "step": 33600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00033754583909006893, + "loss": 0.0259, + "step": 33800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003371705400749333, + "loss": 0.03, + "step": 34000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003367951959879635, + "loss": 0.0283, + "step": 34200 + }, + { + "epoch": 0.2, + "learning_rate": 0.000336416039308738, + "loss": 0.0269, + "step": 34400 + }, + { + "epoch": 0.2, + "learning_rate": 0.00033603495617873176, + "loss": 0.034, + "step": 34600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00033565195144893837, + "loss": 0.0319, + "step": 34800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003352670299948123, + "loss": 0.018, + "step": 35000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00033488019671620693, + "loss": 0.0419, + "step": 35200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003344914565373123, + "loss": 0.0371, + "step": 35400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003341008144065922, + "loss": 0.0253, + "step": 35600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003337082752967214, + "loss": 0.0254, + "step": 35800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003333138442045221, + "loss": 0.0287, + "step": 36000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003329175261509006, + "loss": 0.0203, + "step": 36200 + }, + { + "epoch": 0.21, + "learning_rate": 0.00033251932618078315, + "loss": 0.0234, + "step": 36400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00033211924936305204, + "loss": 0.0162, + "step": 36600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003317193151808796, + "loss": 0.0215, + "step": 36800 + }, + { + "epoch": 0.21, + "learning_rate": 0.00033131550929049215, + "loss": 0.0274, + "step": 37000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003309098418764647, + "loss": 0.0337, + "step": 37200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003305023181027363, + "loss": 0.0214, + "step": 37400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003300929431568763, + "loss": 0.0192, + "step": 37600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003296817222500186, + "loss": 0.0373, + "step": 37800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003292686606167952, + "loss": 0.0341, + "step": 38000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00032885376351526955, + "loss": 0.0267, + "step": 38200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00032843703622686987, + "loss": 0.0257, + "step": 38400 + }, + { + "epoch": 0.22, + "learning_rate": 0.00032801848405632146, + "loss": 0.0352, + "step": 38600 + }, + { + "epoch": 0.22, + "learning_rate": 0.00032759811233157966, + "loss": 0.0291, + "step": 38800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003271780418373506, + "loss": 0.0236, + "step": 39000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003267540561114055, + "loss": 0.02, + "step": 39200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00032632826692678864, + "loss": 0.0162, + "step": 39400 + }, + { + "epoch": 0.22, + "learning_rate": 0.00032590067970357875, + "loss": 0.0248, + "step": 39600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003254712998847426, + "loss": 0.0229, + "step": 39800 + }, + { + "epoch": 0.23, + "learning_rate": 0.00032504013293606604, + "loss": 0.0239, + "step": 40000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003246071843460839, + "loss": 0.0208, + "step": 40200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003241724596260105, + "loss": 0.0248, + "step": 40400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00032373596430966946, + "loss": 0.0132, + "step": 40600 + }, + { + "epoch": 0.23, + "learning_rate": 0.000323297703953423, + "loss": 0.0176, + "step": 40800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003228598886025915, + "loss": 0.0327, + "step": 41000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003224203368588642, + "loss": 0.045, + "step": 41200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003219768323998541, + "loss": 0.0251, + "step": 41400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003215315852937889, + "loss": 0.0296, + "step": 41600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003210846012084369, + "loss": 0.0327, + "step": 41800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003206358858336769, + "loss": 0.0261, + "step": 42000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003201854448814265, + "loss": 0.0321, + "step": 42200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00031973328408556876, + "loss": 0.0228, + "step": 42400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003192794092018796, + "loss": 0.0234, + "step": 42600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003188238260079543, + "loss": 0.0227, + "step": 42800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00031836654030313415, + "loss": 0.0187, + "step": 43000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00031790985703126633, + "loss": 0.0381, + "step": 43200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003174491922289618, + "loss": 0.0221, + "step": 43400 + }, + { + "epoch": 0.25, + "learning_rate": 0.000316986842414148, + "loss": 0.016, + "step": 43600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003165228134723018, + "loss": 0.0217, + "step": 43800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003160571113102746, + "loss": 0.0356, + "step": 44000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00031558974185621694, + "loss": 0.0281, + "step": 44200 + }, + { + "epoch": 0.25, + "learning_rate": 0.000315120711059503, + "loss": 0.0231, + "step": 44400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003146500248906554, + "loss": 0.0322, + "step": 44600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003141776893412682, + "loss": 0.0247, + "step": 44800 + }, + { + "epoch": 0.26, + "learning_rate": 0.00031370371042393195, + "loss": 0.0271, + "step": 45000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00031322809417215584, + "loss": 0.0184, + "step": 45200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003127508466402918, + "loss": 0.0232, + "step": 45400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003122719739034571, + "loss": 0.0475, + "step": 45600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003117914820574569, + "loss": 0.0303, + "step": 45800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003113093772187068, + "loss": 0.0394, + "step": 46000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00031082566552415524, + "loss": 0.0267, + "step": 46200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003103403531312047, + "loss": 0.0316, + "step": 46400 + }, + { + "epoch": 0.26, + "learning_rate": 0.00030985344621763415, + "loss": 0.0216, + "step": 46600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003093649509815197, + "loss": 0.0199, + "step": 46800 + }, + { + "epoch": 0.27, + "learning_rate": 0.00030887487364115605, + "loss": 0.0231, + "step": 47000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00030838322043497736, + "loss": 0.0202, + "step": 47200 + }, + { + "epoch": 0.27, + "learning_rate": 0.00030789246762956597, + "loss": 0.0283, + "step": 47400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00030739768928821336, + "loss": 0.0394, + "step": 47600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003069013538848477, + "loss": 0.0264, + "step": 47800 + }, + { + "epoch": 0.27, + "learning_rate": 0.00030640346773756506, + "loss": 0.0411, + "step": 48000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00030590403718420164, + "loss": 0.036, + "step": 48200 + }, + { + "epoch": 0.27, + "learning_rate": 0.00030540306858225326, + "loss": 0.0421, + "step": 48400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003049005683087941, + "loss": 0.0398, + "step": 48600 + }, + { + "epoch": 0.28, + "learning_rate": 0.000304396542760396, + "loss": 0.0173, + "step": 48800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003038909983530467, + "loss": 0.0317, + "step": 49000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003033839415220679, + "loss": 0.048, + "step": 49200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00030287537872203423, + "loss": 0.0345, + "step": 49400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003023678704574289, + "loss": 0.0316, + "step": 49600 + }, + { + "epoch": 0.28, + "learning_rate": 0.00030185632260843674, + "loss": 0.0247, + "step": 49800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003013432882361978, + "loss": 0.0165, + "step": 50000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003008287738713774, + "loss": 0.0267, + "step": 50200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003003127860634806, + "loss": 0.0236, + "step": 50400 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029979792229214487, + "loss": 0.0526, + "step": 50600 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029927901460657474, + "loss": 0.0176, + "step": 50800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002987612586177442, + "loss": 0.0237, + "step": 51000 + }, + { + "epoch": 0.29, + "learning_rate": 0.000298239457344193, + "loss": 0.0169, + "step": 51200 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029771621558824144, + "loss": 0.0421, + "step": 51400 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029719154001048997, + "loss": 0.0271, + "step": 51600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002966654372897905, + "loss": 0.0225, + "step": 51800 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029613791412316185, + "loss": 0.0283, + "step": 52000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029560897722570427, + "loss": 0.017, + "step": 52200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029507863333051433, + "loss": 0.0216, + "step": 52400 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029454688918859875, + "loss": 0.0212, + "step": 52600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029401375156878874, + "loss": 0.0356, + "step": 52800 + }, + { + "epoch": 0.3, + "learning_rate": 0.00029347922725765375, + "loss": 0.0223, + "step": 53000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002929433230594152, + "loss": 0.0231, + "step": 53200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002924060457958596, + "loss": 0.0385, + "step": 53400 + }, + { + "epoch": 0.3, + "learning_rate": 0.000291867402306252, + "loss": 0.0194, + "step": 53600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029132739944724874, + "loss": 0.021, + "step": 53800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002907860440928105, + "loss": 0.0384, + "step": 54000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00029024334313411393, + "loss": 0.0326, + "step": 54200 + }, + { + "epoch": 0.31, + "learning_rate": 0.00028969930347946533, + "loss": 0.0309, + "step": 54400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00028915393205421116, + "loss": 0.0228, + "step": 54600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00028860723580065116, + "loss": 0.0164, + "step": 54800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002880592216779493, + "loss": 0.0292, + "step": 55000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002875098966620452, + "loss": 0.0184, + "step": 55200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002869592677455658, + "loss": 0.0171, + "step": 55400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00028640734193773564, + "loss": 0.0272, + "step": 55600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002858541262642884, + "loss": 0.0202, + "step": 55800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00028529962776737674, + "loss": 0.025, + "step": 56000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00028474385350548337, + "loss": 0.0232, + "step": 56200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002841868105533304, + "loss": 0.0286, + "step": 56400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00028362850600179034, + "loss": 0.0246, + "step": 56600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002830689469577944, + "loss": 0.0271, + "step": 56800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00028250814054424367, + "loss": 0.0216, + "step": 57000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00028194890720638425, + "loss": 0.0261, + "step": 57200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002813856336334063, + "loss": 0.0328, + "step": 57400 + }, + { + "epoch": 0.33, + "learning_rate": 0.00028082113411859194, + "loss": 0.0209, + "step": 57600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002802554158477314, + "loss": 0.0266, + "step": 57800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002796884860221292, + "loss": 0.0163, + "step": 58000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002791203518585125, + "loss": 0.0302, + "step": 58200 + }, + { + "epoch": 0.33, + "learning_rate": 0.00027855102058893863, + "loss": 0.0205, + "step": 58400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002779804994607039, + "loss": 0.0227, + "step": 58600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00027740879573625075, + "loss": 0.035, + "step": 58800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002768359166930753, + "loss": 0.0255, + "step": 59000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00027626186962363523, + "loss": 0.0261, + "step": 59200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002756866618352563, + "loss": 0.0306, + "step": 59400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002751103006500397, + "loss": 0.0106, + "step": 59600 + }, + { + "epoch": 0.34, + "learning_rate": 0.00027453279340476877, + "loss": 0.0199, + "step": 59800 + }, + { + "epoch": 0.34, + "learning_rate": 0.000273957043500945, + "loss": 0.0366, + "step": 60000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002733772718425448, + "loss": 0.0181, + "step": 60200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002727992834466737, + "loss": 0.0211, + "step": 60400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002722172767484197, + "loss": 0.0309, + "step": 60600 + }, + { + "epoch": 0.35, + "learning_rate": 0.00027163416081682745, + "loss": 0.0205, + "step": 60800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00027104994307466473, + "loss": 0.0422, + "step": 61000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002704646309587249, + "loss": 0.0197, + "step": 61200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002698782319197321, + "loss": 0.02, + "step": 61400 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026929075342224635, + "loss": 0.0292, + "step": 61600 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026870220294456887, + "loss": 0.0174, + "step": 61800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026811258797864644, + "loss": 0.0211, + "step": 62000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026752191602997627, + "loss": 0.0179, + "step": 62200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002669331558226597, + "loss": 0.0243, + "step": 62400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026634039766960824, + "loss": 0.0305, + "step": 62600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002657466050928862, + "loss": 0.0271, + "step": 62800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002651517856511696, + "loss": 0.045, + "step": 63000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002645559469162059, + "loss": 0.0267, + "step": 63200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002639590964727178, + "loss": 0.03, + "step": 63400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026336124191830645, + "loss": 0.018, + "step": 63600 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026276239086335485, + "loss": 0.0297, + "step": 63800 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026216255093093095, + "loss": 0.0159, + "step": 64000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026156172975669046, + "loss": 0.0184, + "step": 64200 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026095993498878, + "loss": 0.0272, + "step": 64400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002603571742877395, + "loss": 0.0142, + "step": 64600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00025975345532640456, + "loss": 0.0188, + "step": 64800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00025915181148932056, + "loss": 0.0468, + "step": 65000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00025854620376982297, + "loss": 0.0152, + "step": 65200 + }, + { + "epoch": 0.37, + "learning_rate": 0.00025793966084276023, + "loss": 0.0351, + "step": 65400 + }, + { + "epoch": 0.37, + "learning_rate": 0.00025733219042911403, + "loss": 0.0259, + "step": 65600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002567238002616722, + "loss": 0.0349, + "step": 65800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00025611449808493066, + "loss": 0.0122, + "step": 66000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002555042916549949, + "loss": 0.0207, + "step": 66200 + }, + { + "epoch": 0.38, + "learning_rate": 0.00025489318873948087, + "loss": 0.0338, + "step": 66400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00025428119711741644, + "loss": 0.0264, + "step": 66600 + }, + { + "epoch": 0.38, + "learning_rate": 0.00025366832457914223, + "loss": 0.0211, + "step": 66800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002530545789262125, + "loss": 0.026, + "step": 67000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00025243996797129576, + "loss": 0.0384, + "step": 67200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002518244995380754, + "loss": 0.0242, + "step": 67400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00025120818146115014, + "loss": 0.0185, + "step": 67600 + }, + { + "epoch": 0.38, + "learning_rate": 0.00025059102158593404, + "loss": 0.0151, + "step": 67800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00024997302776855716, + "loss": 0.016, + "step": 68000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002493542078757648, + "loss": 0.0322, + "step": 68200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00024873456978481814, + "loss": 0.0201, + "step": 68400 + }, + { + "epoch": 0.39, + "learning_rate": 0.00024811412138339326, + "loss": 0.0296, + "step": 68600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00024749287056948145, + "loss": 0.0329, + "step": 68800 + }, + { + "epoch": 0.39, + "learning_rate": 0.000246870825251288, + "loss": 0.0136, + "step": 69000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00024624799334713204, + "loss": 0.0267, + "step": 69200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00024562438278534536, + "loss": 0.02, + "step": 69400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00024500000150417183, + "loss": 0.027, + "step": 69600 + }, + { + "epoch": 0.4, + "learning_rate": 0.00024437485745166604, + "loss": 0.0287, + "step": 69800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002437489585855924, + "loss": 0.0213, + "step": 70000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002431223128733236, + "loss": 0.027, + "step": 70200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00024249492829173943, + "loss": 0.0204, + "step": 70400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00024186681282712484, + "loss": 0.0226, + "step": 70600 + }, + { + "epoch": 0.4, + "learning_rate": 0.00024123797447506894, + "loss": 0.0235, + "step": 70800 + }, + { + "epoch": 0.4, + "learning_rate": 0.00024060842124036243, + "loss": 0.022, + "step": 71000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002399781611368965, + "loss": 0.015, + "step": 71200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002393503587074034, + "loss": 0.02, + "step": 71400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002387187123780583, + "loss": 0.0172, + "step": 71600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00023808638323498182, + "loss": 0.0184, + "step": 71800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002374533793274009, + "loss": 0.022, + "step": 72000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00023681970871313178, + "loss": 0.0163, + "step": 72200 + }, + { + "epoch": 0.41, + "learning_rate": 0.00023618537945847764, + "loss": 0.042, + "step": 72400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002355503996381257, + "loss": 0.0249, + "step": 72600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00023491477733504463, + "loss": 0.0392, + "step": 72800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00023427852064038156, + "loss": 0.0206, + "step": 73000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00023364482361278884, + "loss": 0.0196, + "step": 73200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00023300732551134807, + "loss": 0.0273, + "step": 73400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00023236921729921222, + "loss": 0.0197, + "step": 73600 + }, + { + "epoch": 0.42, + "learning_rate": 0.00023173370213417187, + "loss": 0.0247, + "step": 73800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00023109440102573372, + "loss": 0.0279, + "step": 74000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00023045451415715175, + "loss": 0.0236, + "step": 74200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00022981404967385886, + "loss": 0.0183, + "step": 74400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00022917301572864066, + "loss": 0.0285, + "step": 74600 + }, + { + "epoch": 0.42, + "learning_rate": 0.00022853142048153175, + "loss": 0.0298, + "step": 74800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00022788927209971169, + "loss": 0.0202, + "step": 75000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00022724657875740128, + "loss": 0.0198, + "step": 75200 + }, + { + "epoch": 0.43, + "learning_rate": 0.00022660334863575842, + "loss": 0.0297, + "step": 75400 + }, + { + "epoch": 0.43, + "learning_rate": 0.00022595958992277377, + "loss": 0.0167, + "step": 75600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00022531531081316684, + "loss": 0.0184, + "step": 75800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00022467051950828147, + "loss": 0.0282, + "step": 76000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00022402845193252637, + "loss": 0.0345, + "step": 76200 + }, + { + "epoch": 0.43, + "learning_rate": 0.00022338266332551338, + "loss": 0.0173, + "step": 76400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002227363871248368, + "loss": 0.0243, + "step": 76600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002220896315572626, + "loss": 0.0166, + "step": 76800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002214424048556585, + "loss": 0.0257, + "step": 77000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00022079471525888992, + "loss": 0.0477, + "step": 77200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002201465710117142, + "loss": 0.0205, + "step": 77400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00021949798036467665, + "loss": 0.0275, + "step": 77600 + }, + { + "epoch": 0.44, + "learning_rate": 0.00021884895157400457, + "loss": 0.0196, + "step": 77800 + }, + { + "epoch": 0.44, + "learning_rate": 0.000218199492901503, + "loss": 0.0342, + "step": 78000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00021754961261444885, + "loss": 0.0219, + "step": 78200 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021689931898548614, + "loss": 0.017, + "step": 78400 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021624862029252044, + "loss": 0.0263, + "step": 78600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002155975248186137, + "loss": 0.025, + "step": 78800 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021494604085187845, + "loss": 0.0171, + "step": 79000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002142941766853728, + "loss": 0.0205, + "step": 79200 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021364194061699446, + "loss": 0.0162, + "step": 79400 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021298934094937536, + "loss": 0.0238, + "step": 79600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002123363859897756, + "loss": 0.0239, + "step": 79800 + }, + { + "epoch": 0.45, + "learning_rate": 0.00021168308404997838, + "loss": 0.0139, + "step": 80000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00021102944344618345, + "loss": 0.0305, + "step": 80200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002103754724989017, + "loss": 0.0228, + "step": 80400 + }, + { + "epoch": 0.46, + "learning_rate": 0.00020972117953284915, + "loss": 0.0379, + "step": 80600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002090665728768409, + "loss": 0.0176, + "step": 80800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002084116608636852, + "loss": 0.0186, + "step": 81000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00020775645183007728, + "loss": 0.019, + "step": 81200 + }, + { + "epoch": 0.46, + "learning_rate": 0.00020710095411649338, + "loss": 0.0251, + "step": 81400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002064451760670844, + "loss": 0.0229, + "step": 81600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00020578912602956987, + "loss": 0.0303, + "step": 81800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002051360945654819, + "loss": 0.017, + "step": 82000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020447952686428194, + "loss": 0.0196, + "step": 82200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020382271219668632, + "loss": 0.0212, + "step": 82400 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020316565892361012, + "loss": 0.0194, + "step": 82600 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020250837540900578, + "loss": 0.0299, + "step": 82800 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020185087001975654, + "loss": 0.0184, + "step": 83000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020119315112557005, + "loss": 0.033, + "step": 83200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00020053522709887175, + "loss": 0.0207, + "step": 83400 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019987710631469828, + "loss": 0.0141, + "step": 83600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019921879715059093, + "loss": 0.0225, + "step": 83800 + }, + { + "epoch": 0.48, + "learning_rate": 0.000198560307986489, + "loss": 0.0263, + "step": 84000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019790164720462304, + "loss": 0.0284, + "step": 84200 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019724282318940825, + "loss": 0.016, + "step": 84400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019658384432733769, + "loss": 0.0394, + "step": 84600 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001959247190068755, + "loss": 0.0208, + "step": 84800 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019526545561835023, + "loss": 0.0199, + "step": 85000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019460606255384803, + "loss": 0.0188, + "step": 85200 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019394654820710546, + "loss": 0.0246, + "step": 85400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001932869209734034, + "loss": 0.0386, + "step": 85600 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019262718924945921, + "loss": 0.0336, + "step": 85800 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019197066079753742, + "loss": 0.0226, + "step": 86000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001913107457060452, + "loss": 0.014, + "step": 86200 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019065075128001235, + "loss": 0.0251, + "step": 86400 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018999068592083065, + "loss": 0.0188, + "step": 86600 + }, + { + "epoch": 0.49, + "learning_rate": 0.00018933055803079484, + "loss": 0.0139, + "step": 86800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001886703760129956, + "loss": 0.0213, + "step": 87000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001880101482712127, + "loss": 0.0322, + "step": 87200 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018734988320980793, + "loss": 0.0223, + "step": 87400 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018668958923361806, + "loss": 0.0261, + "step": 87600 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018602927474784813, + "loss": 0.019, + "step": 87800 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018536894815796403, + "loss": 0.0222, + "step": 88000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001847119195163232, + "loss": 0.0147, + "step": 88200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001840515938906732, + "loss": 0.0206, + "step": 88400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001833912813357758, + "loss": 0.0189, + "step": 88600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001827309902570724, + "loss": 0.0224, + "step": 88800 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018207072905973099, + "loss": 0.0197, + "step": 89000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018141050614853935, + "loss": 0.0323, + "step": 89200 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018075032992779762, + "loss": 0.0138, + "step": 89400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001800902088012118, + "loss": 0.0197, + "step": 89600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001794334512880854, + "loss": 0.0307, + "step": 89800 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017877346517762124, + "loss": 0.026, + "step": 90000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017811355932579115, + "loss": 0.0236, + "step": 90200 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017745374213285934, + "loss": 0.0359, + "step": 90400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001767940219979617, + "loss": 0.0245, + "step": 90600 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017613440731899813, + "loss": 0.0333, + "step": 90800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017547490649252667, + "loss": 0.0131, + "step": 91000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017481552791365573, + "loss": 0.0221, + "step": 91200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017415627997593782, + "loss": 0.0209, + "step": 91400 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001734971710712621, + "loss": 0.0273, + "step": 91600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001728382095897483, + "loss": 0.0262, + "step": 91800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017217940391963928, + "loss": 0.0366, + "step": 92000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017152076244719467, + "loss": 0.0265, + "step": 92200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017086229355658372, + "loss": 0.023, + "step": 92400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017020400562977906, + "loss": 0.0304, + "step": 92600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00016954590704644948, + "loss": 0.017, + "step": 92800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00016888800618385382, + "loss": 0.0414, + "step": 93000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00016823031141673374, + "loss": 0.0462, + "step": 93200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00016757611797137148, + "loss": 0.0284, + "step": 93400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00016691885937382842, + "loss": 0.0256, + "step": 93600 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001662618319379937, + "loss": 0.0148, + "step": 93800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00016560504402749084, + "loss": 0.0144, + "step": 94000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00016494850400289434, + "loss": 0.0202, + "step": 94200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00016429222022162316, + "loss": 0.0264, + "step": 94400 + }, + { + "epoch": 0.54, + "learning_rate": 0.00016363620103783448, + "loss": 0.0175, + "step": 94600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00016298045480231735, + "loss": 0.0187, + "step": 94800 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001623249898623863, + "loss": 0.0153, + "step": 95000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00016166981456177496, + "loss": 0.0145, + "step": 95200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00016101493724053015, + "loss": 0.0196, + "step": 95400 + }, + { + "epoch": 0.54, + "learning_rate": 0.00016036036623490562, + "loss": 0.0138, + "step": 95600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015970610987725575, + "loss": 0.0189, + "step": 95800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015905217649592963, + "loss": 0.0367, + "step": 96000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015839857441516498, + "loss": 0.0134, + "step": 96200 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015774857740871961, + "loss": 0.0236, + "step": 96400 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015709566112445702, + "loss": 0.0288, + "step": 96600 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001564431010461969, + "loss": 0.0204, + "step": 96800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015579090548069552, + "loss": 0.0108, + "step": 97000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001551390827300693, + "loss": 0.0264, + "step": 97200 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001544876410916887, + "loss": 0.0196, + "step": 97400 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001538365888580729, + "loss": 0.0221, + "step": 97600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015318593431678411, + "loss": 0.0236, + "step": 97800 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001525356857503223, + "loss": 0.0229, + "step": 98000 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001518858514360193, + "loss": 0.0231, + "step": 98200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015123643964593393, + "loss": 0.0162, + "step": 98400 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015058745864674644, + "loss": 0.0411, + "step": 98600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00014993891669965337, + "loss": 0.0197, + "step": 98800 + }, + { + "epoch": 0.56, + "learning_rate": 0.00014929082206026223, + "loss": 0.0285, + "step": 99000 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001486431829784866, + "loss": 0.0316, + "step": 99200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00014799600769844118, + "loss": 0.0223, + "step": 99400 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001473493044583364, + "loss": 0.0255, + "step": 99600 + }, + { + "epoch": 0.57, + "learning_rate": 0.00014670308149037416, + "loss": 0.0137, + "step": 99800 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001460573470206426, + "loss": 0.0174, + "step": 100000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00014541533420859412, + "loss": 0.0322, + "step": 100200 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001447705988435316, + "loss": 0.0253, + "step": 100400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00014412637657621645, + "loss": 0.0251, + "step": 100600 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001434826756072689, + "loss": 0.0185, + "step": 100800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00014283950413067326, + "loss": 0.0184, + "step": 101000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00014219687033367387, + "loss": 0.0137, + "step": 101200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014155478239667057, + "loss": 0.0191, + "step": 101400 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014091324849311473, + "loss": 0.0234, + "step": 101600 + }, + { + "epoch": 0.58, + "learning_rate": 0.000140272276789405, + "loss": 0.0199, + "step": 101800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00013963187544478376, + "loss": 0.0141, + "step": 102000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00013899205261123283, + "loss": 0.0207, + "step": 102200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00013835281643337, + "loss": 0.0242, + "step": 102400 + }, + { + "epoch": 0.58, + "learning_rate": 0.00013771417504834503, + "loss": 0.031, + "step": 102600 + }, + { + "epoch": 0.58, + "learning_rate": 0.00013707613658573656, + "loss": 0.0239, + "step": 102800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00013643870916744814, + "loss": 0.0238, + "step": 103000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00013580190090760512, + "loss": 0.0349, + "step": 103200 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001351657199124511, + "loss": 0.0177, + "step": 103400 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001345301742802452, + "loss": 0.0139, + "step": 103600 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001338952721011585, + "loss": 0.018, + "step": 103800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00013326102145717149, + "loss": 0.0179, + "step": 104000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00013262743042197046, + "loss": 0.0218, + "step": 104200 + }, + { + "epoch": 0.59, + "learning_rate": 0.00013199450706084573, + "loss": 0.0117, + "step": 104400 + }, + { + "epoch": 0.59, + "learning_rate": 0.00013136225943058828, + "loss": 0.0231, + "step": 104600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00013073069557938726, + "loss": 0.031, + "step": 104800 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001300998235467278, + "loss": 0.0206, + "step": 105000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001294728004700576, + "loss": 0.0213, + "step": 105200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001288433325983142, + "loss": 0.0407, + "step": 105400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00012821458057027873, + "loss": 0.0225, + "step": 105600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00012758969071674774, + "loss": 0.0235, + "step": 105800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00012696239069890963, + "loss": 0.0212, + "step": 106000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00012633583046820873, + "loss": 0.0328, + "step": 106200 + }, + { + "epoch": 0.6, + "learning_rate": 0.00012571001800043652, + "loss": 0.0278, + "step": 106400 + }, + { + "epoch": 0.61, + "learning_rate": 0.000125084961261866, + "loss": 0.0158, + "step": 106600 + }, + { + "epoch": 0.61, + "learning_rate": 0.00012446066820914994, + "loss": 0.0146, + "step": 106800 + }, + { + "epoch": 0.61, + "learning_rate": 0.00012383714678922, + "loss": 0.0284, + "step": 107000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00012321440493918523, + "loss": 0.0149, + "step": 107200 + }, + { + "epoch": 0.61, + "learning_rate": 0.00012259245058623115, + "loss": 0.0138, + "step": 107400 + }, + { + "epoch": 0.61, + "learning_rate": 0.00012197129164751876, + "loss": 0.02, + "step": 107600 + }, + { + "epoch": 0.61, + "learning_rate": 0.00012135093603008409, + "loss": 0.0187, + "step": 107800 + }, + { + "epoch": 0.61, + "learning_rate": 0.00012073139163073704, + "loss": 0.0233, + "step": 108000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00012011266633596143, + "loss": 0.0244, + "step": 108200 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011949476802181382, + "loss": 0.0282, + "step": 108400 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001188777045538242, + "loss": 0.0217, + "step": 108600 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011826456278153069, + "loss": 0.0214, + "step": 108800 + }, + { + "epoch": 0.62, + "learning_rate": 0.000117649188287617, + "loss": 0.0245, + "step": 109000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011703467213314812, + "loss": 0.018, + "step": 109200 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011642102214060081, + "loss": 0.0136, + "step": 109400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011580824612142588, + "loss": 0.0274, + "step": 109600 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011519635187594886, + "loss": 0.0316, + "step": 109800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011458534719327081, + "loss": 0.0195, + "step": 110000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011397523985116925, + "loss": 0.0194, + "step": 110200 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001133660376159988, + "loss": 0.0417, + "step": 110400 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011275774824259256, + "loss": 0.0226, + "step": 110600 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011215037947416353, + "loss": 0.0247, + "step": 110800 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011154393904220578, + "loss": 0.0123, + "step": 111000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011093843466639602, + "loss": 0.0215, + "step": 111200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011033387405449557, + "loss": 0.0169, + "step": 111400 + }, + { + "epoch": 0.63, + "learning_rate": 0.00010973026490225217, + "loss": 0.0335, + "step": 111600 + }, + { + "epoch": 0.63, + "learning_rate": 0.00010912761489330187, + "loss": 0.021, + "step": 111800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010852593169907127, + "loss": 0.0212, + "step": 112000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010792522297867997, + "loss": 0.0251, + "step": 112200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010732549637884315, + "loss": 0.0195, + "step": 112400 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010672975074337141, + "loss": 0.0154, + "step": 112600 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010613200626886399, + "loss": 0.0161, + "step": 112800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010553526674164345, + "loss": 0.022, + "step": 113000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010493953975789901, + "loss": 0.0183, + "step": 113200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010434483290093065, + "loss": 0.0301, + "step": 113400 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010375115374105277, + "loss": 0.0272, + "step": 113600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010315850983549783, + "loss": 0.0169, + "step": 113800 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010256690872831991, + "loss": 0.0258, + "step": 114000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010197635795029873, + "loss": 0.0339, + "step": 114200 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010138686501884381, + "loss": 0.0213, + "step": 114400 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010079843743789918, + "loss": 0.0287, + "step": 114600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010021401679048213, + "loss": 0.0281, + "step": 114800 + }, + { + "epoch": 0.65, + "learning_rate": 9.962773694788469e-05, + "loss": 0.029, + "step": 115000 + }, + { + "epoch": 0.65, + "learning_rate": 9.904254484859887e-05, + "loss": 0.028, + "step": 115200 + }, + { + "epoch": 0.66, + "learning_rate": 9.845844794182107e-05, + "loss": 0.0131, + "step": 115400 + }, + { + "epoch": 0.66, + "learning_rate": 9.787545366280647e-05, + "loss": 0.0328, + "step": 115600 + }, + { + "epoch": 0.66, + "learning_rate": 9.729356943277424e-05, + "loss": 0.0271, + "step": 115800 + }, + { + "epoch": 0.66, + "learning_rate": 9.67128026588135e-05, + "loss": 0.0318, + "step": 116000 + }, + { + "epoch": 0.66, + "learning_rate": 9.613316073378832e-05, + "loss": 0.0234, + "step": 116200 + }, + { + "epoch": 0.66, + "learning_rate": 9.555465103624428e-05, + "loss": 0.025, + "step": 116400 + }, + { + "epoch": 0.66, + "learning_rate": 9.497728093031412e-05, + "loss": 0.0344, + "step": 116600 + }, + { + "epoch": 0.66, + "learning_rate": 9.440105776562451e-05, + "loss": 0.0164, + "step": 116800 + }, + { + "epoch": 0.66, + "learning_rate": 9.382598887720169e-05, + "loss": 0.0298, + "step": 117000 + }, + { + "epoch": 0.67, + "learning_rate": 9.325208158537876e-05, + "loss": 0.024, + "step": 117200 + }, + { + "epoch": 0.67, + "learning_rate": 9.267934319570226e-05, + "loss": 0.0153, + "step": 117400 + }, + { + "epoch": 0.67, + "learning_rate": 9.210778099883943e-05, + "loss": 0.0135, + "step": 117600 + }, + { + "epoch": 0.67, + "learning_rate": 9.153740227048476e-05, + "loss": 0.0168, + "step": 117800 + }, + { + "epoch": 0.67, + "learning_rate": 9.097105723733374e-05, + "loss": 0.0126, + "step": 118000 + }, + { + "epoch": 0.67, + "learning_rate": 9.040589819318035e-05, + "loss": 0.0195, + "step": 118200 + }, + { + "epoch": 0.67, + "learning_rate": 8.983910128561952e-05, + "loss": 0.0322, + "step": 118400 + }, + { + "epoch": 0.67, + "learning_rate": 8.92735167257004e-05, + "loss": 0.0205, + "step": 118600 + }, + { + "epoch": 0.67, + "learning_rate": 8.870915171302544e-05, + "loss": 0.0218, + "step": 118800 + }, + { + "epoch": 0.68, + "learning_rate": 8.814601343167284e-05, + "loss": 0.0164, + "step": 119000 + }, + { + "epoch": 0.68, + "learning_rate": 8.758410905010516e-05, + "loss": 0.0244, + "step": 119200 + }, + { + "epoch": 0.68, + "learning_rate": 8.702344572107807e-05, + "loss": 0.0099, + "step": 119400 + }, + { + "epoch": 0.68, + "learning_rate": 8.646403058154925e-05, + "loss": 0.0237, + "step": 119600 + }, + { + "epoch": 0.68, + "learning_rate": 8.590587075258757e-05, + "loss": 0.0274, + "step": 119800 + }, + { + "epoch": 0.68, + "learning_rate": 8.534897333928242e-05, + "loss": 0.0172, + "step": 120000 + }, + { + "epoch": 0.68, + "learning_rate": 8.479334543065332e-05, + "loss": 0.0254, + "step": 120200 + }, + { + "epoch": 0.68, + "learning_rate": 8.423899409955962e-05, + "loss": 0.0226, + "step": 120400 + }, + { + "epoch": 0.68, + "learning_rate": 8.368592640261049e-05, + "loss": 0.0171, + "step": 120600 + }, + { + "epoch": 0.69, + "learning_rate": 8.313414938007512e-05, + "loss": 0.0276, + "step": 120800 + }, + { + "epoch": 0.69, + "learning_rate": 8.258367005579311e-05, + "loss": 0.0146, + "step": 121000 + }, + { + "epoch": 0.69, + "learning_rate": 8.203449543708476e-05, + "loss": 0.0269, + "step": 121200 + }, + { + "epoch": 0.69, + "learning_rate": 8.148663251466253e-05, + "loss": 0.0174, + "step": 121400 + }, + { + "epoch": 0.69, + "learning_rate": 8.094008826254145e-05, + "loss": 0.0158, + "step": 121600 + }, + { + "epoch": 0.69, + "learning_rate": 8.039486963795052e-05, + "loss": 0.0353, + "step": 121800 + }, + { + "epoch": 0.69, + "learning_rate": 7.985098358124426e-05, + "loss": 0.0342, + "step": 122000 + }, + { + "epoch": 0.69, + "learning_rate": 7.930843701581424e-05, + "loss": 0.0251, + "step": 122200 + }, + { + "epoch": 0.7, + "learning_rate": 7.8767236848001e-05, + "loss": 0.0154, + "step": 122400 + }, + { + "epoch": 0.7, + "learning_rate": 7.822738996700614e-05, + "loss": 0.031, + "step": 122600 + }, + { + "epoch": 0.7, + "learning_rate": 7.768890324480457e-05, + "loss": 0.0205, + "step": 122800 + }, + { + "epoch": 0.7, + "learning_rate": 7.715178353605712e-05, + "loss": 0.012, + "step": 123000 + }, + { + "epoch": 0.7, + "learning_rate": 7.661603767802323e-05, + "loss": 0.012, + "step": 123200 + }, + { + "epoch": 0.7, + "learning_rate": 7.60816724904739e-05, + "loss": 0.0198, + "step": 123400 + }, + { + "epoch": 0.7, + "learning_rate": 7.55486947756049e-05, + "loss": 0.0189, + "step": 123600 + }, + { + "epoch": 0.7, + "learning_rate": 7.501711131795021e-05, + "loss": 0.0162, + "step": 123800 + }, + { + "epoch": 0.7, + "learning_rate": 7.448692888429562e-05, + "loss": 0.0146, + "step": 124000 + }, + { + "epoch": 0.71, + "learning_rate": 7.395815422359255e-05, + "loss": 0.016, + "step": 124200 + }, + { + "epoch": 0.71, + "learning_rate": 7.343342733796512e-05, + "loss": 0.0168, + "step": 124400 + }, + { + "epoch": 0.71, + "learning_rate": 7.290748127549915e-05, + "loss": 0.0294, + "step": 124600 + }, + { + "epoch": 0.71, + "learning_rate": 7.238296309154575e-05, + "loss": 0.0112, + "step": 124800 + }, + { + "epoch": 0.71, + "learning_rate": 7.185987946295322e-05, + "loss": 0.013, + "step": 125000 + }, + { + "epoch": 0.71, + "learning_rate": 7.133823704830904e-05, + "loss": 0.0237, + "step": 125200 + }, + { + "epoch": 0.71, + "learning_rate": 7.081804248785451e-05, + "loss": 0.0247, + "step": 125400 + }, + { + "epoch": 0.71, + "learning_rate": 7.029930240340067e-05, + "loss": 0.0225, + "step": 125600 + }, + { + "epoch": 0.71, + "learning_rate": 6.978202339824351e-05, + "loss": 0.028, + "step": 125800 + }, + { + "epoch": 0.72, + "learning_rate": 6.926621205708063e-05, + "loss": 0.0234, + "step": 126000 + }, + { + "epoch": 0.72, + "learning_rate": 6.875187494592678e-05, + "loss": 0.0175, + "step": 126200 + }, + { + "epoch": 0.72, + "learning_rate": 6.82390186120306e-05, + "loss": 0.0198, + "step": 126400 + }, + { + "epoch": 0.72, + "learning_rate": 6.773020271848358e-05, + "loss": 0.0233, + "step": 126600 + }, + { + "epoch": 0.72, + "learning_rate": 6.722032002013071e-05, + "loss": 0.0213, + "step": 126800 + }, + { + "epoch": 0.72, + "learning_rate": 6.6711937594947e-05, + "loss": 0.0209, + "step": 127000 + }, + { + "epoch": 0.72, + "learning_rate": 6.620506191438099e-05, + "loss": 0.0329, + "step": 127200 + }, + { + "epoch": 0.72, + "learning_rate": 6.569969943070103e-05, + "loss": 0.0224, + "step": 127400 + }, + { + "epoch": 0.72, + "learning_rate": 6.51958565769135e-05, + "loss": 0.0301, + "step": 127600 + }, + { + "epoch": 0.73, + "learning_rate": 6.469604754411272e-05, + "loss": 0.0303, + "step": 127800 + }, + { + "epoch": 0.73, + "learning_rate": 6.41952554936078e-05, + "loss": 0.0163, + "step": 128000 + }, + { + "epoch": 0.73, + "learning_rate": 6.369600222379772e-05, + "loss": 0.0162, + "step": 128200 + }, + { + "epoch": 0.73, + "learning_rate": 6.319829408992151e-05, + "loss": 0.0233, + "step": 128400 + }, + { + "epoch": 0.73, + "learning_rate": 6.27021374275494e-05, + "loss": 0.0273, + "step": 128600 + }, + { + "epoch": 0.73, + "learning_rate": 6.220753855250208e-05, + "loss": 0.0312, + "step": 128800 + }, + { + "epoch": 0.73, + "learning_rate": 6.171450376077071e-05, + "loss": 0.0216, + "step": 129000 + }, + { + "epoch": 0.73, + "learning_rate": 6.122303932843605e-05, + "loss": 0.0358, + "step": 129200 + }, + { + "epoch": 0.73, + "learning_rate": 6.073315151158924e-05, + "loss": 0.0106, + "step": 129400 + }, + { + "epoch": 0.74, + "learning_rate": 6.0244846546251834e-05, + "loss": 0.0101, + "step": 129600 + }, + { + "epoch": 0.74, + "learning_rate": 5.9758130648296665e-05, + "loss": 0.0247, + "step": 129800 + }, + { + "epoch": 0.74, + "learning_rate": 5.927301001336826e-05, + "loss": 0.0157, + "step": 130000 + }, + { + "epoch": 0.74, + "learning_rate": 5.878949081680443e-05, + "loss": 0.0219, + "step": 130200 + }, + { + "epoch": 0.74, + "learning_rate": 5.830998476252924e-05, + "loss": 0.0217, + "step": 130400 + }, + { + "epoch": 0.74, + "learning_rate": 5.7829678803223054e-05, + "loss": 0.0145, + "step": 130600 + }, + { + "epoch": 0.74, + "learning_rate": 5.735099265515025e-05, + "loss": 0.0225, + "step": 130800 + }, + { + "epoch": 0.74, + "learning_rate": 5.687393241174086e-05, + "loss": 0.0211, + "step": 131000 + }, + { + "epoch": 0.75, + "learning_rate": 5.639850414572804e-05, + "loss": 0.0275, + "step": 131200 + }, + { + "epoch": 0.75, + "learning_rate": 5.5924713909070656e-05, + "loss": 0.0171, + "step": 131400 + }, + { + "epoch": 0.75, + "learning_rate": 5.545256773287633e-05, + "loss": 0.0211, + "step": 131600 + }, + { + "epoch": 0.75, + "learning_rate": 5.498207162732463e-05, + "loss": 0.0194, + "step": 131800 + }, + { + "epoch": 0.75, + "learning_rate": 5.451323158159054e-05, + "loss": 0.0235, + "step": 132000 + }, + { + "epoch": 0.75, + "learning_rate": 5.4046053563768266e-05, + "loss": 0.0229, + "step": 132200 + }, + { + "epoch": 0.75, + "learning_rate": 5.358054352079529e-05, + "loss": 0.0073, + "step": 132400 + }, + { + "epoch": 0.75, + "learning_rate": 5.311670737837655e-05, + "loss": 0.0174, + "step": 132600 + }, + { + "epoch": 0.75, + "learning_rate": 5.265455104090913e-05, + "loss": 0.0173, + "step": 132800 + }, + { + "epoch": 0.76, + "learning_rate": 5.2194080391407055e-05, + "loss": 0.0173, + "step": 133000 + }, + { + "epoch": 0.76, + "learning_rate": 5.173530129142639e-05, + "loss": 0.0188, + "step": 133200 + }, + { + "epoch": 0.76, + "learning_rate": 5.127821958099065e-05, + "loss": 0.0264, + "step": 133400 + }, + { + "epoch": 0.76, + "learning_rate": 5.082284107851646e-05, + "loss": 0.0263, + "step": 133600 + }, + { + "epoch": 0.76, + "learning_rate": 5.036917158073942e-05, + "loss": 0.0139, + "step": 133800 + }, + { + "epoch": 0.76, + "learning_rate": 4.991721686264047e-05, + "loss": 0.0184, + "step": 134000 + }, + { + "epoch": 0.76, + "learning_rate": 4.946698267737218e-05, + "loss": 0.0207, + "step": 134200 + }, + { + "epoch": 0.76, + "learning_rate": 4.901847475618568e-05, + "loss": 0.0242, + "step": 134400 + }, + { + "epoch": 0.76, + "learning_rate": 4.857169880835763e-05, + "loss": 0.0192, + "step": 134600 + }, + { + "epoch": 0.77, + "learning_rate": 4.812666052111755e-05, + "loss": 0.0148, + "step": 134800 + }, + { + "epoch": 0.77, + "learning_rate": 4.7687789861181634e-05, + "loss": 0.0365, + "step": 135000 + }, + { + "epoch": 0.77, + "learning_rate": 4.724622635071022e-05, + "loss": 0.0186, + "step": 135200 + }, + { + "epoch": 0.77, + "learning_rate": 4.6806417373413885e-05, + "loss": 0.0225, + "step": 135400 + }, + { + "epoch": 0.77, + "learning_rate": 4.6368368527836036e-05, + "loss": 0.0176, + "step": 135600 + }, + { + "epoch": 0.77, + "learning_rate": 4.5932085390114806e-05, + "loss": 0.0263, + "step": 135800 + }, + { + "epoch": 0.77, + "learning_rate": 4.549757351391151e-05, + "loss": 0.0364, + "step": 136000 + }, + { + "epoch": 0.77, + "learning_rate": 4.506483843034039e-05, + "loss": 0.033, + "step": 136200 + }, + { + "epoch": 0.77, + "learning_rate": 4.463388564789776e-05, + "loss": 0.0167, + "step": 136400 + }, + { + "epoch": 0.78, + "learning_rate": 4.420472065239248e-05, + "loss": 0.0108, + "step": 136600 + }, + { + "epoch": 0.78, + "learning_rate": 4.377734890687561e-05, + "loss": 0.022, + "step": 136800 + }, + { + "epoch": 0.78, + "learning_rate": 4.335177585157113e-05, + "loss": 0.0164, + "step": 137000 + }, + { + "epoch": 0.78, + "learning_rate": 4.2928006903806404e-05, + "loss": 0.0185, + "step": 137200 + }, + { + "epoch": 0.78, + "learning_rate": 4.2508152745142374e-05, + "loss": 0.0273, + "step": 137400 + }, + { + "epoch": 0.78, + "learning_rate": 4.208799908482074e-05, + "loss": 0.0141, + "step": 137600 + }, + { + "epoch": 0.78, + "learning_rate": 4.166966561927144e-05, + "loss": 0.0173, + "step": 137800 + }, + { + "epoch": 0.78, + "learning_rate": 4.1253157673665675e-05, + "loss": 0.02, + "step": 138000 + }, + { + "epoch": 0.78, + "learning_rate": 4.083848054993692e-05, + "loss": 0.0276, + "step": 138200 + }, + { + "epoch": 0.79, + "learning_rate": 4.042563952671287e-05, + "loss": 0.0162, + "step": 138400 + }, + { + "epoch": 0.79, + "learning_rate": 4.0014639859248885e-05, + "loss": 0.0213, + "step": 138600 + }, + { + "epoch": 0.79, + "learning_rate": 3.960548677936065e-05, + "loss": 0.0228, + "step": 138800 + }, + { + "epoch": 0.79, + "learning_rate": 3.9198185495357965e-05, + "loss": 0.0224, + "step": 139000 + }, + { + "epoch": 0.79, + "learning_rate": 3.879274119197787e-05, + "loss": 0.0195, + "step": 139200 + }, + { + "epoch": 0.79, + "learning_rate": 3.8389159030319236e-05, + "loss": 0.0163, + "step": 139400 + }, + { + "epoch": 0.79, + "learning_rate": 3.79874441477767e-05, + "loss": 0.0194, + "step": 139600 + }, + { + "epoch": 0.79, + "learning_rate": 3.758760165797558e-05, + "loss": 0.0177, + "step": 139800 + }, + { + "epoch": 0.79, + "learning_rate": 3.718963665070633e-05, + "loss": 0.0197, + "step": 140000 + }, + { + "epoch": 0.8, + "learning_rate": 3.6793554191860186e-05, + "loss": 0.0202, + "step": 140200 + }, + { + "epoch": 0.8, + "learning_rate": 3.639935932336438e-05, + "loss": 0.0127, + "step": 140400 + }, + { + "epoch": 0.8, + "learning_rate": 3.6007057063118326e-05, + "loss": 0.0247, + "step": 140600 + }, + { + "epoch": 0.8, + "learning_rate": 3.561665240492917e-05, + "loss": 0.022, + "step": 140800 + }, + { + "epoch": 0.8, + "learning_rate": 3.522815031844875e-05, + "loss": 0.0198, + "step": 141000 + }, + { + "epoch": 0.8, + "learning_rate": 3.4841555749110164e-05, + "loss": 0.0164, + "step": 141200 + }, + { + "epoch": 0.8, + "learning_rate": 3.4458792263419346e-05, + "loss": 0.0249, + "step": 141400 + }, + { + "epoch": 0.8, + "learning_rate": 3.4076017868658e-05, + "loss": 0.0087, + "step": 141600 + }, + { + "epoch": 0.81, + "learning_rate": 3.369706512869315e-05, + "loss": 0.0293, + "step": 141800 + }, + { + "epoch": 0.81, + "learning_rate": 3.3318130301208905e-05, + "loss": 0.0273, + "step": 142000 + }, + { + "epoch": 0.81, + "learning_rate": 3.2941127304445294e-05, + "loss": 0.0147, + "step": 142200 + }, + { + "epoch": 0.81, + "learning_rate": 3.256606093745782e-05, + "loss": 0.0262, + "step": 142400 + }, + { + "epoch": 0.81, + "learning_rate": 3.219293597464966e-05, + "loss": 0.0278, + "step": 142600 + }, + { + "epoch": 0.81, + "learning_rate": 3.182175716571092e-05, + "loss": 0.025, + "step": 142800 + }, + { + "epoch": 0.81, + "learning_rate": 3.1452529235558165e-05, + "loss": 0.0212, + "step": 143000 + }, + { + "epoch": 0.81, + "learning_rate": 3.108525688427432e-05, + "loss": 0.0297, + "step": 143200 + }, + { + "epoch": 0.81, + "learning_rate": 3.071994478704871e-05, + "loss": 0.0141, + "step": 143400 + }, + { + "epoch": 0.82, + "learning_rate": 3.035659759411763e-05, + "loss": 0.0298, + "step": 143600 + }, + { + "epoch": 0.82, + "learning_rate": 2.9995219930705253e-05, + "loss": 0.0212, + "step": 143800 + }, + { + "epoch": 0.82, + "learning_rate": 2.9637608496407227e-05, + "loss": 0.0261, + "step": 144000 + }, + { + "epoch": 0.82, + "learning_rate": 2.928017376249928e-05, + "loss": 0.0436, + "step": 144200 + }, + { + "epoch": 0.82, + "learning_rate": 2.8924722260435328e-05, + "loss": 0.019, + "step": 144400 + }, + { + "epoch": 0.82, + "learning_rate": 2.8571258514931404e-05, + "loss": 0.0157, + "step": 144600 + }, + { + "epoch": 0.82, + "learning_rate": 2.8219787025400236e-05, + "loss": 0.0216, + "step": 144800 + }, + { + "epoch": 0.82, + "learning_rate": 2.787031226589443e-05, + "loss": 0.0246, + "step": 145000 + }, + { + "epoch": 0.82, + "learning_rate": 2.752283868504904e-05, + "loss": 0.0252, + "step": 145200 + }, + { + "epoch": 0.83, + "learning_rate": 2.7177370706025224e-05, + "loss": 0.0182, + "step": 145400 + }, + { + "epoch": 0.83, + "learning_rate": 2.6833912726453738e-05, + "loss": 0.0143, + "step": 145600 + }, + { + "epoch": 0.83, + "learning_rate": 2.649246911837925e-05, + "loss": 0.0198, + "step": 145800 + }, + { + "epoch": 0.83, + "learning_rate": 2.6153044228204397e-05, + "loss": 0.0123, + "step": 146000 + }, + { + "epoch": 0.83, + "learning_rate": 2.5815642376634615e-05, + "loss": 0.0385, + "step": 146200 + }, + { + "epoch": 0.83, + "learning_rate": 2.5480267858622927e-05, + "loss": 0.0177, + "step": 146400 + }, + { + "epoch": 0.83, + "learning_rate": 2.5148586597250578e-05, + "loss": 0.0278, + "step": 146600 + }, + { + "epoch": 0.83, + "learning_rate": 2.4817269338190568e-05, + "loss": 0.0252, + "step": 146800 + }, + { + "epoch": 0.83, + "learning_rate": 2.448799212146731e-05, + "loss": 0.0135, + "step": 147000 + }, + { + "epoch": 0.84, + "learning_rate": 2.4162390211592713e-05, + "loss": 0.0327, + "step": 147200 + }, + { + "epoch": 0.84, + "learning_rate": 2.3837195375797726e-05, + "loss": 0.0292, + "step": 147400 + }, + { + "epoch": 0.84, + "learning_rate": 2.351405305818026e-05, + "loss": 0.0288, + "step": 147600 + }, + { + "epoch": 0.84, + "learning_rate": 2.319296737217692e-05, + "loss": 0.0272, + "step": 147800 + }, + { + "epoch": 0.84, + "learning_rate": 2.2873942405044402e-05, + "loss": 0.0238, + "step": 148000 + }, + { + "epoch": 0.84, + "learning_rate": 2.2556982217807548e-05, + "loss": 0.0142, + "step": 148200 + }, + { + "epoch": 0.84, + "learning_rate": 2.2242090845207555e-05, + "loss": 0.0175, + "step": 148400 + }, + { + "epoch": 0.84, + "learning_rate": 2.19292722956507e-05, + "loss": 0.0151, + "step": 148600 + }, + { + "epoch": 0.84, + "learning_rate": 2.1618530551157263e-05, + "loss": 0.028, + "step": 148800 + }, + { + "epoch": 0.85, + "learning_rate": 2.1309869567310876e-05, + "loss": 0.0214, + "step": 149000 + }, + { + "epoch": 0.85, + "learning_rate": 2.100329327320813e-05, + "loss": 0.0218, + "step": 149200 + }, + { + "epoch": 0.85, + "learning_rate": 2.0698805571408578e-05, + "loss": 0.0188, + "step": 149400 + }, + { + "epoch": 0.85, + "learning_rate": 2.039641033788514e-05, + "loss": 0.0071, + "step": 149600 + }, + { + "epoch": 0.85, + "learning_rate": 2.0096111421974547e-05, + "loss": 0.0138, + "step": 149800 + }, + { + "epoch": 0.85, + "learning_rate": 1.979791264632855e-05, + "loss": 0.0284, + "step": 150000 + }, + { + "epoch": 0.85, + "learning_rate": 1.9501817806865195e-05, + "loss": 0.0154, + "step": 150200 + }, + { + "epoch": 0.85, + "learning_rate": 1.9207830672720558e-05, + "loss": 0.0229, + "step": 150400 + }, + { + "epoch": 0.86, + "learning_rate": 1.8915954986200532e-05, + "loss": 0.0259, + "step": 150600 + }, + { + "epoch": 0.86, + "learning_rate": 1.8626194462733508e-05, + "loss": 0.0187, + "step": 150800 + }, + { + "epoch": 0.86, + "learning_rate": 1.8338552790822838e-05, + "loss": 0.0171, + "step": 151000 + }, + { + "epoch": 0.86, + "learning_rate": 1.8053033632000137e-05, + "loss": 0.0266, + "step": 151200 + }, + { + "epoch": 0.86, + "learning_rate": 1.7771052291063565e-05, + "loss": 0.0356, + "step": 151400 + }, + { + "epoch": 0.86, + "learning_rate": 1.7489778377186878e-05, + "loss": 0.014, + "step": 151600 + }, + { + "epoch": 0.86, + "learning_rate": 1.7210637780862658e-05, + "loss": 0.0141, + "step": 151800 + }, + { + "epoch": 0.86, + "learning_rate": 1.693363405540805e-05, + "loss": 0.0244, + "step": 152000 + }, + { + "epoch": 0.86, + "learning_rate": 1.665877072693892e-05, + "loss": 0.0172, + "step": 152200 + }, + { + "epoch": 0.87, + "learning_rate": 1.638605129432503e-05, + "loss": 0.0271, + "step": 152400 + }, + { + "epoch": 0.87, + "learning_rate": 1.611547922914535e-05, + "loss": 0.0096, + "step": 152600 + }, + { + "epoch": 0.87, + "learning_rate": 1.584705797564406e-05, + "loss": 0.0308, + "step": 152800 + }, + { + "epoch": 0.87, + "learning_rate": 1.5580790950686504e-05, + "loss": 0.027, + "step": 153000 + }, + { + "epoch": 0.87, + "learning_rate": 1.531668154371589e-05, + "loss": 0.021, + "step": 153200 + }, + { + "epoch": 0.87, + "learning_rate": 1.5054733116709978e-05, + "loss": 0.0108, + "step": 153400 + }, + { + "epoch": 0.87, + "learning_rate": 1.4794949004138424e-05, + "loss": 0.0152, + "step": 153600 + }, + { + "epoch": 0.87, + "learning_rate": 1.4537332512920213e-05, + "loss": 0.0208, + "step": 153800 + }, + { + "epoch": 0.87, + "learning_rate": 1.4281886922381655e-05, + "loss": 0.0189, + "step": 154000 + }, + { + "epoch": 0.88, + "learning_rate": 1.4028615484214573e-05, + "loss": 0.0262, + "step": 154200 + }, + { + "epoch": 0.88, + "learning_rate": 1.3778771471221268e-05, + "loss": 0.0305, + "step": 154400 + }, + { + "epoch": 0.88, + "learning_rate": 1.35298470713599e-05, + "loss": 0.0378, + "step": 154600 + }, + { + "epoch": 0.88, + "learning_rate": 1.3283106396952985e-05, + "loss": 0.0212, + "step": 154800 + }, + { + "epoch": 0.88, + "learning_rate": 1.3038552588883296e-05, + "loss": 0.0139, + "step": 155000 + }, + { + "epoch": 0.88, + "learning_rate": 1.2796188760195822e-05, + "loss": 0.0268, + "step": 155200 + }, + { + "epoch": 0.88, + "learning_rate": 1.2556017996058265e-05, + "loss": 0.0164, + "step": 155400 + }, + { + "epoch": 0.88, + "learning_rate": 1.2318043353721693e-05, + "loss": 0.0118, + "step": 155600 + }, + { + "epoch": 0.88, + "learning_rate": 1.2082267862481735e-05, + "loss": 0.0203, + "step": 155800 + }, + { + "epoch": 0.89, + "learning_rate": 1.1848694523639894e-05, + "loss": 0.0221, + "step": 156000 + }, + { + "epoch": 0.89, + "learning_rate": 1.1617326310465425e-05, + "loss": 0.0155, + "step": 156200 + }, + { + "epoch": 0.89, + "learning_rate": 1.1388166168157457e-05, + "loss": 0.0217, + "step": 156400 + }, + { + "epoch": 0.89, + "learning_rate": 1.1161217013807514e-05, + "loss": 0.0277, + "step": 156600 + }, + { + "epoch": 0.89, + "learning_rate": 1.0937599900986833e-05, + "loss": 0.0159, + "step": 156800 + }, + { + "epoch": 0.89, + "learning_rate": 1.0715070270453825e-05, + "loss": 0.0238, + "step": 157000 + }, + { + "epoch": 0.89, + "learning_rate": 1.0494760196045681e-05, + "loss": 0.0184, + "step": 157200 + }, + { + "epoch": 0.89, + "learning_rate": 1.0276672482197057e-05, + "loss": 0.0125, + "step": 157400 + }, + { + "epoch": 0.89, + "learning_rate": 1.0060809905053135e-05, + "loss": 0.0379, + "step": 157600 + }, + { + "epoch": 0.9, + "learning_rate": 9.84717521243414e-06, + "loss": 0.0121, + "step": 157800 + }, + { + "epoch": 0.9, + "learning_rate": 9.63577112380061e-06, + "loss": 0.0301, + "step": 158000 + }, + { + "epoch": 0.9, + "learning_rate": 9.426600330218556e-06, + "loss": 0.0153, + "step": 158200 + }, + { + "epoch": 0.9, + "learning_rate": 9.21966549432532e-06, + "loss": 0.0139, + "step": 158400 + }, + { + "epoch": 0.9, + "learning_rate": 9.014969250295535e-06, + "loss": 0.0172, + "step": 158600 + }, + { + "epoch": 0.9, + "learning_rate": 8.812514203807878e-06, + "loss": 0.0237, + "step": 158800 + }, + { + "epoch": 0.9, + "learning_rate": 8.612302932011596e-06, + "loss": 0.041, + "step": 159000 + }, + { + "epoch": 0.9, + "learning_rate": 8.414337983493915e-06, + "loss": 0.0103, + "step": 159200 + }, + { + "epoch": 0.91, + "learning_rate": 8.218621878247375e-06, + "loss": 0.0257, + "step": 159400 + }, + { + "epoch": 0.91, + "learning_rate": 8.025157107638079e-06, + "loss": 0.0164, + "step": 159600 + }, + { + "epoch": 0.91, + "learning_rate": 7.833946134373719e-06, + "loss": 0.0135, + "step": 159800 + }, + { + "epoch": 0.91, + "learning_rate": 7.644991392472185e-06, + "loss": 0.0143, + "step": 160000 + }, + { + "epoch": 0.91, + "learning_rate": 7.4582952872307985e-06, + "loss": 0.0145, + "step": 160200 + }, + { + "epoch": 0.91, + "learning_rate": 7.2738601951956405e-06, + "loss": 0.0213, + "step": 160400 + }, + { + "epoch": 0.91, + "learning_rate": 7.091688464131197e-06, + "loss": 0.0215, + "step": 160600 + }, + { + "epoch": 0.91, + "learning_rate": 6.911782412990403e-06, + "loss": 0.0205, + "step": 160800 + }, + { + "epoch": 0.91, + "learning_rate": 6.734144331885486e-06, + "loss": 0.0221, + "step": 161000 + }, + { + "epoch": 0.92, + "learning_rate": 6.558776482058375e-06, + "loss": 0.0125, + "step": 161200 + }, + { + "epoch": 0.92, + "learning_rate": 6.3856810958522425e-06, + "loss": 0.009, + "step": 161400 + }, + { + "epoch": 0.92, + "learning_rate": 6.215708818435388e-06, + "loss": 0.0135, + "step": 161600 + }, + { + "epoch": 0.92, + "learning_rate": 6.047153551194691e-06, + "loss": 0.0271, + "step": 161800 + }, + { + "epoch": 0.92, + "learning_rate": 5.8808772602734375e-06, + "loss": 0.0242, + "step": 162000 + }, + { + "epoch": 0.92, + "learning_rate": 5.7168820622837906e-06, + "loss": 0.0153, + "step": 162200 + }, + { + "epoch": 0.92, + "learning_rate": 5.555170044800826e-06, + "loss": 0.0159, + "step": 162400 + }, + { + "epoch": 0.92, + "learning_rate": 5.396534712327065e-06, + "loss": 0.0279, + "step": 162600 + }, + { + "epoch": 0.92, + "learning_rate": 5.239383760959439e-06, + "loss": 0.0265, + "step": 162800 + }, + { + "epoch": 0.93, + "learning_rate": 5.084522068407875e-06, + "loss": 0.0251, + "step": 163000 + }, + { + "epoch": 0.93, + "learning_rate": 4.931951605982607e-06, + "loss": 0.0141, + "step": 163200 + }, + { + "epoch": 0.93, + "learning_rate": 4.781674315827682e-06, + "loss": 0.0143, + "step": 163400 + }, + { + "epoch": 0.93, + "learning_rate": 4.633692110896181e-06, + "loss": 0.0162, + "step": 163600 + }, + { + "epoch": 0.93, + "learning_rate": 4.48800687492597e-06, + "loss": 0.0108, + "step": 163800 + }, + { + "epoch": 0.93, + "learning_rate": 4.3446204624156405e-06, + "loss": 0.0173, + "step": 164000 + }, + { + "epoch": 0.93, + "learning_rate": 4.203534698601004e-06, + "loss": 0.014, + "step": 164200 + }, + { + "epoch": 0.93, + "learning_rate": 4.064751379431683e-06, + "loss": 0.0243, + "step": 164400 + }, + { + "epoch": 0.93, + "learning_rate": 3.9282722715484335e-06, + "loss": 0.0189, + "step": 164600 + }, + { + "epoch": 0.94, + "learning_rate": 3.7940991122605757e-06, + "loss": 0.016, + "step": 164800 + }, + { + "epoch": 0.94, + "learning_rate": 3.662233609523829e-06, + "loss": 0.0279, + "step": 165000 + }, + { + "epoch": 0.94, + "learning_rate": 3.5326774419187126e-06, + "loss": 0.019, + "step": 165200 + }, + { + "epoch": 0.94, + "learning_rate": 3.405432258628993e-06, + "loss": 0.0136, + "step": 165400 + }, + { + "epoch": 0.94, + "learning_rate": 3.2804996794208474e-06, + "loss": 0.0276, + "step": 165600 + }, + { + "epoch": 0.94, + "learning_rate": 3.1578812946221724e-06, + "loss": 0.024, + "step": 165800 + }, + { + "epoch": 0.94, + "learning_rate": 3.038174415263218e-06, + "loss": 0.0373, + "step": 166000 + }, + { + "epoch": 0.94, + "learning_rate": 2.920177482219368e-06, + "loss": 0.0204, + "step": 166200 + }, + { + "epoch": 0.94, + "learning_rate": 2.804499330302412e-06, + "loss": 0.016, + "step": 166400 + }, + { + "epoch": 0.95, + "learning_rate": 2.691141432036144e-06, + "loss": 0.023, + "step": 166600 + }, + { + "epoch": 0.95, + "learning_rate": 2.5801052304086817e-06, + "loss": 0.018, + "step": 166800 + }, + { + "epoch": 0.95, + "learning_rate": 2.4713921388541356e-06, + "loss": 0.0156, + "step": 167000 + }, + { + "epoch": 0.95, + "learning_rate": 2.365003541234589e-06, + "loss": 0.0249, + "step": 167200 + }, + { + "epoch": 0.95, + "learning_rate": 2.2609407918225517e-06, + "loss": 0.0186, + "step": 167400 + }, + { + "epoch": 0.95, + "learning_rate": 2.1592052152836374e-06, + "loss": 0.0104, + "step": 167600 + }, + { + "epoch": 0.95, + "learning_rate": 2.059798106659755e-06, + "loss": 0.0189, + "step": 167800 + }, + { + "epoch": 0.95, + "learning_rate": 1.9627207313525887e-06, + "loss": 0.028, + "step": 168000 + }, + { + "epoch": 0.96, + "learning_rate": 1.8679743251075292e-06, + "loss": 0.0186, + "step": 168200 + }, + { + "epoch": 0.96, + "learning_rate": 1.7755600939978937e-06, + "loss": 0.0279, + "step": 168400 + }, + { + "epoch": 0.96, + "learning_rate": 1.6854792144096584e-06, + "loss": 0.0143, + "step": 168600 + }, + { + "epoch": 0.96, + "learning_rate": 1.5977328330263757e-06, + "loss": 0.044, + "step": 168800 + }, + { + "epoch": 0.96, + "learning_rate": 1.5123220668146281e-06, + "loss": 0.0219, + "step": 169000 + }, + { + "epoch": 0.96, + "learning_rate": 1.429248003009848e-06, + "loss": 0.0186, + "step": 169200 + }, + { + "epoch": 0.96, + "learning_rate": 1.3485116991023881e-06, + "loss": 0.0167, + "step": 169400 + }, + { + "epoch": 0.96, + "learning_rate": 1.2701141828241649e-06, + "loss": 0.0204, + "step": 169600 + }, + { + "epoch": 0.96, + "learning_rate": 1.1940564521355066e-06, + "loss": 0.0139, + "step": 169800 + }, + { + "epoch": 0.97, + "learning_rate": 1.1203394752124363e-06, + "loss": 0.028, + "step": 170000 + }, + { + "epoch": 0.97, + "learning_rate": 1.049315240383742e-06, + "loss": 0.022, + "step": 170200 + }, + { + "epoch": 0.97, + "learning_rate": 9.806102344292969e-07, + "loss": 0.0216, + "step": 170400 + }, + { + "epoch": 0.97, + "learning_rate": 9.138975907874411e-07, + "loss": 0.0164, + "step": 170600 + }, + { + "epoch": 0.97, + "learning_rate": 8.495292671902981e-07, + "loss": 0.0085, + "step": 170800 + }, + { + "epoch": 0.97, + "learning_rate": 7.875060830137766e-07, + "loss": 0.0185, + "step": 171000 + }, + { + "epoch": 0.97, + "learning_rate": 7.278288277813219e-07, + "loss": 0.0248, + "step": 171200 + }, + { + "epoch": 0.97, + "learning_rate": 6.704982611538892e-07, + "loss": 0.0329, + "step": 171400 + }, + { + "epoch": 0.97, + "learning_rate": 6.15515112920264e-07, + "loss": 0.0138, + "step": 171600 + }, + { + "epoch": 0.98, + "learning_rate": 5.62880082987879e-07, + "loss": 0.0128, + "step": 171800 + }, + { + "epoch": 0.98, + "learning_rate": 5.125938413737512e-07, + "loss": 0.0163, + "step": 172000 + }, + { + "epoch": 0.98, + "learning_rate": 4.646570281960375e-07, + "loss": 0.0347, + "step": 172200 + }, + { + "epoch": 0.98, + "learning_rate": 4.190702536658564e-07, + "loss": 0.0232, + "step": 172400 + }, + { + "epoch": 0.98, + "learning_rate": 3.7583409807958233e-07, + "loss": 0.0225, + "step": 172600 + }, + { + "epoch": 0.98, + "learning_rate": 3.3494911181136674e-07, + "loss": 0.019, + "step": 172800 + }, + { + "epoch": 0.98, + "learning_rate": 2.964158153061924e-07, + "loss": 0.0155, + "step": 173000 + }, + { + "epoch": 0.98, + "learning_rate": 2.60234699073237e-07, + "loss": 0.0186, + "step": 173200 + }, + { + "epoch": 0.98, + "learning_rate": 2.265695131377354e-07, + "loss": 0.0216, + "step": 173400 + }, + { + "epoch": 0.99, + "learning_rate": 1.9508234282376272e-07, + "loss": 0.0087, + "step": 173600 + }, + { + "epoch": 0.99, + "learning_rate": 1.6594864270533008e-07, + "loss": 0.0149, + "step": 173800 + }, + { + "epoch": 0.99, + "learning_rate": 1.3916878363954376e-07, + "loss": 0.0171, + "step": 174000 + }, + { + "epoch": 0.99, + "learning_rate": 1.1474310652035549e-07, + "loss": 0.025, + "step": 174200 + }, + { + "epoch": 0.99, + "learning_rate": 9.26719222741447e-08, + "loss": 0.0136, + "step": 174400 + }, + { + "epoch": 0.99, + "learning_rate": 7.295551185577334e-08, + "loss": 0.0196, + "step": 174600 + }, + { + "epoch": 0.99, + "learning_rate": 5.559412624511325e-08, + "loss": 0.0167, + "step": 174800 + }, + { + "epoch": 0.99, + "learning_rate": 4.058798644371748e-08, + "loss": 0.0247, + "step": 175000 + }, + { + "epoch": 0.99, + "learning_rate": 2.793728347208737e-08, + "loss": 0.0182, + "step": 175200 + }, + { + "epoch": 1.0, + "learning_rate": 1.7642178367165738e-08, + "loss": 0.0112, + "step": 175400 + }, + { + "epoch": 1.0, + "learning_rate": 9.702802180446488e-09, + "loss": 0.0229, + "step": 175600 + }, + { + "epoch": 1.0, + "learning_rate": 4.11925597606365e-09, + "loss": 0.0155, + "step": 175800 + }, + { + "epoch": 1.0, + "learning_rate": 8.916108297639875e-10, + "loss": 0.0201, + "step": 176000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9876599555715365, + "eval_auc": 0.8673592596422711, + "eval_f1": 0.24899413187145658, + "eval_loss": 0.4661065936088562, + "eval_mcc": 0.3305508498121041, + "eval_precision": 0.14941997670219148, + "eval_recall": 0.7463955099754822, + "eval_runtime": 10361.3775, + "eval_samples_per_second": 43.805, + "eval_steps_per_second": 4.38, + "step": 176106 + } + ], + "logging_steps": 200, + "max_steps": 176106, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 3.528456960194662e+17, + "trial_name": null, + "trial_params": null +}