MilaWang committed (verified)
Commit c742c4f · 1 Parent(s): 72710eb

Upload folder using huggingface_hub

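The added training_log.jsonl holds one JSON object per line (the diff below adds a single record). Below is a minimal sketch for reading it and extracting the logged loss curve, assuming only the Python standard library and the field names visible in that record; the path is the one named in this commit.

```python
import json

# Path as named in this commit; adjust if the folder layout differs.
log_path = (
    "Qwen2-7B-Instruct_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_"
    "lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-51190-sd-1/"
    "training_log.jsonl"
)

# JSONL: one JSON object per non-empty line.
with open(log_path) as f:
    records = [json.loads(line) for line in f if line.strip()]

record = records[0]
print("hyperparameters:", record["params"])
# Units are whatever the logger recorded (values here look like MB-scale numbers).
print("peak GPU memory usage:", record["memory_usage"]["peak_memory_usage"])

# Each log_history entry carries loss, grad_norm, learning_rate, epoch and step.
steps = [entry["step"] for entry in record["log_history"]]
losses = [entry["loss"] for entry in record["log_history"]]
print(f"{len(steps)} logged points; loss at step {steps[-1]}: {losses[-1]}")
```
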
Qwen2-7B-Instruct_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-51190-sd-1/training_log.jsonl ADDED
@@ -0,0 +1 @@
+ {"epoch": 0.9999265300124899, "step": 6805, "epoch_duration": 50610.295933008194, "total_accumulated_duration": 50610.295933008194, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7595.9169921875}, "peak_memory_usage": {"GPU_0": 9729.68408203125}, "avg_memory_reserved": {"GPU_0": 10386.0}, "peak_memory_reserved": {"GPU_0": 10386.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 1.3944, "grad_norm": 0.2628404498100281, "learning_rate": 0.0002, "epoch": 0.0014693997502020426, "step": 10}, {"loss": 1.1685, "grad_norm": 0.26902055740356445, "learning_rate": 0.0002, "epoch": 0.002938799500404085, "step": 20}, {"loss": 0.9726, "grad_norm": 0.23114199936389923, "learning_rate": 0.0002, "epoch": 0.004408199250606128, "step": 30}, {"loss": 0.9646, "grad_norm": 0.17827735841274261, "learning_rate": 0.0002, "epoch": 0.00587759900080817, "step": 40}, {"loss": 0.8965, "grad_norm": 0.19027037918567657, "learning_rate": 0.0002, "epoch": 0.007346998751010212, "step": 50}, {"loss": 0.8958, "grad_norm": 0.2437688112258911, "learning_rate": 0.0002, "epoch": 0.008816398501212255, "step": 60}, {"loss": 0.9566, "grad_norm": 0.23619255423545837, "learning_rate": 0.0002, "epoch": 0.010285798251414298, "step": 70}, {"loss": 0.9482, "grad_norm": 0.26972177624702454, "learning_rate": 0.0002, "epoch": 0.01175519800161634, "step": 80}, {"loss": 0.8933, "grad_norm": 0.21696068346500397, "learning_rate": 0.0002, "epoch": 0.013224597751818383, "step": 90}, {"loss": 0.89, "grad_norm": 0.1964431256055832, "learning_rate": 0.0002, "epoch": 0.014693997502020424, "step": 100}, {"loss": 0.8994, "grad_norm": 0.20619083940982819, "learning_rate": 0.0002, "epoch": 0.016163397252222468, "step": 110}, {"loss": 0.9072, "grad_norm": 0.16468696296215057, "learning_rate": 0.0002, "epoch": 0.01763279700242451, "step": 120}, {"loss": 0.9101, "grad_norm": 0.1845995932817459, "learning_rate": 0.0002, "epoch": 0.019102196752626553, "step": 130}, {"loss": 1.0233, "grad_norm": 0.20066170394420624, "learning_rate": 0.0002, "epoch": 0.020571596502828596, "step": 140}, {"loss": 0.8837, "grad_norm": 0.2059483677148819, "learning_rate": 0.0002, "epoch": 0.02204099625303064, "step": 150}, {"loss": 0.8807, "grad_norm": 0.20251287519931793, "learning_rate": 0.0002, "epoch": 0.02351039600323268, "step": 160}, {"loss": 0.9276, "grad_norm": 0.14769341051578522, "learning_rate": 0.0002, "epoch": 0.024979795753434723, "step": 170}, {"loss": 0.9855, "grad_norm": 0.2342429906129837, "learning_rate": 0.0002, "epoch": 0.026449195503636766, "step": 180}, {"loss": 0.9103, "grad_norm": 0.21408909559249878, "learning_rate": 0.0002, "epoch": 0.02791859525383881, "step": 190}, {"loss": 0.9208, "grad_norm": 0.252082884311676, "learning_rate": 0.0002, "epoch": 0.029387995004040848, "step": 200}, {"loss": 0.9592, "grad_norm": 0.1764577180147171, "learning_rate": 0.0002, "epoch": 0.03085739475424289, "step": 210}, {"loss": 0.8845, "grad_norm": 0.16425837576389313, "learning_rate": 0.0002, "epoch": 0.032326794504444936, "step": 220}, {"loss": 0.886, "grad_norm": 0.20786285400390625, "learning_rate": 0.0002, "epoch": 0.03379619425464698, "step": 230}, {"loss": 0.9075, "grad_norm": 0.1842186599969864, "learning_rate": 0.0002, "epoch": 0.03526559400484902, 
"step": 240}, {"loss": 0.9969, "grad_norm": 0.19879893958568573, "learning_rate": 0.0002, "epoch": 0.036734993755051064, "step": 250}, {"loss": 0.904, "grad_norm": 0.1845850795507431, "learning_rate": 0.0002, "epoch": 0.038204393505253106, "step": 260}, {"loss": 0.9372, "grad_norm": 0.15562455356121063, "learning_rate": 0.0002, "epoch": 0.03967379325545515, "step": 270}, {"loss": 0.9113, "grad_norm": 0.18162056803703308, "learning_rate": 0.0002, "epoch": 0.04114319300565719, "step": 280}, {"loss": 0.8798, "grad_norm": 0.19138266146183014, "learning_rate": 0.0002, "epoch": 0.042612592755859234, "step": 290}, {"loss": 0.9455, "grad_norm": 0.19662004709243774, "learning_rate": 0.0002, "epoch": 0.04408199250606128, "step": 300}, {"loss": 0.873, "grad_norm": 0.19902509450912476, "learning_rate": 0.0002, "epoch": 0.04555139225626332, "step": 310}, {"loss": 0.8569, "grad_norm": 0.20064237713813782, "learning_rate": 0.0002, "epoch": 0.04702079200646536, "step": 320}, {"loss": 0.9147, "grad_norm": 0.20255033671855927, "learning_rate": 0.0002, "epoch": 0.048490191756667404, "step": 330}, {"loss": 0.8577, "grad_norm": 0.1405775398015976, "learning_rate": 0.0002, "epoch": 0.04995959150686945, "step": 340}, {"loss": 0.8856, "grad_norm": 0.1666014939546585, "learning_rate": 0.0002, "epoch": 0.05142899125707149, "step": 350}, {"loss": 0.8844, "grad_norm": 0.1537073850631714, "learning_rate": 0.0002, "epoch": 0.05289839100727353, "step": 360}, {"loss": 0.8853, "grad_norm": 0.14424993097782135, "learning_rate": 0.0002, "epoch": 0.054367790757475574, "step": 370}, {"loss": 0.9154, "grad_norm": 0.176529198884964, "learning_rate": 0.0002, "epoch": 0.05583719050767762, "step": 380}, {"loss": 0.9033, "grad_norm": 0.20812897384166718, "learning_rate": 0.0002, "epoch": 0.05730659025787966, "step": 390}, {"loss": 0.8935, "grad_norm": 0.1401512622833252, "learning_rate": 0.0002, "epoch": 0.058775990008081695, "step": 400}, {"loss": 0.8733, "grad_norm": 0.13527163863182068, "learning_rate": 0.0002, "epoch": 0.06024538975828374, "step": 410}, {"loss": 0.8785, "grad_norm": 0.15009990334510803, "learning_rate": 0.0002, "epoch": 0.06171478950848578, "step": 420}, {"loss": 0.9569, "grad_norm": 0.31983858346939087, "learning_rate": 0.0002, "epoch": 0.06318418925868782, "step": 430}, {"loss": 0.9225, "grad_norm": 0.16097241640090942, "learning_rate": 0.0002, "epoch": 0.06465358900888987, "step": 440}, {"loss": 0.893, "grad_norm": 0.15686756372451782, "learning_rate": 0.0002, "epoch": 0.06612298875909191, "step": 450}, {"loss": 0.8477, "grad_norm": 0.1496899425983429, "learning_rate": 0.0002, "epoch": 0.06759238850929396, "step": 460}, {"loss": 0.9012, "grad_norm": 0.15703721344470978, "learning_rate": 0.0002, "epoch": 0.069061788259496, "step": 470}, {"loss": 0.9421, "grad_norm": 0.1983751803636551, "learning_rate": 0.0002, "epoch": 0.07053118800969804, "step": 480}, {"loss": 0.8585, "grad_norm": 0.16249017417430878, "learning_rate": 0.0002, "epoch": 0.07200058775990008, "step": 490}, {"loss": 0.8916, "grad_norm": 0.15216057002544403, "learning_rate": 0.0002, "epoch": 0.07346998751010213, "step": 500}, {"loss": 0.9181, "grad_norm": 0.16753150522708893, "learning_rate": 0.0002, "epoch": 0.07493938726030416, "step": 510}, {"loss": 0.9113, "grad_norm": 0.14005446434020996, "learning_rate": 0.0002, "epoch": 0.07640878701050621, "step": 520}, {"loss": 0.8607, "grad_norm": 0.14988040924072266, "learning_rate": 0.0002, "epoch": 0.07787818676070825, "step": 530}, {"loss": 0.8743, "grad_norm": 0.16093963384628296, "learning_rate": 
0.0002, "epoch": 0.0793475865109103, "step": 540}, {"loss": 0.8878, "grad_norm": 0.16959452629089355, "learning_rate": 0.0002, "epoch": 0.08081698626111233, "step": 550}, {"loss": 0.8678, "grad_norm": 0.18972469866275787, "learning_rate": 0.0002, "epoch": 0.08228638601131438, "step": 560}, {"loss": 0.9888, "grad_norm": 0.15368588268756866, "learning_rate": 0.0002, "epoch": 0.08375578576151642, "step": 570}, {"loss": 0.9115, "grad_norm": 0.16340570151805878, "learning_rate": 0.0002, "epoch": 0.08522518551171847, "step": 580}, {"loss": 0.9767, "grad_norm": 0.14627322554588318, "learning_rate": 0.0002, "epoch": 0.0866945852619205, "step": 590}, {"loss": 0.9094, "grad_norm": 0.174230694770813, "learning_rate": 0.0002, "epoch": 0.08816398501212255, "step": 600}, {"loss": 0.9153, "grad_norm": 0.15915071964263916, "learning_rate": 0.0002, "epoch": 0.08963338476232459, "step": 610}, {"loss": 0.8582, "grad_norm": 0.14797250926494598, "learning_rate": 0.0002, "epoch": 0.09110278451252664, "step": 620}, {"loss": 0.8607, "grad_norm": 0.19358739256858826, "learning_rate": 0.0002, "epoch": 0.09257218426272867, "step": 630}, {"loss": 0.9048, "grad_norm": 0.15533459186553955, "learning_rate": 0.0002, "epoch": 0.09404158401293072, "step": 640}, {"loss": 0.9169, "grad_norm": 0.1713729053735733, "learning_rate": 0.0002, "epoch": 0.09551098376313276, "step": 650}, {"loss": 0.9082, "grad_norm": 0.179523304104805, "learning_rate": 0.0002, "epoch": 0.09698038351333481, "step": 660}, {"loss": 0.9156, "grad_norm": 0.149130716919899, "learning_rate": 0.0002, "epoch": 0.09844978326353684, "step": 670}, {"loss": 0.8137, "grad_norm": 0.15347588062286377, "learning_rate": 0.0002, "epoch": 0.0999191830137389, "step": 680}, {"loss": 0.9087, "grad_norm": 0.2856849133968353, "learning_rate": 0.0002, "epoch": 0.10138858276394093, "step": 690}, {"loss": 0.8736, "grad_norm": 0.17660492658615112, "learning_rate": 0.0002, "epoch": 0.10285798251414298, "step": 700}, {"loss": 0.9027, "grad_norm": 0.1642356514930725, "learning_rate": 0.0002, "epoch": 0.10432738226434501, "step": 710}, {"loss": 0.895, "grad_norm": 0.16411985456943512, "learning_rate": 0.0002, "epoch": 0.10579678201454706, "step": 720}, {"loss": 0.8852, "grad_norm": 0.15387141704559326, "learning_rate": 0.0002, "epoch": 0.1072661817647491, "step": 730}, {"loss": 0.9004, "grad_norm": 0.1746252477169037, "learning_rate": 0.0002, "epoch": 0.10873558151495115, "step": 740}, {"loss": 0.8936, "grad_norm": 0.15617932379245758, "learning_rate": 0.0002, "epoch": 0.11020498126515318, "step": 750}, {"loss": 0.9514, "grad_norm": 0.2052323818206787, "learning_rate": 0.0002, "epoch": 0.11167438101535523, "step": 760}, {"loss": 0.8808, "grad_norm": 0.2170325517654419, "learning_rate": 0.0002, "epoch": 0.11314378076555727, "step": 770}, {"loss": 0.9022, "grad_norm": 0.17382995784282684, "learning_rate": 0.0002, "epoch": 0.11461318051575932, "step": 780}, {"loss": 0.8807, "grad_norm": 0.1570100337266922, "learning_rate": 0.0002, "epoch": 0.11608258026596135, "step": 790}, {"loss": 0.8951, "grad_norm": 0.14517875015735626, "learning_rate": 0.0002, "epoch": 0.11755198001616339, "step": 800}, {"loss": 0.8555, "grad_norm": 0.13214264810085297, "learning_rate": 0.0002, "epoch": 0.11902137976636544, "step": 810}, {"loss": 0.9038, "grad_norm": 0.15619876980781555, "learning_rate": 0.0002, "epoch": 0.12049077951656748, "step": 820}, {"loss": 0.9089, "grad_norm": 0.1697862595319748, "learning_rate": 0.0002, "epoch": 0.12196017926676953, "step": 830}, {"loss": 0.8905, "grad_norm": 
0.1580158770084381, "learning_rate": 0.0002, "epoch": 0.12342957901697156, "step": 840}, {"loss": 0.8636, "grad_norm": 0.2058696299791336, "learning_rate": 0.0002, "epoch": 0.12489897876717361, "step": 850}, {"loss": 0.9145, "grad_norm": 0.19274809956550598, "learning_rate": 0.0002, "epoch": 0.12636837851737565, "step": 860}, {"loss": 0.8972, "grad_norm": 0.17793923616409302, "learning_rate": 0.0002, "epoch": 0.1278377782675777, "step": 870}, {"loss": 0.8328, "grad_norm": 0.16360078752040863, "learning_rate": 0.0002, "epoch": 0.12930717801777974, "step": 880}, {"loss": 0.9026, "grad_norm": 0.18728993833065033, "learning_rate": 0.0002, "epoch": 0.13077657776798177, "step": 890}, {"loss": 0.8687, "grad_norm": 0.14911283552646637, "learning_rate": 0.0002, "epoch": 0.13224597751818382, "step": 900}, {"loss": 0.8341, "grad_norm": 0.17675413191318512, "learning_rate": 0.0002, "epoch": 0.13371537726838587, "step": 910}, {"loss": 0.8775, "grad_norm": 0.15698543190956116, "learning_rate": 0.0002, "epoch": 0.13518477701858791, "step": 920}, {"loss": 0.9024, "grad_norm": 0.19533422589302063, "learning_rate": 0.0002, "epoch": 0.13665417676878994, "step": 930}, {"loss": 0.8989, "grad_norm": 0.15936076641082764, "learning_rate": 0.0002, "epoch": 0.138123576518992, "step": 940}, {"loss": 0.8995, "grad_norm": 0.17310801148414612, "learning_rate": 0.0002, "epoch": 0.13959297626919404, "step": 950}, {"loss": 0.8811, "grad_norm": 0.18028485774993896, "learning_rate": 0.0002, "epoch": 0.14106237601939609, "step": 960}, {"loss": 0.9251, "grad_norm": 0.18243688344955444, "learning_rate": 0.0002, "epoch": 0.1425317757695981, "step": 970}, {"loss": 0.9257, "grad_norm": 0.2133779674768448, "learning_rate": 0.0002, "epoch": 0.14400117551980016, "step": 980}, {"loss": 0.8625, "grad_norm": 0.20065979659557343, "learning_rate": 0.0002, "epoch": 0.1454705752700022, "step": 990}, {"loss": 0.944, "grad_norm": 0.18161214888095856, "learning_rate": 0.0002, "epoch": 0.14693997502020426, "step": 1000}, {"loss": 0.9391, "grad_norm": 0.16673357784748077, "learning_rate": 0.0002, "epoch": 0.14840937477040628, "step": 1010}, {"loss": 0.9164, "grad_norm": 0.20218193531036377, "learning_rate": 0.0002, "epoch": 0.14987877452060833, "step": 1020}, {"loss": 0.8906, "grad_norm": 0.1889781355857849, "learning_rate": 0.0002, "epoch": 0.15134817427081038, "step": 1030}, {"loss": 0.86, "grad_norm": 0.17030788958072662, "learning_rate": 0.0002, "epoch": 0.15281757402101243, "step": 1040}, {"loss": 0.9063, "grad_norm": 0.16211169958114624, "learning_rate": 0.0002, "epoch": 0.15428697377121445, "step": 1050}, {"loss": 0.8581, "grad_norm": 0.16340748965740204, "learning_rate": 0.0002, "epoch": 0.1557563735214165, "step": 1060}, {"loss": 0.9662, "grad_norm": 0.1806364357471466, "learning_rate": 0.0002, "epoch": 0.15722577327161855, "step": 1070}, {"loss": 0.8573, "grad_norm": 0.19921894371509552, "learning_rate": 0.0002, "epoch": 0.1586951730218206, "step": 1080}, {"loss": 0.8745, "grad_norm": 0.19322258234024048, "learning_rate": 0.0002, "epoch": 0.16016457277202262, "step": 1090}, {"loss": 0.8664, "grad_norm": 0.18861214816570282, "learning_rate": 0.0002, "epoch": 0.16163397252222467, "step": 1100}, {"loss": 0.9013, "grad_norm": 0.16663247346878052, "learning_rate": 0.0002, "epoch": 0.16310337227242672, "step": 1110}, {"loss": 0.869, "grad_norm": 0.1504918783903122, "learning_rate": 0.0002, "epoch": 0.16457277202262877, "step": 1120}, {"loss": 0.8595, "grad_norm": 0.17304487526416779, "learning_rate": 0.0002, "epoch": 0.1660421717728308, 
"step": 1130}, {"loss": 0.863, "grad_norm": 0.21680335700511932, "learning_rate": 0.0002, "epoch": 0.16751157152303284, "step": 1140}, {"loss": 0.8856, "grad_norm": 0.1611625701189041, "learning_rate": 0.0002, "epoch": 0.1689809712732349, "step": 1150}, {"loss": 0.8839, "grad_norm": 0.169824481010437, "learning_rate": 0.0002, "epoch": 0.17045037102343694, "step": 1160}, {"loss": 0.9471, "grad_norm": 0.152205690741539, "learning_rate": 0.0002, "epoch": 0.17191977077363896, "step": 1170}, {"loss": 0.8782, "grad_norm": 0.1666167974472046, "learning_rate": 0.0002, "epoch": 0.173389170523841, "step": 1180}, {"loss": 0.8795, "grad_norm": 0.17074887454509735, "learning_rate": 0.0002, "epoch": 0.17485857027404306, "step": 1190}, {"loss": 0.8865, "grad_norm": 0.16788874566555023, "learning_rate": 0.0002, "epoch": 0.1763279700242451, "step": 1200}, {"loss": 0.8618, "grad_norm": 0.19489890336990356, "learning_rate": 0.0002, "epoch": 0.17779736977444713, "step": 1210}, {"loss": 0.8671, "grad_norm": 0.1895831674337387, "learning_rate": 0.0002, "epoch": 0.17926676952464918, "step": 1220}, {"loss": 0.932, "grad_norm": 0.18392078578472137, "learning_rate": 0.0002, "epoch": 0.18073616927485123, "step": 1230}, {"loss": 0.8877, "grad_norm": 0.19173933565616608, "learning_rate": 0.0002, "epoch": 0.18220556902505328, "step": 1240}, {"loss": 0.8628, "grad_norm": 0.146264910697937, "learning_rate": 0.0002, "epoch": 0.1836749687752553, "step": 1250}, {"loss": 0.8934, "grad_norm": 0.21783572435379028, "learning_rate": 0.0002, "epoch": 0.18514436852545735, "step": 1260}, {"loss": 0.8971, "grad_norm": 0.19816766679286957, "learning_rate": 0.0002, "epoch": 0.1866137682756594, "step": 1270}, {"loss": 0.9032, "grad_norm": 0.17717234790325165, "learning_rate": 0.0002, "epoch": 0.18808316802586145, "step": 1280}, {"loss": 0.9073, "grad_norm": 0.16765505075454712, "learning_rate": 0.0002, "epoch": 0.18955256777606347, "step": 1290}, {"loss": 0.8943, "grad_norm": 0.19781021773815155, "learning_rate": 0.0002, "epoch": 0.19102196752626552, "step": 1300}, {"loss": 0.8957, "grad_norm": 0.16942673921585083, "learning_rate": 0.0002, "epoch": 0.19249136727646757, "step": 1310}, {"loss": 0.9209, "grad_norm": 0.15926216542720795, "learning_rate": 0.0002, "epoch": 0.19396076702666962, "step": 1320}, {"loss": 0.8857, "grad_norm": 0.18393829464912415, "learning_rate": 0.0002, "epoch": 0.19543016677687164, "step": 1330}, {"loss": 0.9231, "grad_norm": 0.15749335289001465, "learning_rate": 0.0002, "epoch": 0.1968995665270737, "step": 1340}, {"loss": 0.8278, "grad_norm": 0.1571228802204132, "learning_rate": 0.0002, "epoch": 0.19836896627727574, "step": 1350}, {"loss": 0.9103, "grad_norm": 0.17407122254371643, "learning_rate": 0.0002, "epoch": 0.1998383660274778, "step": 1360}, {"loss": 0.8562, "grad_norm": 0.19394035637378693, "learning_rate": 0.0002, "epoch": 0.2013077657776798, "step": 1370}, {"loss": 0.8661, "grad_norm": 0.16006936132907867, "learning_rate": 0.0002, "epoch": 0.20277716552788186, "step": 1380}, {"loss": 0.8849, "grad_norm": 0.17441940307617188, "learning_rate": 0.0002, "epoch": 0.2042465652780839, "step": 1390}, {"loss": 0.9134, "grad_norm": 0.19510607421398163, "learning_rate": 0.0002, "epoch": 0.20571596502828596, "step": 1400}, {"loss": 0.936, "grad_norm": 0.17903536558151245, "learning_rate": 0.0002, "epoch": 0.20718536477848798, "step": 1410}, {"loss": 0.8737, "grad_norm": 0.19734551012516022, "learning_rate": 0.0002, "epoch": 0.20865476452869003, "step": 1420}, {"loss": 0.8345, "grad_norm": 0.16567498445510864, 
"learning_rate": 0.0002, "epoch": 0.21012416427889208, "step": 1430}, {"loss": 0.859, "grad_norm": 0.16674315929412842, "learning_rate": 0.0002, "epoch": 0.21159356402909413, "step": 1440}, {"loss": 0.8554, "grad_norm": 0.17553065717220306, "learning_rate": 0.0002, "epoch": 0.21306296377929615, "step": 1450}, {"loss": 0.8733, "grad_norm": 0.19126258790493011, "learning_rate": 0.0002, "epoch": 0.2145323635294982, "step": 1460}, {"loss": 0.8554, "grad_norm": 0.15958143770694733, "learning_rate": 0.0002, "epoch": 0.21600176327970025, "step": 1470}, {"loss": 0.9339, "grad_norm": 0.20436005294322968, "learning_rate": 0.0002, "epoch": 0.2174711630299023, "step": 1480}, {"loss": 0.8781, "grad_norm": 0.14376017451286316, "learning_rate": 0.0002, "epoch": 0.21894056278010432, "step": 1490}, {"loss": 0.9155, "grad_norm": 0.18457838892936707, "learning_rate": 0.0002, "epoch": 0.22040996253030637, "step": 1500}, {"loss": 0.8922, "grad_norm": 0.1956702023744583, "learning_rate": 0.0002, "epoch": 0.22187936228050842, "step": 1510}, {"loss": 0.9124, "grad_norm": 0.1598522961139679, "learning_rate": 0.0002, "epoch": 0.22334876203071047, "step": 1520}, {"loss": 0.8725, "grad_norm": 0.17595723271369934, "learning_rate": 0.0002, "epoch": 0.2248181617809125, "step": 1530}, {"loss": 0.915, "grad_norm": 0.1683538407087326, "learning_rate": 0.0002, "epoch": 0.22628756153111454, "step": 1540}, {"loss": 0.9638, "grad_norm": 0.1862228959798813, "learning_rate": 0.0002, "epoch": 0.2277569612813166, "step": 1550}, {"loss": 0.9493, "grad_norm": 0.18573135137557983, "learning_rate": 0.0002, "epoch": 0.22922636103151864, "step": 1560}, {"loss": 0.8887, "grad_norm": 0.15138770639896393, "learning_rate": 0.0002, "epoch": 0.23069576078172066, "step": 1570}, {"loss": 0.8579, "grad_norm": 0.18936845660209656, "learning_rate": 0.0002, "epoch": 0.2321651605319227, "step": 1580}, {"loss": 0.8588, "grad_norm": 0.1812523901462555, "learning_rate": 0.0002, "epoch": 0.23363456028212476, "step": 1590}, {"loss": 0.9177, "grad_norm": 0.18287526071071625, "learning_rate": 0.0002, "epoch": 0.23510396003232678, "step": 1600}, {"loss": 0.8716, "grad_norm": 0.18949879705905914, "learning_rate": 0.0002, "epoch": 0.23657335978252883, "step": 1610}, {"loss": 0.8591, "grad_norm": 0.16236399114131927, "learning_rate": 0.0002, "epoch": 0.23804275953273088, "step": 1620}, {"loss": 0.9337, "grad_norm": 0.1702779084444046, "learning_rate": 0.0002, "epoch": 0.23951215928293293, "step": 1630}, {"loss": 0.8399, "grad_norm": 0.1654399037361145, "learning_rate": 0.0002, "epoch": 0.24098155903313495, "step": 1640}, {"loss": 0.9413, "grad_norm": 0.19532331824302673, "learning_rate": 0.0002, "epoch": 0.242450958783337, "step": 1650}, {"loss": 0.8764, "grad_norm": 0.1892472207546234, "learning_rate": 0.0002, "epoch": 0.24392035853353905, "step": 1660}, {"loss": 0.8801, "grad_norm": 0.1531067043542862, "learning_rate": 0.0002, "epoch": 0.2453897582837411, "step": 1670}, {"loss": 0.9068, "grad_norm": 0.18884031474590302, "learning_rate": 0.0002, "epoch": 0.24685915803394312, "step": 1680}, {"loss": 0.8957, "grad_norm": 0.15703779458999634, "learning_rate": 0.0002, "epoch": 0.24832855778414517, "step": 1690}, {"loss": 0.9255, "grad_norm": 0.2761041820049286, "learning_rate": 0.0002, "epoch": 0.24979795753434722, "step": 1700}, {"loss": 0.9356, "grad_norm": 0.23378264904022217, "learning_rate": 0.0002, "epoch": 0.25126735728454924, "step": 1710}, {"loss": 0.8968, "grad_norm": 0.1831875592470169, "learning_rate": 0.0002, "epoch": 0.2527367570347513, "step": 
1720}, {"loss": 0.8474, "grad_norm": 0.17603611946105957, "learning_rate": 0.0002, "epoch": 0.25420615678495334, "step": 1730}, {"loss": 0.8666, "grad_norm": 0.17539545893669128, "learning_rate": 0.0002, "epoch": 0.2556755565351554, "step": 1740}, {"loss": 0.8972, "grad_norm": 0.17839759588241577, "learning_rate": 0.0002, "epoch": 0.25714495628535744, "step": 1750}, {"loss": 0.9272, "grad_norm": 0.17030727863311768, "learning_rate": 0.0002, "epoch": 0.2586143560355595, "step": 1760}, {"loss": 0.9503, "grad_norm": 0.19519369304180145, "learning_rate": 0.0002, "epoch": 0.26008375578576154, "step": 1770}, {"loss": 0.9229, "grad_norm": 0.18777932226657867, "learning_rate": 0.0002, "epoch": 0.26155315553596353, "step": 1780}, {"loss": 0.9547, "grad_norm": 0.19552971422672272, "learning_rate": 0.0002, "epoch": 0.2630225552861656, "step": 1790}, {"loss": 0.8338, "grad_norm": 0.14793473482131958, "learning_rate": 0.0002, "epoch": 0.26449195503636763, "step": 1800}, {"loss": 0.9378, "grad_norm": 0.1663934290409088, "learning_rate": 0.0002, "epoch": 0.2659613547865697, "step": 1810}, {"loss": 0.9257, "grad_norm": 0.17549936473369598, "learning_rate": 0.0002, "epoch": 0.26743075453677173, "step": 1820}, {"loss": 0.8995, "grad_norm": 0.16554151475429535, "learning_rate": 0.0002, "epoch": 0.2689001542869738, "step": 1830}, {"loss": 0.9364, "grad_norm": 0.16224057972431183, "learning_rate": 0.0002, "epoch": 0.27036955403717583, "step": 1840}, {"loss": 0.8922, "grad_norm": 0.17428523302078247, "learning_rate": 0.0002, "epoch": 0.2718389537873779, "step": 1850}, {"loss": 0.8444, "grad_norm": 0.18068307638168335, "learning_rate": 0.0002, "epoch": 0.2733083535375799, "step": 1860}, {"loss": 0.9284, "grad_norm": 0.15775635838508606, "learning_rate": 0.0002, "epoch": 0.2747777532877819, "step": 1870}, {"loss": 0.9045, "grad_norm": 0.3021157383918762, "learning_rate": 0.0002, "epoch": 0.276247153037984, "step": 1880}, {"loss": 0.8423, "grad_norm": 0.17214266955852509, "learning_rate": 0.0002, "epoch": 0.277716552788186, "step": 1890}, {"loss": 0.8626, "grad_norm": 0.16640028357505798, "learning_rate": 0.0002, "epoch": 0.27918595253838807, "step": 1900}, {"loss": 0.8682, "grad_norm": 0.1825748085975647, "learning_rate": 0.0002, "epoch": 0.2806553522885901, "step": 1910}, {"loss": 0.9022, "grad_norm": 0.15389439463615417, "learning_rate": 0.0002, "epoch": 0.28212475203879217, "step": 1920}, {"loss": 0.872, "grad_norm": 0.17119665443897247, "learning_rate": 0.0002, "epoch": 0.2835941517889942, "step": 1930}, {"loss": 0.8633, "grad_norm": 0.14557567238807678, "learning_rate": 0.0002, "epoch": 0.2850635515391962, "step": 1940}, {"loss": 0.8663, "grad_norm": 0.21484458446502686, "learning_rate": 0.0002, "epoch": 0.28653295128939826, "step": 1950}, {"loss": 0.8638, "grad_norm": 0.1946394443511963, "learning_rate": 0.0002, "epoch": 0.2880023510396003, "step": 1960}, {"loss": 0.8728, "grad_norm": 0.18898609280586243, "learning_rate": 0.0002, "epoch": 0.28947175078980236, "step": 1970}, {"loss": 0.8421, "grad_norm": 0.1833840012550354, "learning_rate": 0.0002, "epoch": 0.2909411505400044, "step": 1980}, {"loss": 0.9293, "grad_norm": 0.16596315801143646, "learning_rate": 0.0002, "epoch": 0.29241055029020646, "step": 1990}, {"loss": 0.8912, "grad_norm": 0.20588211715221405, "learning_rate": 0.0002, "epoch": 0.2938799500404085, "step": 2000}, {"loss": 0.8375, "grad_norm": 0.20303083956241608, "learning_rate": 0.0002, "epoch": 0.29534934979061056, "step": 2010}, {"loss": 0.9014, "grad_norm": 0.23940697312355042, 
"learning_rate": 0.0002, "epoch": 0.29681874954081255, "step": 2020}, {"loss": 0.9149, "grad_norm": 0.16720391809940338, "learning_rate": 0.0002, "epoch": 0.2982881492910146, "step": 2030}, {"loss": 0.8547, "grad_norm": 0.1968228816986084, "learning_rate": 0.0002, "epoch": 0.29975754904121665, "step": 2040}, {"loss": 0.9042, "grad_norm": 0.19703173637390137, "learning_rate": 0.0002, "epoch": 0.3012269487914187, "step": 2050}, {"loss": 0.8833, "grad_norm": 0.19083744287490845, "learning_rate": 0.0002, "epoch": 0.30269634854162075, "step": 2060}, {"loss": 0.8503, "grad_norm": 0.16350123286247253, "learning_rate": 0.0002, "epoch": 0.3041657482918228, "step": 2070}, {"loss": 0.8652, "grad_norm": 0.16439028084278107, "learning_rate": 0.0002, "epoch": 0.30563514804202485, "step": 2080}, {"loss": 0.875, "grad_norm": 0.22233879566192627, "learning_rate": 0.0002, "epoch": 0.3071045477922269, "step": 2090}, {"loss": 0.875, "grad_norm": 0.15111422538757324, "learning_rate": 0.0002, "epoch": 0.3085739475424289, "step": 2100}, {"loss": 0.9062, "grad_norm": 0.19138678908348083, "learning_rate": 0.0002, "epoch": 0.31004334729263094, "step": 2110}, {"loss": 0.8432, "grad_norm": 0.21213558316230774, "learning_rate": 0.0002, "epoch": 0.311512747042833, "step": 2120}, {"loss": 0.9253, "grad_norm": 0.18738001585006714, "learning_rate": 0.0002, "epoch": 0.31298214679303504, "step": 2130}, {"loss": 0.9364, "grad_norm": 0.2186465561389923, "learning_rate": 0.0002, "epoch": 0.3144515465432371, "step": 2140}, {"loss": 0.878, "grad_norm": 0.17202387750148773, "learning_rate": 0.0002, "epoch": 0.31592094629343914, "step": 2150}, {"loss": 0.9072, "grad_norm": 0.15669584274291992, "learning_rate": 0.0002, "epoch": 0.3173903460436412, "step": 2160}, {"loss": 0.9071, "grad_norm": 0.2147863358259201, "learning_rate": 0.0002, "epoch": 0.31885974579384324, "step": 2170}, {"loss": 0.8665, "grad_norm": 0.1684272140264511, "learning_rate": 0.0002, "epoch": 0.32032914554404524, "step": 2180}, {"loss": 0.8506, "grad_norm": 0.1736169308423996, "learning_rate": 0.0002, "epoch": 0.3217985452942473, "step": 2190}, {"loss": 0.8258, "grad_norm": 0.17526443302631378, "learning_rate": 0.0002, "epoch": 0.32326794504444933, "step": 2200}, {"loss": 0.8162, "grad_norm": 0.18431158363819122, "learning_rate": 0.0002, "epoch": 0.3247373447946514, "step": 2210}, {"loss": 0.9071, "grad_norm": 0.2074470967054367, "learning_rate": 0.0002, "epoch": 0.32620674454485343, "step": 2220}, {"loss": 0.8833, "grad_norm": 0.17141957581043243, "learning_rate": 0.0002, "epoch": 0.3276761442950555, "step": 2230}, {"loss": 0.8668, "grad_norm": 0.20623619854450226, "learning_rate": 0.0002, "epoch": 0.32914554404525753, "step": 2240}, {"loss": 0.8861, "grad_norm": 0.17226505279541016, "learning_rate": 0.0002, "epoch": 0.3306149437954596, "step": 2250}, {"loss": 0.9226, "grad_norm": 0.17701441049575806, "learning_rate": 0.0002, "epoch": 0.3320843435456616, "step": 2260}, {"loss": 0.906, "grad_norm": 0.20146964490413666, "learning_rate": 0.0002, "epoch": 0.3335537432958636, "step": 2270}, {"loss": 0.8344, "grad_norm": 0.15297937393188477, "learning_rate": 0.0002, "epoch": 0.3350231430460657, "step": 2280}, {"loss": 0.8744, "grad_norm": 0.18793007731437683, "learning_rate": 0.0002, "epoch": 0.3364925427962677, "step": 2290}, {"loss": 0.8857, "grad_norm": 0.1594923436641693, "learning_rate": 0.0002, "epoch": 0.3379619425464698, "step": 2300}, {"loss": 0.8752, "grad_norm": 0.17530961334705353, "learning_rate": 0.0002, "epoch": 0.3394313422966718, "step": 2310}, 
{"loss": 0.8858, "grad_norm": 0.15771639347076416, "learning_rate": 0.0002, "epoch": 0.3409007420468739, "step": 2320}, {"loss": 0.8317, "grad_norm": 0.15540577471256256, "learning_rate": 0.0002, "epoch": 0.3423701417970759, "step": 2330}, {"loss": 0.8512, "grad_norm": 0.19260241091251373, "learning_rate": 0.0002, "epoch": 0.3438395415472779, "step": 2340}, {"loss": 0.9256, "grad_norm": 0.14925982058048248, "learning_rate": 0.0002, "epoch": 0.34530894129747997, "step": 2350}, {"loss": 0.9234, "grad_norm": 0.20382159948349, "learning_rate": 0.0002, "epoch": 0.346778341047682, "step": 2360}, {"loss": 0.9263, "grad_norm": 0.2570635676383972, "learning_rate": 0.0002, "epoch": 0.34824774079788406, "step": 2370}, {"loss": 0.8193, "grad_norm": 0.16751402616500854, "learning_rate": 0.0002, "epoch": 0.3497171405480861, "step": 2380}, {"loss": 0.8946, "grad_norm": 0.16061227023601532, "learning_rate": 0.0002, "epoch": 0.35118654029828816, "step": 2390}, {"loss": 0.8727, "grad_norm": 0.16183720529079437, "learning_rate": 0.0002, "epoch": 0.3526559400484902, "step": 2400}, {"loss": 0.8721, "grad_norm": 0.17135287821292877, "learning_rate": 0.0002, "epoch": 0.3541253397986922, "step": 2410}, {"loss": 0.853, "grad_norm": 0.16616292297840118, "learning_rate": 0.0002, "epoch": 0.35559473954889426, "step": 2420}, {"loss": 0.9202, "grad_norm": 0.1661687046289444, "learning_rate": 0.0002, "epoch": 0.3570641392990963, "step": 2430}, {"loss": 0.8626, "grad_norm": 0.17011168599128723, "learning_rate": 0.0002, "epoch": 0.35853353904929836, "step": 2440}, {"loss": 0.8357, "grad_norm": 0.16639764606952667, "learning_rate": 0.0002, "epoch": 0.3600029387995004, "step": 2450}, {"loss": 0.8876, "grad_norm": 0.16195109486579895, "learning_rate": 0.0002, "epoch": 0.36147233854970245, "step": 2460}, {"loss": 0.8434, "grad_norm": 0.3125733435153961, "learning_rate": 0.0002, "epoch": 0.3629417382999045, "step": 2470}, {"loss": 0.9052, "grad_norm": 0.16183999180793762, "learning_rate": 0.0002, "epoch": 0.36441113805010655, "step": 2480}, {"loss": 0.8391, "grad_norm": 0.17696015536785126, "learning_rate": 0.0002, "epoch": 0.36588053780030855, "step": 2490}, {"loss": 0.8942, "grad_norm": 0.1957734078168869, "learning_rate": 0.0002, "epoch": 0.3673499375505106, "step": 2500}, {"loss": 0.8834, "grad_norm": 0.179367333650589, "learning_rate": 0.0002, "epoch": 0.36881933730071265, "step": 2510}, {"loss": 0.8642, "grad_norm": 0.15720799565315247, "learning_rate": 0.0002, "epoch": 0.3702887370509147, "step": 2520}, {"loss": 0.8663, "grad_norm": 0.20373153686523438, "learning_rate": 0.0002, "epoch": 0.37175813680111675, "step": 2530}, {"loss": 0.8464, "grad_norm": 0.14838048815727234, "learning_rate": 0.0002, "epoch": 0.3732275365513188, "step": 2540}, {"loss": 0.8909, "grad_norm": 0.24546244740486145, "learning_rate": 0.0002, "epoch": 0.37469693630152084, "step": 2550}, {"loss": 0.923, "grad_norm": 0.18115679919719696, "learning_rate": 0.0002, "epoch": 0.3761663360517229, "step": 2560}, {"loss": 0.9195, "grad_norm": 0.19443969428539276, "learning_rate": 0.0002, "epoch": 0.3776357358019249, "step": 2570}, {"loss": 0.8226, "grad_norm": 0.17321591079235077, "learning_rate": 0.0002, "epoch": 0.37910513555212694, "step": 2580}, {"loss": 0.9381, "grad_norm": 0.2034158706665039, "learning_rate": 0.0002, "epoch": 0.380574535302329, "step": 2590}, {"loss": 0.8669, "grad_norm": 0.16666273772716522, "learning_rate": 0.0002, "epoch": 0.38204393505253104, "step": 2600}, {"loss": 0.8755, "grad_norm": 0.2102932333946228, "learning_rate": 0.0002, 
"epoch": 0.3835133348027331, "step": 2610}, {"loss": 0.9036, "grad_norm": 0.19231173396110535, "learning_rate": 0.0002, "epoch": 0.38498273455293514, "step": 2620}, {"loss": 0.8819, "grad_norm": 0.19412517547607422, "learning_rate": 0.0002, "epoch": 0.3864521343031372, "step": 2630}, {"loss": 0.8581, "grad_norm": 0.16703838109970093, "learning_rate": 0.0002, "epoch": 0.38792153405333923, "step": 2640}, {"loss": 0.882, "grad_norm": 0.17818975448608398, "learning_rate": 0.0002, "epoch": 0.38939093380354123, "step": 2650}, {"loss": 0.8479, "grad_norm": 0.19919253885746002, "learning_rate": 0.0002, "epoch": 0.3908603335537433, "step": 2660}, {"loss": 0.9016, "grad_norm": 0.2024874985218048, "learning_rate": 0.0002, "epoch": 0.3923297333039453, "step": 2670}, {"loss": 0.8804, "grad_norm": 0.17316050827503204, "learning_rate": 0.0002, "epoch": 0.3937991330541474, "step": 2680}, {"loss": 0.8963, "grad_norm": 0.20312771201133728, "learning_rate": 0.0002, "epoch": 0.3952685328043494, "step": 2690}, {"loss": 0.8526, "grad_norm": 0.19131846725940704, "learning_rate": 0.0002, "epoch": 0.3967379325545515, "step": 2700}, {"loss": 0.8562, "grad_norm": 0.22599078714847565, "learning_rate": 0.0002, "epoch": 0.3982073323047535, "step": 2710}, {"loss": 0.8878, "grad_norm": 0.19016793370246887, "learning_rate": 0.0002, "epoch": 0.3996767320549556, "step": 2720}, {"loss": 0.9445, "grad_norm": 0.18586859107017517, "learning_rate": 0.0002, "epoch": 0.40114613180515757, "step": 2730}, {"loss": 0.9231, "grad_norm": 0.17405042052268982, "learning_rate": 0.0002, "epoch": 0.4026155315553596, "step": 2740}, {"loss": 0.8885, "grad_norm": 0.18743965029716492, "learning_rate": 0.0002, "epoch": 0.40408493130556167, "step": 2750}, {"loss": 0.9278, "grad_norm": 0.17220088839530945, "learning_rate": 0.0002, "epoch": 0.4055543310557637, "step": 2760}, {"loss": 0.9378, "grad_norm": 0.19248218834400177, "learning_rate": 0.0002, "epoch": 0.40702373080596577, "step": 2770}, {"loss": 0.8821, "grad_norm": 0.16138921678066254, "learning_rate": 0.0002, "epoch": 0.4084931305561678, "step": 2780}, {"loss": 0.8409, "grad_norm": 0.21790088713169098, "learning_rate": 0.0002, "epoch": 0.40996253030636987, "step": 2790}, {"loss": 0.9048, "grad_norm": 0.2847873866558075, "learning_rate": 0.0002, "epoch": 0.4114319300565719, "step": 2800}, {"loss": 0.9497, "grad_norm": 0.17424573004245758, "learning_rate": 0.0002, "epoch": 0.4129013298067739, "step": 2810}, {"loss": 0.8809, "grad_norm": 0.18194137513637543, "learning_rate": 0.0002, "epoch": 0.41437072955697596, "step": 2820}, {"loss": 0.8352, "grad_norm": 0.183488130569458, "learning_rate": 0.0002, "epoch": 0.415840129307178, "step": 2830}, {"loss": 0.9076, "grad_norm": 0.17924755811691284, "learning_rate": 0.0002, "epoch": 0.41730952905738006, "step": 2840}, {"loss": 0.9184, "grad_norm": 0.20655010640621185, "learning_rate": 0.0002, "epoch": 0.4187789288075821, "step": 2850}, {"loss": 0.9441, "grad_norm": 0.18873105943202972, "learning_rate": 0.0002, "epoch": 0.42024832855778416, "step": 2860}, {"loss": 0.8967, "grad_norm": 0.16043813526630402, "learning_rate": 0.0002, "epoch": 0.4217177283079862, "step": 2870}, {"loss": 0.8878, "grad_norm": 0.21485184133052826, "learning_rate": 0.0002, "epoch": 0.42318712805818826, "step": 2880}, {"loss": 0.9092, "grad_norm": 0.17327025532722473, "learning_rate": 0.0002, "epoch": 0.42465652780839025, "step": 2890}, {"loss": 0.8927, "grad_norm": 0.1703249216079712, "learning_rate": 0.0002, "epoch": 0.4261259275585923, "step": 2900}, {"loss": 0.8994, 
"grad_norm": 0.16202223300933838, "learning_rate": 0.0002, "epoch": 0.42759532730879435, "step": 2910}, {"loss": 0.882, "grad_norm": 0.16794748604297638, "learning_rate": 0.0002, "epoch": 0.4290647270589964, "step": 2920}, {"loss": 0.8518, "grad_norm": 0.20017556846141815, "learning_rate": 0.0002, "epoch": 0.43053412680919845, "step": 2930}, {"loss": 0.9317, "grad_norm": 0.21763676404953003, "learning_rate": 0.0002, "epoch": 0.4320035265594005, "step": 2940}, {"loss": 0.9208, "grad_norm": 0.1905764788389206, "learning_rate": 0.0002, "epoch": 0.43347292630960255, "step": 2950}, {"loss": 0.9261, "grad_norm": 0.19738046824932098, "learning_rate": 0.0002, "epoch": 0.4349423260598046, "step": 2960}, {"loss": 0.8966, "grad_norm": 0.2011863738298416, "learning_rate": 0.0002, "epoch": 0.4364117258100066, "step": 2970}, {"loss": 0.9385, "grad_norm": 0.18249142169952393, "learning_rate": 0.0002, "epoch": 0.43788112556020864, "step": 2980}, {"loss": 0.8505, "grad_norm": 0.20251347124576569, "learning_rate": 0.0002, "epoch": 0.4393505253104107, "step": 2990}, {"loss": 0.9161, "grad_norm": 0.21611613035202026, "learning_rate": 0.0002, "epoch": 0.44081992506061274, "step": 3000}, {"loss": 0.8534, "grad_norm": 0.17481130361557007, "learning_rate": 0.0002, "epoch": 0.4422893248108148, "step": 3010}, {"loss": 0.8569, "grad_norm": 0.20249882340431213, "learning_rate": 0.0002, "epoch": 0.44375872456101684, "step": 3020}, {"loss": 0.8799, "grad_norm": 0.1865909844636917, "learning_rate": 0.0002, "epoch": 0.4452281243112189, "step": 3030}, {"loss": 0.8818, "grad_norm": 0.1844954788684845, "learning_rate": 0.0002, "epoch": 0.44669752406142094, "step": 3040}, {"loss": 0.8368, "grad_norm": 0.19112421572208405, "learning_rate": 0.0002, "epoch": 0.44816692381162293, "step": 3050}, {"loss": 0.8489, "grad_norm": 0.1842113435268402, "learning_rate": 0.0002, "epoch": 0.449636323561825, "step": 3060}, {"loss": 0.885, "grad_norm": 0.17375756800174713, "learning_rate": 0.0002, "epoch": 0.45110572331202703, "step": 3070}, {"loss": 0.8701, "grad_norm": 0.20470349490642548, "learning_rate": 0.0002, "epoch": 0.4525751230622291, "step": 3080}, {"loss": 0.9216, "grad_norm": 0.23505260050296783, "learning_rate": 0.0002, "epoch": 0.45404452281243113, "step": 3090}, {"loss": 0.8375, "grad_norm": 0.1842467337846756, "learning_rate": 0.0002, "epoch": 0.4555139225626332, "step": 3100}, {"loss": 0.8474, "grad_norm": 0.16366849839687347, "learning_rate": 0.0002, "epoch": 0.4569833223128352, "step": 3110}, {"loss": 0.8909, "grad_norm": 0.16462667286396027, "learning_rate": 0.0002, "epoch": 0.4584527220630373, "step": 3120}, {"loss": 0.8656, "grad_norm": 0.18586328625679016, "learning_rate": 0.0002, "epoch": 0.45992212181323927, "step": 3130}, {"loss": 0.8554, "grad_norm": 0.1920444518327713, "learning_rate": 0.0002, "epoch": 0.4613915215634413, "step": 3140}, {"loss": 0.8936, "grad_norm": 0.19603130221366882, "learning_rate": 0.0002, "epoch": 0.46286092131364337, "step": 3150}, {"loss": 0.8784, "grad_norm": 0.18018363416194916, "learning_rate": 0.0002, "epoch": 0.4643303210638454, "step": 3160}, {"loss": 0.8397, "grad_norm": 0.18458117544651031, "learning_rate": 0.0002, "epoch": 0.46579972081404747, "step": 3170}, {"loss": 0.8143, "grad_norm": 0.19677187502384186, "learning_rate": 0.0002, "epoch": 0.4672691205642495, "step": 3180}, {"loss": 0.8814, "grad_norm": 0.18515309691429138, "learning_rate": 0.0002, "epoch": 0.46873852031445157, "step": 3190}, {"loss": 0.8828, "grad_norm": 0.210871160030365, "learning_rate": 0.0002, "epoch": 
0.47020792006465356, "step": 3200}, {"loss": 0.8718, "grad_norm": 0.20154137909412384, "learning_rate": 0.0002, "epoch": 0.4716773198148556, "step": 3210}, {"loss": 0.89, "grad_norm": 0.16787075996398926, "learning_rate": 0.0002, "epoch": 0.47314671956505766, "step": 3220}, {"loss": 0.859, "grad_norm": 0.17295874655246735, "learning_rate": 0.0002, "epoch": 0.4746161193152597, "step": 3230}, {"loss": 0.8983, "grad_norm": 0.22389985620975494, "learning_rate": 0.0002, "epoch": 0.47608551906546176, "step": 3240}, {"loss": 0.8467, "grad_norm": 0.1770496368408203, "learning_rate": 0.0002, "epoch": 0.4775549188156638, "step": 3250}, {"loss": 0.8171, "grad_norm": 0.1780874878168106, "learning_rate": 0.0002, "epoch": 0.47902431856586586, "step": 3260}, {"loss": 0.9376, "grad_norm": 0.18358926475048065, "learning_rate": 0.0002, "epoch": 0.4804937183160679, "step": 3270}, {"loss": 0.8456, "grad_norm": 0.20781372487545013, "learning_rate": 0.0002, "epoch": 0.4819631180662699, "step": 3280}, {"loss": 0.9027, "grad_norm": 0.22318099439144135, "learning_rate": 0.0002, "epoch": 0.48343251781647195, "step": 3290}, {"loss": 0.8592, "grad_norm": 0.17822790145874023, "learning_rate": 0.0002, "epoch": 0.484901917566674, "step": 3300}, {"loss": 0.8605, "grad_norm": 0.16665315628051758, "learning_rate": 0.0002, "epoch": 0.48637131731687605, "step": 3310}, {"loss": 0.912, "grad_norm": 0.1579129546880722, "learning_rate": 0.0002, "epoch": 0.4878407170670781, "step": 3320}, {"loss": 0.923, "grad_norm": 0.20965060591697693, "learning_rate": 0.0002, "epoch": 0.48931011681728015, "step": 3330}, {"loss": 0.872, "grad_norm": 0.1992150843143463, "learning_rate": 0.0002, "epoch": 0.4907795165674822, "step": 3340}, {"loss": 0.9394, "grad_norm": 0.1955576241016388, "learning_rate": 0.0002, "epoch": 0.49224891631768425, "step": 3350}, {"loss": 0.8767, "grad_norm": 0.20882335305213928, "learning_rate": 0.0002, "epoch": 0.49371831606788624, "step": 3360}, {"loss": 0.8616, "grad_norm": 0.1847611516714096, "learning_rate": 0.0002, "epoch": 0.4951877158180883, "step": 3370}, {"loss": 0.8153, "grad_norm": 0.18568037450313568, "learning_rate": 0.0002, "epoch": 0.49665711556829034, "step": 3380}, {"loss": 0.9437, "grad_norm": 0.20918430387973785, "learning_rate": 0.0002, "epoch": 0.4981265153184924, "step": 3390}, {"loss": 0.8762, "grad_norm": 0.19934356212615967, "learning_rate": 0.0002, "epoch": 0.49959591506869444, "step": 3400}, {"loss": 0.8881, "grad_norm": 0.19008080661296844, "learning_rate": 0.0002, "epoch": 0.5010653148188965, "step": 3410}, {"loss": 0.8631, "grad_norm": 0.20370124280452728, "learning_rate": 0.0002, "epoch": 0.5025347145690985, "step": 3420}, {"loss": 0.8598, "grad_norm": 0.20064856112003326, "learning_rate": 0.0002, "epoch": 0.5040041143193006, "step": 3430}, {"loss": 0.8732, "grad_norm": 0.17745134234428406, "learning_rate": 0.0002, "epoch": 0.5054735140695026, "step": 3440}, {"loss": 0.8645, "grad_norm": 0.1792389452457428, "learning_rate": 0.0002, "epoch": 0.5069429138197047, "step": 3450}, {"loss": 0.8349, "grad_norm": 0.1756240576505661, "learning_rate": 0.0002, "epoch": 0.5084123135699067, "step": 3460}, {"loss": 0.904, "grad_norm": 0.17075671255588531, "learning_rate": 0.0002, "epoch": 0.5098817133201088, "step": 3470}, {"loss": 0.8666, "grad_norm": 0.3933296501636505, "learning_rate": 0.0002, "epoch": 0.5113511130703108, "step": 3480}, {"loss": 0.896, "grad_norm": 0.1581103652715683, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 3490}, {"loss": 0.8566, "grad_norm": 
0.17405696213245392, "learning_rate": 0.0002, "epoch": 0.5142899125707149, "step": 3500}, {"loss": 0.8875, "grad_norm": 0.21243174374103546, "learning_rate": 0.0002, "epoch": 0.5157593123209169, "step": 3510}, {"loss": 0.9123, "grad_norm": 0.1835220754146576, "learning_rate": 0.0002, "epoch": 0.517228712071119, "step": 3520}, {"loss": 0.8621, "grad_norm": 0.2090080976486206, "learning_rate": 0.0002, "epoch": 0.518698111821321, "step": 3530}, {"loss": 0.9009, "grad_norm": 0.20329876244068146, "learning_rate": 0.0002, "epoch": 0.5201675115715231, "step": 3540}, {"loss": 0.9046, "grad_norm": 0.19022735953330994, "learning_rate": 0.0002, "epoch": 0.5216369113217251, "step": 3550}, {"loss": 0.9165, "grad_norm": 0.2283077836036682, "learning_rate": 0.0002, "epoch": 0.5231063110719271, "step": 3560}, {"loss": 0.9032, "grad_norm": 0.20263773202896118, "learning_rate": 0.0002, "epoch": 0.5245757108221292, "step": 3570}, {"loss": 0.8496, "grad_norm": 0.18724724650382996, "learning_rate": 0.0002, "epoch": 0.5260451105723312, "step": 3580}, {"loss": 0.8864, "grad_norm": 0.17436717450618744, "learning_rate": 0.0002, "epoch": 0.5275145103225333, "step": 3590}, {"loss": 0.863, "grad_norm": 0.21936658024787903, "learning_rate": 0.0002, "epoch": 0.5289839100727353, "step": 3600}, {"loss": 0.8936, "grad_norm": 0.2073739618062973, "learning_rate": 0.0002, "epoch": 0.5304533098229374, "step": 3610}, {"loss": 0.8352, "grad_norm": 0.1949140727519989, "learning_rate": 0.0002, "epoch": 0.5319227095731394, "step": 3620}, {"loss": 0.8906, "grad_norm": 0.18793059885501862, "learning_rate": 0.0002, "epoch": 0.5333921093233415, "step": 3630}, {"loss": 0.8517, "grad_norm": 0.2857314348220825, "learning_rate": 0.0002, "epoch": 0.5348615090735435, "step": 3640}, {"loss": 0.8854, "grad_norm": 0.21001498401165009, "learning_rate": 0.0002, "epoch": 0.5363309088237455, "step": 3650}, {"loss": 0.8465, "grad_norm": 0.21162116527557373, "learning_rate": 0.0002, "epoch": 0.5378003085739476, "step": 3660}, {"loss": 0.9206, "grad_norm": 0.21106836199760437, "learning_rate": 0.0002, "epoch": 0.5392697083241496, "step": 3670}, {"loss": 0.8413, "grad_norm": 0.20596356689929962, "learning_rate": 0.0002, "epoch": 0.5407391080743517, "step": 3680}, {"loss": 0.8394, "grad_norm": 0.1906813085079193, "learning_rate": 0.0002, "epoch": 0.5422085078245537, "step": 3690}, {"loss": 0.8489, "grad_norm": 0.22221796214580536, "learning_rate": 0.0002, "epoch": 0.5436779075747558, "step": 3700}, {"loss": 0.8718, "grad_norm": 0.16922327876091003, "learning_rate": 0.0002, "epoch": 0.5451473073249578, "step": 3710}, {"loss": 0.8435, "grad_norm": 0.20244888961315155, "learning_rate": 0.0002, "epoch": 0.5466167070751597, "step": 3720}, {"loss": 0.8705, "grad_norm": 0.15896661579608917, "learning_rate": 0.0002, "epoch": 0.5480861068253619, "step": 3730}, {"loss": 0.8886, "grad_norm": 0.20930814743041992, "learning_rate": 0.0002, "epoch": 0.5495555065755638, "step": 3740}, {"loss": 0.9039, "grad_norm": 0.23652398586273193, "learning_rate": 0.0002, "epoch": 0.551024906325766, "step": 3750}, {"loss": 0.9013, "grad_norm": 0.2031068503856659, "learning_rate": 0.0002, "epoch": 0.552494306075968, "step": 3760}, {"loss": 0.8488, "grad_norm": 0.19994738698005676, "learning_rate": 0.0002, "epoch": 0.55396370582617, "step": 3770}, {"loss": 0.8628, "grad_norm": 0.2220754623413086, "learning_rate": 0.0002, "epoch": 0.555433105576372, "step": 3780}, {"loss": 0.9065, "grad_norm": 0.21490134298801422, "learning_rate": 0.0002, "epoch": 0.5569025053265741, "step": 3790}, 
{"loss": 0.8502, "grad_norm": 0.17468570172786713, "learning_rate": 0.0002, "epoch": 0.5583719050767761, "step": 3800}, {"loss": 0.8321, "grad_norm": 0.19589351117610931, "learning_rate": 0.0002, "epoch": 0.5598413048269781, "step": 3810}, {"loss": 0.898, "grad_norm": 0.1818351298570633, "learning_rate": 0.0002, "epoch": 0.5613107045771802, "step": 3820}, {"loss": 0.9198, "grad_norm": 0.18889200687408447, "learning_rate": 0.0002, "epoch": 0.5627801043273822, "step": 3830}, {"loss": 0.8555, "grad_norm": 0.17498554289340973, "learning_rate": 0.0002, "epoch": 0.5642495040775843, "step": 3840}, {"loss": 0.8819, "grad_norm": 0.2251986563205719, "learning_rate": 0.0002, "epoch": 0.5657189038277863, "step": 3850}, {"loss": 0.8668, "grad_norm": 0.2071862667798996, "learning_rate": 0.0002, "epoch": 0.5671883035779884, "step": 3860}, {"loss": 0.843, "grad_norm": 0.20290961861610413, "learning_rate": 0.0002, "epoch": 0.5686577033281904, "step": 3870}, {"loss": 0.9162, "grad_norm": 0.2014969438314438, "learning_rate": 0.0002, "epoch": 0.5701271030783924, "step": 3880}, {"loss": 0.8218, "grad_norm": 0.20613284409046173, "learning_rate": 0.0002, "epoch": 0.5715965028285945, "step": 3890}, {"loss": 0.8828, "grad_norm": 0.1892530471086502, "learning_rate": 0.0002, "epoch": 0.5730659025787965, "step": 3900}, {"loss": 0.8611, "grad_norm": 0.21220922470092773, "learning_rate": 0.0002, "epoch": 0.5745353023289986, "step": 3910}, {"loss": 0.8583, "grad_norm": 0.15298207104206085, "learning_rate": 0.0002, "epoch": 0.5760047020792006, "step": 3920}, {"loss": 0.8875, "grad_norm": 0.20951399207115173, "learning_rate": 0.0002, "epoch": 0.5774741018294027, "step": 3930}, {"loss": 0.9084, "grad_norm": 0.20798131823539734, "learning_rate": 0.0002, "epoch": 0.5789435015796047, "step": 3940}, {"loss": 0.858, "grad_norm": 0.20694024860858917, "learning_rate": 0.0002, "epoch": 0.5804129013298067, "step": 3950}, {"loss": 0.8708, "grad_norm": 0.1910879760980606, "learning_rate": 0.0002, "epoch": 0.5818823010800088, "step": 3960}, {"loss": 0.8774, "grad_norm": 0.21418365836143494, "learning_rate": 0.0002, "epoch": 0.5833517008302108, "step": 3970}, {"loss": 0.8772, "grad_norm": 0.19750036299228668, "learning_rate": 0.0002, "epoch": 0.5848211005804129, "step": 3980}, {"loss": 0.8892, "grad_norm": 0.19439974427223206, "learning_rate": 0.0002, "epoch": 0.5862905003306149, "step": 3990}, {"loss": 0.8967, "grad_norm": 0.2128968983888626, "learning_rate": 0.0002, "epoch": 0.587759900080817, "step": 4000}, {"loss": 0.8948, "grad_norm": 0.1731695979833603, "learning_rate": 0.0002, "epoch": 0.589229299831019, "step": 4010}, {"loss": 0.8779, "grad_norm": 0.2164681851863861, "learning_rate": 0.0002, "epoch": 0.5906986995812211, "step": 4020}, {"loss": 0.8376, "grad_norm": 0.19831566512584686, "learning_rate": 0.0002, "epoch": 0.5921680993314231, "step": 4030}, {"loss": 0.8949, "grad_norm": 0.19890721142292023, "learning_rate": 0.0002, "epoch": 0.5936374990816251, "step": 4040}, {"loss": 0.8508, "grad_norm": 0.2423945814371109, "learning_rate": 0.0002, "epoch": 0.5951068988318272, "step": 4050}, {"loss": 0.8902, "grad_norm": 0.2131136655807495, "learning_rate": 0.0002, "epoch": 0.5965762985820292, "step": 4060}, {"loss": 0.9167, "grad_norm": 0.20811966061592102, "learning_rate": 0.0002, "epoch": 0.5980456983322313, "step": 4070}, {"loss": 0.8838, "grad_norm": 0.17137253284454346, "learning_rate": 0.0002, "epoch": 0.5995150980824333, "step": 4080}, {"loss": 0.9575, "grad_norm": 0.21309101581573486, "learning_rate": 0.0002, "epoch": 
0.6009844978326354, "step": 4090}, {"loss": 0.8579, "grad_norm": 0.20811927318572998, "learning_rate": 0.0002, "epoch": 0.6024538975828374, "step": 4100}, {"loss": 0.8647, "grad_norm": 0.18655802309513092, "learning_rate": 0.0002, "epoch": 0.6039232973330394, "step": 4110}, {"loss": 0.902, "grad_norm": 0.22078685462474823, "learning_rate": 0.0002, "epoch": 0.6053926970832415, "step": 4120}, {"loss": 0.9161, "grad_norm": 0.19692769646644592, "learning_rate": 0.0002, "epoch": 0.6068620968334435, "step": 4130}, {"loss": 0.919, "grad_norm": 0.21485823392868042, "learning_rate": 0.0002, "epoch": 0.6083314965836456, "step": 4140}, {"loss": 0.8226, "grad_norm": 0.27000460028648376, "learning_rate": 0.0002, "epoch": 0.6098008963338476, "step": 4150}, {"loss": 0.8794, "grad_norm": 0.17075423896312714, "learning_rate": 0.0002, "epoch": 0.6112702960840497, "step": 4160}, {"loss": 0.9004, "grad_norm": 0.20538625121116638, "learning_rate": 0.0002, "epoch": 0.6127396958342517, "step": 4170}, {"loss": 0.8919, "grad_norm": 0.19913120567798615, "learning_rate": 0.0002, "epoch": 0.6142090955844538, "step": 4180}, {"loss": 0.869, "grad_norm": 0.1864994317293167, "learning_rate": 0.0002, "epoch": 0.6156784953346558, "step": 4190}, {"loss": 0.8938, "grad_norm": 0.21280141174793243, "learning_rate": 0.0002, "epoch": 0.6171478950848578, "step": 4200}, {"loss": 0.9146, "grad_norm": 0.20486411452293396, "learning_rate": 0.0002, "epoch": 0.6186172948350599, "step": 4210}, {"loss": 0.8602, "grad_norm": 0.2367752641439438, "learning_rate": 0.0002, "epoch": 0.6200866945852619, "step": 4220}, {"loss": 0.8786, "grad_norm": 0.1956392228603363, "learning_rate": 0.0002, "epoch": 0.621556094335464, "step": 4230}, {"loss": 0.9115, "grad_norm": 0.20413194596767426, "learning_rate": 0.0002, "epoch": 0.623025494085666, "step": 4240}, {"loss": 0.8834, "grad_norm": 0.16064126789569855, "learning_rate": 0.0002, "epoch": 0.6244948938358681, "step": 4250}, {"loss": 0.8496, "grad_norm": 0.22033268213272095, "learning_rate": 0.0002, "epoch": 0.6259642935860701, "step": 4260}, {"loss": 0.8868, "grad_norm": 0.1988251656293869, "learning_rate": 0.0002, "epoch": 0.6274336933362721, "step": 4270}, {"loss": 0.8999, "grad_norm": 0.19203788042068481, "learning_rate": 0.0002, "epoch": 0.6289030930864742, "step": 4280}, {"loss": 0.8757, "grad_norm": 0.2187374085187912, "learning_rate": 0.0002, "epoch": 0.6303724928366762, "step": 4290}, {"loss": 0.9113, "grad_norm": 0.20379137992858887, "learning_rate": 0.0002, "epoch": 0.6318418925868783, "step": 4300}, {"loss": 0.9214, "grad_norm": 0.21784429252147675, "learning_rate": 0.0002, "epoch": 0.6333112923370803, "step": 4310}, {"loss": 0.8783, "grad_norm": 0.20314247906208038, "learning_rate": 0.0002, "epoch": 0.6347806920872824, "step": 4320}, {"loss": 0.9264, "grad_norm": 0.20686393976211548, "learning_rate": 0.0002, "epoch": 0.6362500918374844, "step": 4330}, {"loss": 0.8583, "grad_norm": 0.2880207896232605, "learning_rate": 0.0002, "epoch": 0.6377194915876865, "step": 4340}, {"loss": 0.943, "grad_norm": 0.21820850670337677, "learning_rate": 0.0002, "epoch": 0.6391888913378885, "step": 4350}, {"loss": 0.8918, "grad_norm": 0.21594226360321045, "learning_rate": 0.0002, "epoch": 0.6406582910880905, "step": 4360}, {"loss": 0.8693, "grad_norm": 0.19260689616203308, "learning_rate": 0.0002, "epoch": 0.6421276908382926, "step": 4370}, {"loss": 0.8695, "grad_norm": 0.21893763542175293, "learning_rate": 0.0002, "epoch": 0.6435970905884946, "step": 4380}, {"loss": 0.8184, "grad_norm": 0.20681653916835785, 
"learning_rate": 0.0002, "epoch": 0.6450664903386967, "step": 4390}, {"loss": 0.9174, "grad_norm": 0.2099543958902359, "learning_rate": 0.0002, "epoch": 0.6465358900888987, "step": 4400}, {"loss": 0.8681, "grad_norm": 0.17054030299186707, "learning_rate": 0.0002, "epoch": 0.6480052898391008, "step": 4410}, {"loss": 0.8865, "grad_norm": 0.24127532541751862, "learning_rate": 0.0002, "epoch": 0.6494746895893028, "step": 4420}, {"loss": 0.9023, "grad_norm": 0.21654072403907776, "learning_rate": 0.0002, "epoch": 0.6509440893395048, "step": 4430}, {"loss": 0.8551, "grad_norm": 0.20616871118545532, "learning_rate": 0.0002, "epoch": 0.6524134890897069, "step": 4440}, {"loss": 0.8559, "grad_norm": 0.17742393910884857, "learning_rate": 0.0002, "epoch": 0.6538828888399089, "step": 4450}, {"loss": 0.8544, "grad_norm": 0.20121945440769196, "learning_rate": 0.0002, "epoch": 0.655352288590111, "step": 4460}, {"loss": 0.9316, "grad_norm": 0.2139420062303543, "learning_rate": 0.0002, "epoch": 0.656821688340313, "step": 4470}, {"loss": 0.8729, "grad_norm": 0.20026612281799316, "learning_rate": 0.0002, "epoch": 0.6582910880905151, "step": 4480}, {"loss": 0.8409, "grad_norm": 0.189228355884552, "learning_rate": 0.0002, "epoch": 0.6597604878407171, "step": 4490}, {"loss": 0.8045, "grad_norm": 0.22036349773406982, "learning_rate": 0.0002, "epoch": 0.6612298875909192, "step": 4500}, {"loss": 0.8439, "grad_norm": 0.18675324320793152, "learning_rate": 0.0002, "epoch": 0.6626992873411212, "step": 4510}, {"loss": 0.8486, "grad_norm": 0.19283509254455566, "learning_rate": 0.0002, "epoch": 0.6641686870913232, "step": 4520}, {"loss": 0.8738, "grad_norm": 0.21325606107711792, "learning_rate": 0.0002, "epoch": 0.6656380868415253, "step": 4530}, {"loss": 0.871, "grad_norm": 0.19563665986061096, "learning_rate": 0.0002, "epoch": 0.6671074865917273, "step": 4540}, {"loss": 0.8543, "grad_norm": 0.18388667702674866, "learning_rate": 0.0002, "epoch": 0.6685768863419294, "step": 4550}, {"loss": 0.9017, "grad_norm": 0.18575367331504822, "learning_rate": 0.0002, "epoch": 0.6700462860921313, "step": 4560}, {"loss": 0.8777, "grad_norm": 0.17234192788600922, "learning_rate": 0.0002, "epoch": 0.6715156858423335, "step": 4570}, {"loss": 0.9109, "grad_norm": 0.1903233677148819, "learning_rate": 0.0002, "epoch": 0.6729850855925354, "step": 4580}, {"loss": 0.8768, "grad_norm": 0.1852804273366928, "learning_rate": 0.0002, "epoch": 0.6744544853427374, "step": 4590}, {"loss": 0.8573, "grad_norm": 0.19101595878601074, "learning_rate": 0.0002, "epoch": 0.6759238850929395, "step": 4600}, {"loss": 0.8766, "grad_norm": 0.22025969624519348, "learning_rate": 0.0002, "epoch": 0.6773932848431415, "step": 4610}, {"loss": 0.8476, "grad_norm": 0.19934341311454773, "learning_rate": 0.0002, "epoch": 0.6788626845933436, "step": 4620}, {"loss": 0.8833, "grad_norm": 0.19982978701591492, "learning_rate": 0.0002, "epoch": 0.6803320843435456, "step": 4630}, {"loss": 0.8266, "grad_norm": 0.16410236060619354, "learning_rate": 0.0002, "epoch": 0.6818014840937477, "step": 4640}, {"loss": 0.8745, "grad_norm": 0.24662552773952484, "learning_rate": 0.0002, "epoch": 0.6832708838439497, "step": 4650}, {"loss": 0.8566, "grad_norm": 0.28984692692756653, "learning_rate": 0.0002, "epoch": 0.6847402835941518, "step": 4660}, {"loss": 0.8067, "grad_norm": 0.21145619451999664, "learning_rate": 0.0002, "epoch": 0.6862096833443538, "step": 4670}, {"loss": 0.8556, "grad_norm": 0.19237872958183289, "learning_rate": 0.0002, "epoch": 0.6876790830945558, "step": 4680}, {"loss": 
0.8318, "grad_norm": 0.1729259043931961, "learning_rate": 0.0002, "epoch": 0.6891484828447579, "step": 4690}, {"loss": 0.8534, "grad_norm": 0.1768421232700348, "learning_rate": 0.0002, "epoch": 0.6906178825949599, "step": 4700}, {"loss": 0.9205, "grad_norm": 0.20244674384593964, "learning_rate": 0.0002, "epoch": 0.692087282345162, "step": 4710}, {"loss": 0.9373, "grad_norm": 0.2057008296251297, "learning_rate": 0.0002, "epoch": 0.693556682095364, "step": 4720}, {"loss": 0.8581, "grad_norm": 0.23223090171813965, "learning_rate": 0.0002, "epoch": 0.6950260818455661, "step": 4730}, {"loss": 0.8422, "grad_norm": 0.2055569887161255, "learning_rate": 0.0002, "epoch": 0.6964954815957681, "step": 4740}, {"loss": 0.8704, "grad_norm": 0.22745701670646667, "learning_rate": 0.0002, "epoch": 0.6979648813459701, "step": 4750}, {"loss": 0.8722, "grad_norm": 0.20105138421058655, "learning_rate": 0.0002, "epoch": 0.6994342810961722, "step": 4760}, {"loss": 0.8539, "grad_norm": 0.1754033863544464, "learning_rate": 0.0002, "epoch": 0.7009036808463742, "step": 4770}, {"loss": 0.916, "grad_norm": 0.1993197798728943, "learning_rate": 0.0002, "epoch": 0.7023730805965763, "step": 4780}, {"loss": 0.8007, "grad_norm": 0.1936049610376358, "learning_rate": 0.0002, "epoch": 0.7038424803467783, "step": 4790}, {"loss": 0.8574, "grad_norm": 0.20394669473171234, "learning_rate": 0.0002, "epoch": 0.7053118800969804, "step": 4800}, {"loss": 0.8695, "grad_norm": 0.17336945235729218, "learning_rate": 0.0002, "epoch": 0.7067812798471824, "step": 4810}, {"loss": 0.8555, "grad_norm": 0.21182642877101898, "learning_rate": 0.0002, "epoch": 0.7082506795973844, "step": 4820}, {"loss": 0.8746, "grad_norm": 0.22350825369358063, "learning_rate": 0.0002, "epoch": 0.7097200793475865, "step": 4830}, {"loss": 0.8739, "grad_norm": 0.19174501299858093, "learning_rate": 0.0002, "epoch": 0.7111894790977885, "step": 4840}, {"loss": 0.8703, "grad_norm": 0.2016707956790924, "learning_rate": 0.0002, "epoch": 0.7126588788479906, "step": 4850}, {"loss": 0.9153, "grad_norm": 0.18826618790626526, "learning_rate": 0.0002, "epoch": 0.7141282785981926, "step": 4860}, {"loss": 0.9162, "grad_norm": 0.22155073285102844, "learning_rate": 0.0002, "epoch": 0.7155976783483947, "step": 4870}, {"loss": 0.8619, "grad_norm": 0.19846324622631073, "learning_rate": 0.0002, "epoch": 0.7170670780985967, "step": 4880}, {"loss": 0.8795, "grad_norm": 0.1801115870475769, "learning_rate": 0.0002, "epoch": 0.7185364778487988, "step": 4890}, {"loss": 0.8593, "grad_norm": 0.1965809315443039, "learning_rate": 0.0002, "epoch": 0.7200058775990008, "step": 4900}, {"loss": 0.9257, "grad_norm": 0.24628283083438873, "learning_rate": 0.0002, "epoch": 0.7214752773492028, "step": 4910}, {"loss": 0.7985, "grad_norm": 0.17960895597934723, "learning_rate": 0.0002, "epoch": 0.7229446770994049, "step": 4920}, {"loss": 0.909, "grad_norm": 0.20084136724472046, "learning_rate": 0.0002, "epoch": 0.7244140768496069, "step": 4930}, {"loss": 0.8605, "grad_norm": 0.20200081169605255, "learning_rate": 0.0002, "epoch": 0.725883476599809, "step": 4940}, {"loss": 0.8883, "grad_norm": 0.1855274736881256, "learning_rate": 0.0002, "epoch": 0.727352876350011, "step": 4950}, {"loss": 0.9093, "grad_norm": 0.1796240210533142, "learning_rate": 0.0002, "epoch": 0.7288222761002131, "step": 4960}, {"loss": 0.8482, "grad_norm": 0.16853567957878113, "learning_rate": 0.0002, "epoch": 0.7302916758504151, "step": 4970}, {"loss": 0.8639, "grad_norm": 0.16957546770572662, "learning_rate": 0.0002, "epoch": 
0.7317610756006171, "step": 4980}, {"loss": 0.8676, "grad_norm": 0.22221924364566803, "learning_rate": 0.0002, "epoch": 0.7332304753508192, "step": 4990}, {"loss": 0.847, "grad_norm": 0.19172297418117523, "learning_rate": 0.0002, "epoch": 0.7346998751010212, "step": 5000}, {"loss": 0.8611, "grad_norm": 0.1819739043712616, "learning_rate": 0.0002, "epoch": 0.7361692748512233, "step": 5010}, {"loss": 0.9207, "grad_norm": 0.19842708110809326, "learning_rate": 0.0002, "epoch": 0.7376386746014253, "step": 5020}, {"loss": 0.9317, "grad_norm": 0.21730005741119385, "learning_rate": 0.0002, "epoch": 0.7391080743516274, "step": 5030}, {"loss": 0.8711, "grad_norm": 0.19902893900871277, "learning_rate": 0.0002, "epoch": 0.7405774741018294, "step": 5040}, {"loss": 0.9127, "grad_norm": 0.32140645384788513, "learning_rate": 0.0002, "epoch": 0.7420468738520315, "step": 5050}, {"loss": 0.8503, "grad_norm": 0.19738434255123138, "learning_rate": 0.0002, "epoch": 0.7435162736022335, "step": 5060}, {"loss": 0.8595, "grad_norm": 0.20060673356056213, "learning_rate": 0.0002, "epoch": 0.7449856733524355, "step": 5070}, {"loss": 0.8745, "grad_norm": 0.2181681990623474, "learning_rate": 0.0002, "epoch": 0.7464550731026376, "step": 5080}, {"loss": 0.8158, "grad_norm": 0.17173151671886444, "learning_rate": 0.0002, "epoch": 0.7479244728528396, "step": 5090}, {"loss": 0.8731, "grad_norm": 0.20893409848213196, "learning_rate": 0.0002, "epoch": 0.7493938726030417, "step": 5100}, {"loss": 0.8195, "grad_norm": 0.2332800328731537, "learning_rate": 0.0002, "epoch": 0.7508632723532437, "step": 5110}, {"loss": 0.8858, "grad_norm": 0.1776638776063919, "learning_rate": 0.0002, "epoch": 0.7523326721034458, "step": 5120}, {"loss": 0.9075, "grad_norm": 0.22636181116104126, "learning_rate": 0.0002, "epoch": 0.7538020718536478, "step": 5130}, {"loss": 0.9181, "grad_norm": 0.19356651604175568, "learning_rate": 0.0002, "epoch": 0.7552714716038498, "step": 5140}, {"loss": 0.8544, "grad_norm": 0.1688968986272812, "learning_rate": 0.0002, "epoch": 0.7567408713540519, "step": 5150}, {"loss": 0.8856, "grad_norm": 0.23556996881961823, "learning_rate": 0.0002, "epoch": 0.7582102711042539, "step": 5160}, {"loss": 0.8306, "grad_norm": 0.1970166265964508, "learning_rate": 0.0002, "epoch": 0.759679670854456, "step": 5170}, {"loss": 0.8523, "grad_norm": 0.21596881747245789, "learning_rate": 0.0002, "epoch": 0.761149070604658, "step": 5180}, {"loss": 0.852, "grad_norm": 0.2110803723335266, "learning_rate": 0.0002, "epoch": 0.7626184703548601, "step": 5190}, {"loss": 0.9321, "grad_norm": 0.17843499779701233, "learning_rate": 0.0002, "epoch": 0.7640878701050621, "step": 5200}, {"loss": 0.8705, "grad_norm": 0.20315982401371002, "learning_rate": 0.0002, "epoch": 0.7655572698552642, "step": 5210}, {"loss": 0.8578, "grad_norm": 0.20971283316612244, "learning_rate": 0.0002, "epoch": 0.7670266696054662, "step": 5220}, {"loss": 0.8508, "grad_norm": 0.2418205440044403, "learning_rate": 0.0002, "epoch": 0.7684960693556682, "step": 5230}, {"loss": 0.8774, "grad_norm": 0.23278863728046417, "learning_rate": 0.0002, "epoch": 0.7699654691058703, "step": 5240}, {"loss": 0.874, "grad_norm": 0.24266687035560608, "learning_rate": 0.0002, "epoch": 0.7714348688560723, "step": 5250}, {"loss": 0.8593, "grad_norm": 0.22577625513076782, "learning_rate": 0.0002, "epoch": 0.7729042686062744, "step": 5260}, {"loss": 0.8249, "grad_norm": 0.1861124336719513, "learning_rate": 0.0002, "epoch": 0.7743736683564764, "step": 5270}, {"loss": 0.9396, "grad_norm": 0.22679945826530457, 
"learning_rate": 0.0002, "epoch": 0.7758430681066785, "step": 5280}, {"loss": 0.8809, "grad_norm": 0.1723605990409851, "learning_rate": 0.0002, "epoch": 0.7773124678568805, "step": 5290}, {"loss": 0.8348, "grad_norm": 0.17632266879081726, "learning_rate": 0.0002, "epoch": 0.7787818676070825, "step": 5300}, {"loss": 0.856, "grad_norm": 0.21744535863399506, "learning_rate": 0.0002, "epoch": 0.7802512673572846, "step": 5310}, {"loss": 0.866, "grad_norm": 0.20521794259548187, "learning_rate": 0.0002, "epoch": 0.7817206671074866, "step": 5320}, {"loss": 0.8404, "grad_norm": 0.20621269941329956, "learning_rate": 0.0002, "epoch": 0.7831900668576887, "step": 5330}, {"loss": 0.9365, "grad_norm": 0.23038426041603088, "learning_rate": 0.0002, "epoch": 0.7846594666078907, "step": 5340}, {"loss": 0.8675, "grad_norm": 0.24003778398036957, "learning_rate": 0.0002, "epoch": 0.7861288663580928, "step": 5350}, {"loss": 0.8387, "grad_norm": 0.18108169734477997, "learning_rate": 0.0002, "epoch": 0.7875982661082948, "step": 5360}, {"loss": 0.8689, "grad_norm": 0.19610895216464996, "learning_rate": 0.0002, "epoch": 0.7890676658584969, "step": 5370}, {"loss": 0.8522, "grad_norm": 0.19682256877422333, "learning_rate": 0.0002, "epoch": 0.7905370656086989, "step": 5380}, {"loss": 0.8903, "grad_norm": 0.18536333739757538, "learning_rate": 0.0002, "epoch": 0.7920064653589008, "step": 5390}, {"loss": 0.847, "grad_norm": 0.1963324099779129, "learning_rate": 0.0002, "epoch": 0.793475865109103, "step": 5400}, {"loss": 0.8547, "grad_norm": 0.251597136259079, "learning_rate": 0.0002, "epoch": 0.794945264859305, "step": 5410}, {"loss": 0.8413, "grad_norm": 0.19656604528427124, "learning_rate": 0.0002, "epoch": 0.796414664609507, "step": 5420}, {"loss": 0.878, "grad_norm": 0.20754213631153107, "learning_rate": 0.0002, "epoch": 0.797884064359709, "step": 5430}, {"loss": 0.9426, "grad_norm": 0.23673921823501587, "learning_rate": 0.0002, "epoch": 0.7993534641099111, "step": 5440}, {"loss": 0.8233, "grad_norm": 0.19087398052215576, "learning_rate": 0.0002, "epoch": 0.8008228638601131, "step": 5450}, {"loss": 0.8205, "grad_norm": 0.212051123380661, "learning_rate": 0.0002, "epoch": 0.8022922636103151, "step": 5460}, {"loss": 0.8366, "grad_norm": 0.18929585814476013, "learning_rate": 0.0002, "epoch": 0.8037616633605172, "step": 5470}, {"loss": 0.7821, "grad_norm": 0.2048686146736145, "learning_rate": 0.0002, "epoch": 0.8052310631107192, "step": 5480}, {"loss": 0.9052, "grad_norm": 0.18554654717445374, "learning_rate": 0.0002, "epoch": 0.8067004628609213, "step": 5490}, {"loss": 0.8173, "grad_norm": 0.22007998824119568, "learning_rate": 0.0002, "epoch": 0.8081698626111233, "step": 5500}, {"loss": 0.8483, "grad_norm": 0.19007296860218048, "learning_rate": 0.0002, "epoch": 0.8096392623613254, "step": 5510}, {"loss": 0.863, "grad_norm": 0.21111422777175903, "learning_rate": 0.0002, "epoch": 0.8111086621115274, "step": 5520}, {"loss": 0.8633, "grad_norm": 0.21857836842536926, "learning_rate": 0.0002, "epoch": 0.8125780618617294, "step": 5530}, {"loss": 0.9347, "grad_norm": 0.215643048286438, "learning_rate": 0.0002, "epoch": 0.8140474616119315, "step": 5540}, {"loss": 0.8489, "grad_norm": 0.20151667296886444, "learning_rate": 0.0002, "epoch": 0.8155168613621335, "step": 5550}, {"loss": 0.8972, "grad_norm": 0.23732979595661163, "learning_rate": 0.0002, "epoch": 0.8169862611123356, "step": 5560}, {"loss": 0.821, "grad_norm": 0.260456919670105, "learning_rate": 0.0002, "epoch": 0.8184556608625376, "step": 5570}, {"loss": 0.8752, 
"grad_norm": 0.2437278777360916, "learning_rate": 0.0002, "epoch": 0.8199250606127397, "step": 5580}, {"loss": 0.8928, "grad_norm": 0.21646688878536224, "learning_rate": 0.0002, "epoch": 0.8213944603629417, "step": 5590}, {"loss": 0.8625, "grad_norm": 0.19661495089530945, "learning_rate": 0.0002, "epoch": 0.8228638601131438, "step": 5600}, {"loss": 0.8742, "grad_norm": 0.23855896294116974, "learning_rate": 0.0002, "epoch": 0.8243332598633458, "step": 5610}, {"loss": 0.888, "grad_norm": 0.2652871608734131, "learning_rate": 0.0002, "epoch": 0.8258026596135478, "step": 5620}, {"loss": 0.8737, "grad_norm": 0.23708868026733398, "learning_rate": 0.0002, "epoch": 0.8272720593637499, "step": 5630}, {"loss": 0.8802, "grad_norm": 0.24858877062797546, "learning_rate": 0.0002, "epoch": 0.8287414591139519, "step": 5640}, {"loss": 0.8607, "grad_norm": 0.1882055699825287, "learning_rate": 0.0002, "epoch": 0.830210858864154, "step": 5650}, {"loss": 0.8598, "grad_norm": 0.21917679905891418, "learning_rate": 0.0002, "epoch": 0.831680258614356, "step": 5660}, {"loss": 0.8688, "grad_norm": 0.18880417943000793, "learning_rate": 0.0002, "epoch": 0.8331496583645581, "step": 5670}, {"loss": 0.9342, "grad_norm": 0.21574261784553528, "learning_rate": 0.0002, "epoch": 0.8346190581147601, "step": 5680}, {"loss": 0.8554, "grad_norm": 0.18751873075962067, "learning_rate": 0.0002, "epoch": 0.8360884578649621, "step": 5690}, {"loss": 0.8405, "grad_norm": 0.2063598334789276, "learning_rate": 0.0002, "epoch": 0.8375578576151642, "step": 5700}, {"loss": 0.8892, "grad_norm": 0.21342656016349792, "learning_rate": 0.0002, "epoch": 0.8390272573653662, "step": 5710}, {"loss": 0.8858, "grad_norm": 0.2105468511581421, "learning_rate": 0.0002, "epoch": 0.8404966571155683, "step": 5720}, {"loss": 0.891, "grad_norm": 0.28539690375328064, "learning_rate": 0.0002, "epoch": 0.8419660568657703, "step": 5730}, {"loss": 0.8949, "grad_norm": 0.2243139147758484, "learning_rate": 0.0002, "epoch": 0.8434354566159724, "step": 5740}, {"loss": 0.8796, "grad_norm": 0.22701016068458557, "learning_rate": 0.0002, "epoch": 0.8449048563661744, "step": 5750}, {"loss": 0.8848, "grad_norm": 0.19109635055065155, "learning_rate": 0.0002, "epoch": 0.8463742561163765, "step": 5760}, {"loss": 0.8585, "grad_norm": 0.20332100987434387, "learning_rate": 0.0002, "epoch": 0.8478436558665785, "step": 5770}, {"loss": 0.8115, "grad_norm": 0.17545317113399506, "learning_rate": 0.0002, "epoch": 0.8493130556167805, "step": 5780}, {"loss": 0.8946, "grad_norm": 0.19831585884094238, "learning_rate": 0.0002, "epoch": 0.8507824553669826, "step": 5790}, {"loss": 0.8708, "grad_norm": 0.19500254094600677, "learning_rate": 0.0002, "epoch": 0.8522518551171846, "step": 5800}, {"loss": 0.8459, "grad_norm": 0.2453385889530182, "learning_rate": 0.0002, "epoch": 0.8537212548673867, "step": 5810}, {"loss": 0.8524, "grad_norm": 0.2186202108860016, "learning_rate": 0.0002, "epoch": 0.8551906546175887, "step": 5820}, {"loss": 0.9496, "grad_norm": 0.21183040738105774, "learning_rate": 0.0002, "epoch": 0.8566600543677908, "step": 5830}, {"loss": 0.8756, "grad_norm": 0.2365546077489853, "learning_rate": 0.0002, "epoch": 0.8581294541179928, "step": 5840}, {"loss": 0.9296, "grad_norm": 0.22473560273647308, "learning_rate": 0.0002, "epoch": 0.8595988538681948, "step": 5850}, {"loss": 0.8847, "grad_norm": 0.20596253871917725, "learning_rate": 0.0002, "epoch": 0.8610682536183969, "step": 5860}, {"loss": 0.8883, "grad_norm": 0.23390474915504456, "learning_rate": 0.0002, "epoch": 0.8625376533685989, 
"step": 5870}, {"loss": 0.8547, "grad_norm": 0.18226845562458038, "learning_rate": 0.0002, "epoch": 0.864007053118801, "step": 5880}, {"loss": 0.8397, "grad_norm": 0.1927943080663681, "learning_rate": 0.0002, "epoch": 0.865476452869003, "step": 5890}, {"loss": 0.8823, "grad_norm": 0.20829688012599945, "learning_rate": 0.0002, "epoch": 0.8669458526192051, "step": 5900}, {"loss": 0.8505, "grad_norm": 0.17833663523197174, "learning_rate": 0.0002, "epoch": 0.8684152523694071, "step": 5910}, {"loss": 0.8402, "grad_norm": 0.19247068464756012, "learning_rate": 0.0002, "epoch": 0.8698846521196092, "step": 5920}, {"loss": 0.8338, "grad_norm": 0.2055516242980957, "learning_rate": 0.0002, "epoch": 0.8713540518698112, "step": 5930}, {"loss": 0.9217, "grad_norm": 0.27160409092903137, "learning_rate": 0.0002, "epoch": 0.8728234516200132, "step": 5940}, {"loss": 0.8672, "grad_norm": 0.2160903513431549, "learning_rate": 0.0002, "epoch": 0.8742928513702153, "step": 5950}, {"loss": 0.8642, "grad_norm": 0.23171348869800568, "learning_rate": 0.0002, "epoch": 0.8757622511204173, "step": 5960}, {"loss": 0.9209, "grad_norm": 0.2006101757287979, "learning_rate": 0.0002, "epoch": 0.8772316508706194, "step": 5970}, {"loss": 0.8807, "grad_norm": 0.2423861026763916, "learning_rate": 0.0002, "epoch": 0.8787010506208214, "step": 5980}, {"loss": 0.9509, "grad_norm": 0.19111405313014984, "learning_rate": 0.0002, "epoch": 0.8801704503710235, "step": 5990}, {"loss": 0.9044, "grad_norm": 0.23003137111663818, "learning_rate": 0.0002, "epoch": 0.8816398501212255, "step": 6000}, {"loss": 0.8779, "grad_norm": 0.2220255732536316, "learning_rate": 0.0002, "epoch": 0.8831092498714275, "step": 6010}, {"loss": 0.8628, "grad_norm": 0.20620617270469666, "learning_rate": 0.0002, "epoch": 0.8845786496216296, "step": 6020}, {"loss": 0.8993, "grad_norm": 0.2374424934387207, "learning_rate": 0.0002, "epoch": 0.8860480493718316, "step": 6030}, {"loss": 0.8839, "grad_norm": 0.19202063977718353, "learning_rate": 0.0002, "epoch": 0.8875174491220337, "step": 6040}, {"loss": 0.9133, "grad_norm": 0.22009184956550598, "learning_rate": 0.0002, "epoch": 0.8889868488722357, "step": 6050}, {"loss": 0.8817, "grad_norm": 0.1983577460050583, "learning_rate": 0.0002, "epoch": 0.8904562486224378, "step": 6060}, {"loss": 0.9048, "grad_norm": 0.23484794795513153, "learning_rate": 0.0002, "epoch": 0.8919256483726398, "step": 6070}, {"loss": 0.8533, "grad_norm": 0.1894720047712326, "learning_rate": 0.0002, "epoch": 0.8933950481228419, "step": 6080}, {"loss": 0.8875, "grad_norm": 0.19840069115161896, "learning_rate": 0.0002, "epoch": 0.8948644478730439, "step": 6090}, {"loss": 0.9358, "grad_norm": 0.22224456071853638, "learning_rate": 0.0002, "epoch": 0.8963338476232459, "step": 6100}, {"loss": 0.9034, "grad_norm": 0.22123970091342926, "learning_rate": 0.0002, "epoch": 0.897803247373448, "step": 6110}, {"loss": 0.8504, "grad_norm": 0.19278573989868164, "learning_rate": 0.0002, "epoch": 0.89927264712365, "step": 6120}, {"loss": 0.874, "grad_norm": 0.3129579722881317, "learning_rate": 0.0002, "epoch": 0.9007420468738521, "step": 6130}, {"loss": 0.8456, "grad_norm": 0.2044374942779541, "learning_rate": 0.0002, "epoch": 0.9022114466240541, "step": 6140}, {"loss": 0.809, "grad_norm": 0.2066025733947754, "learning_rate": 0.0002, "epoch": 0.9036808463742562, "step": 6150}, {"loss": 0.8497, "grad_norm": 0.22208696603775024, "learning_rate": 0.0002, "epoch": 0.9051502461244582, "step": 6160}, {"loss": 0.8942, "grad_norm": 0.2257661372423172, "learning_rate": 0.0002, 
"epoch": 0.9066196458746602, "step": 6170}, {"loss": 0.8938, "grad_norm": 0.20444297790527344, "learning_rate": 0.0002, "epoch": 0.9080890456248623, "step": 6180}, {"loss": 0.8537, "grad_norm": 0.21641409397125244, "learning_rate": 0.0002, "epoch": 0.9095584453750643, "step": 6190}, {"loss": 0.8559, "grad_norm": 0.19348600506782532, "learning_rate": 0.0002, "epoch": 0.9110278451252664, "step": 6200}, {"loss": 0.8999, "grad_norm": 0.2026488333940506, "learning_rate": 0.0002, "epoch": 0.9124972448754683, "step": 6210}, {"loss": 0.8318, "grad_norm": 0.18936511874198914, "learning_rate": 0.0002, "epoch": 0.9139666446256705, "step": 6220}, {"loss": 0.8225, "grad_norm": 0.1924109011888504, "learning_rate": 0.0002, "epoch": 0.9154360443758724, "step": 6230}, {"loss": 0.8038, "grad_norm": 0.23843780159950256, "learning_rate": 0.0002, "epoch": 0.9169054441260746, "step": 6240}, {"loss": 0.8737, "grad_norm": 0.2209387719631195, "learning_rate": 0.0002, "epoch": 0.9183748438762765, "step": 6250}, {"loss": 0.8787, "grad_norm": 0.2409553974866867, "learning_rate": 0.0002, "epoch": 0.9198442436264785, "step": 6260}, {"loss": 0.8366, "grad_norm": 0.18180789053440094, "learning_rate": 0.0002, "epoch": 0.9213136433766806, "step": 6270}, {"loss": 0.868, "grad_norm": 0.2026936113834381, "learning_rate": 0.0002, "epoch": 0.9227830431268826, "step": 6280}, {"loss": 0.8656, "grad_norm": 0.17562644183635712, "learning_rate": 0.0002, "epoch": 0.9242524428770847, "step": 6290}, {"loss": 0.844, "grad_norm": 0.21019227802753448, "learning_rate": 0.0002, "epoch": 0.9257218426272867, "step": 6300}, {"loss": 0.8932, "grad_norm": 0.19504691660404205, "learning_rate": 0.0002, "epoch": 0.9271912423774888, "step": 6310}, {"loss": 0.8236, "grad_norm": 0.21586452424526215, "learning_rate": 0.0002, "epoch": 0.9286606421276908, "step": 6320}, {"loss": 0.8103, "grad_norm": 0.24656902253627777, "learning_rate": 0.0002, "epoch": 0.9301300418778928, "step": 6330}, {"loss": 0.8685, "grad_norm": 0.23663389682769775, "learning_rate": 0.0002, "epoch": 0.9315994416280949, "step": 6340}, {"loss": 0.8901, "grad_norm": 0.19826091825962067, "learning_rate": 0.0002, "epoch": 0.9330688413782969, "step": 6350}, {"loss": 0.8232, "grad_norm": 0.2152305692434311, "learning_rate": 0.0002, "epoch": 0.934538241128499, "step": 6360}, {"loss": 0.8913, "grad_norm": 0.18090355396270752, "learning_rate": 0.0002, "epoch": 0.936007640878701, "step": 6370}, {"loss": 0.8675, "grad_norm": 0.2050005942583084, "learning_rate": 0.0002, "epoch": 0.9374770406289031, "step": 6380}, {"loss": 0.8512, "grad_norm": 0.20134799182415009, "learning_rate": 0.0002, "epoch": 0.9389464403791051, "step": 6390}, {"loss": 0.9153, "grad_norm": 0.24282105267047882, "learning_rate": 0.0002, "epoch": 0.9404158401293071, "step": 6400}, {"loss": 0.9048, "grad_norm": 0.22150756418704987, "learning_rate": 0.0002, "epoch": 0.9418852398795092, "step": 6410}, {"loss": 0.8512, "grad_norm": 0.21308870613574982, "learning_rate": 0.0002, "epoch": 0.9433546396297112, "step": 6420}, {"loss": 0.7941, "grad_norm": 0.18100349605083466, "learning_rate": 0.0002, "epoch": 0.9448240393799133, "step": 6430}, {"loss": 0.8442, "grad_norm": 0.18279363214969635, "learning_rate": 0.0002, "epoch": 0.9462934391301153, "step": 6440}, {"loss": 0.8976, "grad_norm": 0.20209787786006927, "learning_rate": 0.0002, "epoch": 0.9477628388803174, "step": 6450}, {"loss": 0.8541, "grad_norm": 0.18696613609790802, "learning_rate": 0.0002, "epoch": 0.9492322386305194, "step": 6460}, {"loss": 0.8992, "grad_norm": 
0.17197009921073914, "learning_rate": 0.0002, "epoch": 0.9507016383807215, "step": 6470}, {"loss": 0.8483, "grad_norm": 0.21374905109405518, "learning_rate": 0.0002, "epoch": 0.9521710381309235, "step": 6480}, {"loss": 0.8658, "grad_norm": 0.17458385229110718, "learning_rate": 0.0002, "epoch": 0.9536404378811255, "step": 6490}, {"loss": 0.8481, "grad_norm": 0.2173559069633484, "learning_rate": 0.0002, "epoch": 0.9551098376313276, "step": 6500}, {"loss": 0.8209, "grad_norm": 0.23596982657909393, "learning_rate": 0.0002, "epoch": 0.9565792373815296, "step": 6510}, {"loss": 0.8659, "grad_norm": 0.23647022247314453, "learning_rate": 0.0002, "epoch": 0.9580486371317317, "step": 6520}, {"loss": 0.8995, "grad_norm": 0.21122702956199646, "learning_rate": 0.0002, "epoch": 0.9595180368819337, "step": 6530}, {"loss": 0.8635, "grad_norm": 0.18416011333465576, "learning_rate": 0.0002, "epoch": 0.9609874366321358, "step": 6540}, {"loss": 0.8958, "grad_norm": 0.2422763556241989, "learning_rate": 0.0002, "epoch": 0.9624568363823378, "step": 6550}, {"loss": 0.8909, "grad_norm": 0.21425847709178925, "learning_rate": 0.0002, "epoch": 0.9639262361325398, "step": 6560}, {"loss": 0.8963, "grad_norm": 0.20464813709259033, "learning_rate": 0.0002, "epoch": 0.9653956358827419, "step": 6570}, {"loss": 0.8701, "grad_norm": 0.2020505964756012, "learning_rate": 0.0002, "epoch": 0.9668650356329439, "step": 6580}, {"loss": 0.8604, "grad_norm": 0.22078554332256317, "learning_rate": 0.0002, "epoch": 0.968334435383146, "step": 6590}, {"loss": 0.8945, "grad_norm": 0.2235570102930069, "learning_rate": 0.0002, "epoch": 0.969803835133348, "step": 6600}, {"loss": 0.8088, "grad_norm": 0.17768371105194092, "learning_rate": 0.0002, "epoch": 0.9712732348835501, "step": 6610}, {"loss": 0.8243, "grad_norm": 0.2664101719856262, "learning_rate": 0.0002, "epoch": 0.9727426346337521, "step": 6620}, {"loss": 0.8655, "grad_norm": 0.20040933787822723, "learning_rate": 0.0002, "epoch": 0.9742120343839542, "step": 6630}, {"loss": 0.826, "grad_norm": 0.19160234928131104, "learning_rate": 0.0002, "epoch": 0.9756814341341562, "step": 6640}, {"loss": 0.8725, "grad_norm": 0.2631106674671173, "learning_rate": 0.0002, "epoch": 0.9771508338843582, "step": 6650}, {"loss": 0.9271, "grad_norm": 0.23195059597492218, "learning_rate": 0.0002, "epoch": 0.9786202336345603, "step": 6660}, {"loss": 0.8629, "grad_norm": 0.211252361536026, "learning_rate": 0.0002, "epoch": 0.9800896333847623, "step": 6670}, {"loss": 0.8745, "grad_norm": 0.23654179275035858, "learning_rate": 0.0002, "epoch": 0.9815590331349644, "step": 6680}, {"loss": 0.9008, "grad_norm": 0.19568824768066406, "learning_rate": 0.0002, "epoch": 0.9830284328851664, "step": 6690}, {"loss": 0.8522, "grad_norm": 0.20598605275154114, "learning_rate": 0.0002, "epoch": 0.9844978326353685, "step": 6700}, {"loss": 0.8823, "grad_norm": 0.1991468369960785, "learning_rate": 0.0002, "epoch": 0.9859672323855705, "step": 6710}, {"loss": 0.8417, "grad_norm": 0.2218412607908249, "learning_rate": 0.0002, "epoch": 0.9874366321357725, "step": 6720}, {"loss": 0.8989, "grad_norm": 0.19989511370658875, "learning_rate": 0.0002, "epoch": 0.9889060318859746, "step": 6730}, {"loss": 0.8596, "grad_norm": 0.19747602939605713, "learning_rate": 0.0002, "epoch": 0.9903754316361766, "step": 6740}, {"loss": 0.9338, "grad_norm": 0.22099170088768005, "learning_rate": 0.0002, "epoch": 0.9918448313863787, "step": 6750}, {"loss": 0.8512, "grad_norm": 0.21557390689849854, "learning_rate": 0.0002, "epoch": 0.9933142311365807, "step": 
6760}, {"loss": 0.8422, "grad_norm": 0.22880250215530396, "learning_rate": 0.0002, "epoch": 0.9947836308867828, "step": 6770}, {"loss": 0.8933, "grad_norm": 0.2067687213420868, "learning_rate": 0.0002, "epoch": 0.9962530306369848, "step": 6780}, {"loss": 0.9045, "grad_norm": 0.22168412804603577, "learning_rate": 0.0002, "epoch": 0.9977224303871869, "step": 6790}, {"loss": 0.8565, "grad_norm": 0.18633197247982025, "learning_rate": 0.0002, "epoch": 0.9991918301373889, "step": 6800}]}