LlamaSummary / trainer_log_history.jsonl
Sidharthkr's picture
Upload trainer_log_history.jsonl with huggingface_hub
eed1f1f
{"loss": 2.7936, "learning_rate": 2.9999999999999997e-05, "epoch": 0.04, "step": 10}
{"loss": 2.6511, "learning_rate": 5.9999999999999995e-05, "epoch": 0.07, "step": 20}
{"loss": 2.634, "learning_rate": 8.999999999999999e-05, "epoch": 0.11, "step": 30}
{"loss": 2.6239, "learning_rate": 0.00011999999999999999, "epoch": 0.14, "step": 40}
{"loss": 2.5104, "learning_rate": 0.00015, "epoch": 0.18, "step": 50}
{"loss": 2.3821, "learning_rate": 0.00017999999999999998, "epoch": 0.22, "step": 60}
{"loss": 2.3514, "learning_rate": 0.00020999999999999998, "epoch": 0.25, "step": 70}
{"loss": 2.2218, "learning_rate": 0.00023999999999999998, "epoch": 0.29, "step": 80}
{"loss": 2.2881, "learning_rate": 0.00027, "epoch": 0.32, "step": 90}
{"loss": 2.3525, "learning_rate": 0.0003, "epoch": 0.36, "step": 100}
{"loss": 2.3534, "learning_rate": 0.00029888059701492536, "epoch": 0.4, "step": 110}
{"loss": 2.375, "learning_rate": 0.00029776119402985074, "epoch": 0.43, "step": 120}
{"loss": 2.1723, "learning_rate": 0.0002966417910447761, "epoch": 0.47, "step": 130}
{"loss": 2.1229, "learning_rate": 0.00029552238805970145, "epoch": 0.5, "step": 140}
{"loss": 2.3203, "learning_rate": 0.00029440298507462683, "epoch": 0.54, "step": 150}
{"loss": 2.2084, "learning_rate": 0.0002932835820895522, "epoch": 0.58, "step": 160}
{"loss": 2.2841, "learning_rate": 0.0002921641791044776, "epoch": 0.61, "step": 170}
{"loss": 2.135, "learning_rate": 0.00029104477611940297, "epoch": 0.65, "step": 180}
{"loss": 2.2003, "learning_rate": 0.0002899253731343283, "epoch": 0.68, "step": 190}
{"loss": 2.2004, "learning_rate": 0.0002888059701492537, "epoch": 0.72, "step": 200}
{"loss": 2.0142, "learning_rate": 0.0002876865671641791, "epoch": 0.76, "step": 210}
{"loss": 2.1899, "learning_rate": 0.00028656716417910444, "epoch": 0.79, "step": 220}
{"loss": 2.1275, "learning_rate": 0.0002854477611940298, "epoch": 0.83, "step": 230}
{"loss": 2.2414, "learning_rate": 0.0002843283582089552, "epoch": 0.86, "step": 240}
{"loss": 2.162, "learning_rate": 0.0002832089552238806, "epoch": 0.9, "step": 250}
{"loss": 2.1962, "learning_rate": 0.00028208955223880597, "epoch": 0.94, "step": 260}
{"loss": 2.0908, "learning_rate": 0.0002809701492537313, "epoch": 0.97, "step": 270}
{"loss": 2.2293, "learning_rate": 0.0002798507462686567, "epoch": 1.01, "step": 280}
{"loss": 2.1165, "learning_rate": 0.00027873134328358206, "epoch": 1.04, "step": 290}
{"loss": 2.1872, "learning_rate": 0.00027761194029850744, "epoch": 1.08, "step": 300}
{"loss": 2.0367, "learning_rate": 0.0002764925373134328, "epoch": 1.12, "step": 310}
{"loss": 2.0395, "learning_rate": 0.0002753731343283582, "epoch": 1.15, "step": 320}
{"loss": 2.0058, "learning_rate": 0.0002742537313432836, "epoch": 1.19, "step": 330}
{"loss": 1.9998, "learning_rate": 0.0002731343283582089, "epoch": 1.22, "step": 340}
{"loss": 2.1682, "learning_rate": 0.0002720149253731343, "epoch": 1.26, "step": 350}
{"loss": 1.9747, "learning_rate": 0.0002708955223880597, "epoch": 1.29, "step": 360}
{"loss": 2.0787, "learning_rate": 0.00026977611940298506, "epoch": 1.33, "step": 370}
{"loss": 2.235, "learning_rate": 0.00026865671641791044, "epoch": 1.37, "step": 380}
{"loss": 1.9715, "learning_rate": 0.00026753731343283577, "epoch": 1.4, "step": 390}
{"loss": 1.9939, "learning_rate": 0.00026641791044776115, "epoch": 1.44, "step": 400}
{"loss": 2.0815, "learning_rate": 0.0002652985074626866, "epoch": 1.47, "step": 410}
{"loss": 2.0237, "learning_rate": 0.0002641791044776119, "epoch": 1.51, "step": 420}
{"loss": 2.3032, "learning_rate": 0.0002630597014925373, "epoch": 1.55, "step": 430}
{"loss": 2.1848, "learning_rate": 0.0002619402985074627, "epoch": 1.58, "step": 440}
{"loss": 2.1893, "learning_rate": 0.00026082089552238806, "epoch": 1.62, "step": 450}
{"loss": 1.9002, "learning_rate": 0.0002597014925373134, "epoch": 1.65, "step": 460}
{"loss": 2.0481, "learning_rate": 0.00025858208955223876, "epoch": 1.69, "step": 470}
{"loss": 2.1521, "learning_rate": 0.00025746268656716415, "epoch": 1.73, "step": 480}
{"loss": 1.9633, "learning_rate": 0.00025634328358208953, "epoch": 1.76, "step": 490}
{"loss": 2.1533, "learning_rate": 0.0002552238805970149, "epoch": 1.8, "step": 500}
{"loss": 1.8578, "learning_rate": 0.00025410447761194024, "epoch": 1.83, "step": 510}
{"loss": 2.0494, "learning_rate": 0.00025298507462686567, "epoch": 1.87, "step": 520}
{"loss": 2.1759, "learning_rate": 0.00025186567164179105, "epoch": 1.91, "step": 530}
{"loss": 2.0618, "learning_rate": 0.0002507462686567164, "epoch": 1.94, "step": 540}
{"loss": 2.0098, "learning_rate": 0.00024962686567164176, "epoch": 1.98, "step": 550}
{"loss": 1.9424, "learning_rate": 0.00024850746268656714, "epoch": 2.01, "step": 560}
{"loss": 1.8802, "learning_rate": 0.0002473880597014925, "epoch": 2.05, "step": 570}
{"loss": 2.0241, "learning_rate": 0.0002462686567164179, "epoch": 2.09, "step": 580}
{"loss": 2.0073, "learning_rate": 0.00024514925373134323, "epoch": 2.12, "step": 590}
{"loss": 2.0162, "learning_rate": 0.00024402985074626864, "epoch": 2.16, "step": 600}
{"loss": 1.8319, "learning_rate": 0.000242910447761194, "epoch": 2.19, "step": 610}
{"loss": 1.814, "learning_rate": 0.00024179104477611938, "epoch": 2.23, "step": 620}
{"loss": 1.5829, "learning_rate": 0.00024067164179104476, "epoch": 2.27, "step": 630}
{"loss": 1.7978, "learning_rate": 0.00023955223880597012, "epoch": 2.3, "step": 640}
{"loss": 1.8052, "learning_rate": 0.0002384328358208955, "epoch": 2.34, "step": 650}
{"loss": 2.0461, "learning_rate": 0.00023731343283582085, "epoch": 2.37, "step": 660}
{"loss": 1.8847, "learning_rate": 0.00023619402985074626, "epoch": 2.41, "step": 670}
{"loss": 1.8465, "learning_rate": 0.00023507462686567164, "epoch": 2.45, "step": 680}
{"loss": 1.7806, "learning_rate": 0.000233955223880597, "epoch": 2.48, "step": 690}
{"loss": 1.8604, "learning_rate": 0.00023283582089552238, "epoch": 2.52, "step": 700}
{"loss": 1.7639, "learning_rate": 0.00023171641791044773, "epoch": 2.55, "step": 710}
{"loss": 1.8926, "learning_rate": 0.0002305970149253731, "epoch": 2.59, "step": 720}
{"loss": 1.7832, "learning_rate": 0.0002294776119402985, "epoch": 2.63, "step": 730}
{"loss": 1.7619, "learning_rate": 0.00022835820895522385, "epoch": 2.66, "step": 740}
{"loss": 1.8423, "learning_rate": 0.00022723880597014923, "epoch": 2.7, "step": 750}
{"loss": 1.9202, "learning_rate": 0.00022611940298507459, "epoch": 2.73, "step": 760}
{"loss": 2.0189, "learning_rate": 0.000225, "epoch": 2.77, "step": 770}
{"loss": 2.0472, "learning_rate": 0.00022388059701492538, "epoch": 2.81, "step": 780}
{"loss": 2.0446, "learning_rate": 0.00022276119402985073, "epoch": 2.84, "step": 790}
{"loss": 1.8546, "learning_rate": 0.0002216417910447761, "epoch": 2.88, "step": 800}
{"loss": 1.896, "learning_rate": 0.00022052238805970147, "epoch": 2.91, "step": 810}
{"loss": 2.0741, "learning_rate": 0.00021940298507462685, "epoch": 2.95, "step": 820}
{"loss": 1.8384, "learning_rate": 0.00021828358208955223, "epoch": 2.99, "step": 830}
{"loss": 1.9125, "learning_rate": 0.00021716417910447758, "epoch": 3.02, "step": 840}
{"loss": 1.6362, "learning_rate": 0.00021604477611940296, "epoch": 3.06, "step": 850}
{"loss": 1.5579, "learning_rate": 0.00021492537313432832, "epoch": 3.09, "step": 860}
{"loss": 1.7687, "learning_rate": 0.0002138059701492537, "epoch": 3.13, "step": 870}
{"loss": 1.6143, "learning_rate": 0.0002126865671641791, "epoch": 3.17, "step": 880}
{"loss": 1.6969, "learning_rate": 0.00021156716417910446, "epoch": 3.2, "step": 890}
{"loss": 1.4892, "learning_rate": 0.00021044776119402985, "epoch": 3.24, "step": 900}
{"loss": 1.7392, "learning_rate": 0.0002093283582089552, "epoch": 3.27, "step": 910}
{"loss": 1.691, "learning_rate": 0.00020820895522388058, "epoch": 3.31, "step": 920}
{"loss": 1.6215, "learning_rate": 0.00020708955223880596, "epoch": 3.35, "step": 930}
{"loss": 1.7974, "learning_rate": 0.00020597014925373132, "epoch": 3.38, "step": 940}
{"loss": 1.747, "learning_rate": 0.0002048507462686567, "epoch": 3.42, "step": 950}
{"loss": 1.692, "learning_rate": 0.00020373134328358205, "epoch": 3.45, "step": 960}
{"loss": 1.5483, "learning_rate": 0.00020261194029850743, "epoch": 3.49, "step": 970}
{"loss": 1.6975, "learning_rate": 0.00020149253731343284, "epoch": 3.53, "step": 980}
{"loss": 1.4288, "learning_rate": 0.0002003731343283582, "epoch": 3.56, "step": 990}
{"loss": 1.8136, "learning_rate": 0.00019925373134328358, "epoch": 3.6, "step": 1000}
{"loss": 1.7636, "learning_rate": 0.00019813432835820893, "epoch": 3.63, "step": 1010}
{"loss": 1.7509, "learning_rate": 0.00019701492537313432, "epoch": 3.67, "step": 1020}
{"loss": 1.553, "learning_rate": 0.0001958955223880597, "epoch": 3.71, "step": 1030}
{"loss": 1.752, "learning_rate": 0.00019477611940298505, "epoch": 3.74, "step": 1040}
{"loss": 1.7697, "learning_rate": 0.00019365671641791043, "epoch": 3.78, "step": 1050}
{"loss": 1.6802, "learning_rate": 0.0001925373134328358, "epoch": 3.81, "step": 1060}
{"loss": 1.6143, "learning_rate": 0.00019141791044776117, "epoch": 3.85, "step": 1070}
{"loss": 1.8689, "learning_rate": 0.00019029850746268658, "epoch": 3.88, "step": 1080}
{"loss": 1.7677, "learning_rate": 0.00018917910447761193, "epoch": 3.92, "step": 1090}
{"loss": 1.7427, "learning_rate": 0.0001880597014925373, "epoch": 3.96, "step": 1100}
{"loss": 1.509, "learning_rate": 0.00018694029850746267, "epoch": 3.99, "step": 1110}
{"loss": 1.5502, "learning_rate": 0.00018582089552238805, "epoch": 4.03, "step": 1120}
{"loss": 1.4425, "learning_rate": 0.0001847014925373134, "epoch": 4.06, "step": 1130}
{"loss": 1.5528, "learning_rate": 0.00018358208955223879, "epoch": 4.1, "step": 1140}
{"loss": 1.445, "learning_rate": 0.00018246268656716417, "epoch": 4.14, "step": 1150}
{"loss": 1.5416, "learning_rate": 0.00018134328358208952, "epoch": 4.17, "step": 1160}
{"loss": 1.553, "learning_rate": 0.0001802238805970149, "epoch": 4.21, "step": 1170}
{"loss": 1.4628, "learning_rate": 0.00017910447761194026, "epoch": 4.24, "step": 1180}
{"loss": 1.5914, "learning_rate": 0.00017798507462686567, "epoch": 4.28, "step": 1190}
{"loss": 1.3937, "learning_rate": 0.00017686567164179105, "epoch": 4.32, "step": 1200}
{"loss": 1.4096, "learning_rate": 0.0001757462686567164, "epoch": 4.35, "step": 1210}
{"loss": 1.4138, "learning_rate": 0.00017462686567164178, "epoch": 4.39, "step": 1220}
{"loss": 1.5178, "learning_rate": 0.00017350746268656714, "epoch": 4.42, "step": 1230}
{"loss": 1.5928, "learning_rate": 0.00017238805970149252, "epoch": 4.46, "step": 1240}
{"loss": 1.4397, "learning_rate": 0.0001712686567164179, "epoch": 4.5, "step": 1250}
{"loss": 1.3036, "learning_rate": 0.00017014925373134325, "epoch": 4.53, "step": 1260}
{"loss": 1.4615, "learning_rate": 0.00016902985074626864, "epoch": 4.57, "step": 1270}
{"loss": 1.6205, "learning_rate": 0.000167910447761194, "epoch": 4.6, "step": 1280}
{"loss": 1.4631, "learning_rate": 0.0001667910447761194, "epoch": 4.64, "step": 1290}
{"loss": 1.5588, "learning_rate": 0.00016567164179104478, "epoch": 4.68, "step": 1300}
{"loss": 1.5026, "learning_rate": 0.00016455223880597014, "epoch": 4.71, "step": 1310}
{"loss": 1.4519, "learning_rate": 0.00016343283582089552, "epoch": 4.75, "step": 1320}
{"loss": 1.6337, "learning_rate": 0.00016231343283582087, "epoch": 4.78, "step": 1330}
{"loss": 1.482, "learning_rate": 0.00016119402985074625, "epoch": 4.82, "step": 1340}
{"loss": 1.3933, "learning_rate": 0.00016007462686567163, "epoch": 4.86, "step": 1350}
{"loss": 1.5357, "learning_rate": 0.000158955223880597, "epoch": 4.89, "step": 1360}
{"loss": 1.3478, "learning_rate": 0.00015783582089552237, "epoch": 4.93, "step": 1370}
{"loss": 1.5469, "learning_rate": 0.00015671641791044772, "epoch": 4.96, "step": 1380}
{"loss": 1.4212, "learning_rate": 0.00015559701492537313, "epoch": 5.0, "step": 1390}
{"loss": 1.3422, "learning_rate": 0.00015447761194029851, "epoch": 5.04, "step": 1400}
{"loss": 1.4765, "learning_rate": 0.00015335820895522387, "epoch": 5.07, "step": 1410}
{"loss": 1.2777, "learning_rate": 0.00015223880597014925, "epoch": 5.11, "step": 1420}
{"loss": 1.3657, "learning_rate": 0.0001511194029850746, "epoch": 5.14, "step": 1430}
{"loss": 1.1507, "learning_rate": 0.00015, "epoch": 5.18, "step": 1440}
{"loss": 1.489, "learning_rate": 0.00014888059701492537, "epoch": 5.22, "step": 1450}
{"loss": 1.4137, "learning_rate": 0.00014776119402985072, "epoch": 5.25, "step": 1460}
{"loss": 1.2471, "learning_rate": 0.0001466417910447761, "epoch": 5.29, "step": 1470}
{"loss": 1.3481, "learning_rate": 0.00014552238805970149, "epoch": 5.32, "step": 1480}
{"loss": 1.4573, "learning_rate": 0.00014440298507462684, "epoch": 5.36, "step": 1490}
{"loss": 1.4607, "learning_rate": 0.00014328358208955222, "epoch": 5.4, "step": 1500}
{"loss": 1.2774, "learning_rate": 0.0001421641791044776, "epoch": 5.43, "step": 1510}
{"loss": 1.1598, "learning_rate": 0.00014104477611940298, "epoch": 5.47, "step": 1520}
{"loss": 1.3139, "learning_rate": 0.00013992537313432834, "epoch": 5.5, "step": 1530}
{"loss": 1.2327, "learning_rate": 0.00013880597014925372, "epoch": 5.54, "step": 1540}
{"loss": 1.2279, "learning_rate": 0.0001376865671641791, "epoch": 5.58, "step": 1550}
{"loss": 1.3786, "learning_rate": 0.00013656716417910446, "epoch": 5.61, "step": 1560}
{"loss": 1.3426, "learning_rate": 0.00013544776119402984, "epoch": 5.65, "step": 1570}
{"loss": 1.4421, "learning_rate": 0.00013432835820895522, "epoch": 5.68, "step": 1580}
{"loss": 1.3727, "learning_rate": 0.00013320895522388057, "epoch": 5.72, "step": 1590}
{"loss": 1.1887, "learning_rate": 0.00013208955223880596, "epoch": 5.76, "step": 1600}
{"loss": 1.1396, "learning_rate": 0.00013097014925373134, "epoch": 5.79, "step": 1610}
{"loss": 1.2622, "learning_rate": 0.0001298507462686567, "epoch": 5.83, "step": 1620}
{"loss": 1.3236, "learning_rate": 0.00012873134328358207, "epoch": 5.86, "step": 1630}
{"loss": 1.2268, "learning_rate": 0.00012761194029850745, "epoch": 5.9, "step": 1640}
{"loss": 1.1743, "learning_rate": 0.00012649253731343284, "epoch": 5.94, "step": 1650}
{"loss": 1.2099, "learning_rate": 0.0001253731343283582, "epoch": 5.97, "step": 1660}
{"loss": 1.3259, "learning_rate": 0.00012425373134328357, "epoch": 6.01, "step": 1670}
{"loss": 1.0077, "learning_rate": 0.00012313432835820895, "epoch": 6.04, "step": 1680}
{"loss": 0.9612, "learning_rate": 0.00012201492537313432, "epoch": 6.08, "step": 1690}
{"loss": 1.2474, "learning_rate": 0.00012089552238805969, "epoch": 6.12, "step": 1700}
{"loss": 1.0267, "learning_rate": 0.00011977611940298506, "epoch": 6.15, "step": 1710}
{"loss": 1.0815, "learning_rate": 0.00011865671641791043, "epoch": 6.19, "step": 1720}
{"loss": 1.1129, "learning_rate": 0.00011753731343283582, "epoch": 6.22, "step": 1730}
{"loss": 1.0391, "learning_rate": 0.00011641791044776119, "epoch": 6.26, "step": 1740}
{"loss": 1.1293, "learning_rate": 0.00011529850746268656, "epoch": 6.29, "step": 1750}
{"loss": 1.2439, "learning_rate": 0.00011417910447761192, "epoch": 6.33, "step": 1760}
{"loss": 1.0303, "learning_rate": 0.00011305970149253729, "epoch": 6.37, "step": 1770}
{"loss": 1.18, "learning_rate": 0.00011194029850746269, "epoch": 6.4, "step": 1780}
{"loss": 1.1128, "learning_rate": 0.00011082089552238806, "epoch": 6.44, "step": 1790}
{"loss": 1.3343, "learning_rate": 0.00010970149253731342, "epoch": 6.47, "step": 1800}
{"loss": 1.0738, "learning_rate": 0.00010858208955223879, "epoch": 6.51, "step": 1810}
{"loss": 1.2427, "learning_rate": 0.00010746268656716416, "epoch": 6.55, "step": 1820}
{"loss": 1.2309, "learning_rate": 0.00010634328358208955, "epoch": 6.58, "step": 1830}
{"loss": 1.1866, "learning_rate": 0.00010522388059701492, "epoch": 6.62, "step": 1840}
{"loss": 1.1793, "learning_rate": 0.00010410447761194029, "epoch": 6.65, "step": 1850}
{"loss": 0.9522, "learning_rate": 0.00010298507462686566, "epoch": 6.69, "step": 1860}
{"loss": 1.2496, "learning_rate": 0.00010186567164179103, "epoch": 6.73, "step": 1870}
{"loss": 1.1967, "learning_rate": 0.00010074626865671642, "epoch": 6.76, "step": 1880}
{"loss": 1.1936, "learning_rate": 9.962686567164179e-05, "epoch": 6.8, "step": 1890}
{"loss": 1.264, "learning_rate": 9.850746268656716e-05, "epoch": 6.83, "step": 1900}
{"loss": 1.2078, "learning_rate": 9.738805970149253e-05, "epoch": 6.87, "step": 1910}
{"loss": 1.2529, "learning_rate": 9.62686567164179e-05, "epoch": 6.91, "step": 1920}
{"loss": 1.2267, "learning_rate": 9.514925373134329e-05, "epoch": 6.94, "step": 1930}
{"loss": 1.0987, "learning_rate": 9.402985074626866e-05, "epoch": 6.98, "step": 1940}
{"loss": 1.0249, "learning_rate": 9.291044776119402e-05, "epoch": 7.01, "step": 1950}
{"loss": 0.8457, "learning_rate": 9.179104477611939e-05, "epoch": 7.05, "step": 1960}
{"loss": 0.8884, "learning_rate": 9.067164179104476e-05, "epoch": 7.09, "step": 1970}
{"loss": 1.0515, "learning_rate": 8.955223880597013e-05, "epoch": 7.12, "step": 1980}
{"loss": 1.1131, "learning_rate": 8.843283582089552e-05, "epoch": 7.16, "step": 1990}
{"loss": 0.9864, "learning_rate": 8.731343283582089e-05, "epoch": 7.19, "step": 2000}
{"loss": 0.9131, "learning_rate": 8.619402985074626e-05, "epoch": 7.23, "step": 2010}
{"loss": 0.831, "learning_rate": 8.507462686567163e-05, "epoch": 7.27, "step": 2020}
{"loss": 1.0958, "learning_rate": 8.3955223880597e-05, "epoch": 7.3, "step": 2030}
{"loss": 1.1438, "learning_rate": 8.283582089552239e-05, "epoch": 7.34, "step": 2040}
{"loss": 1.1594, "learning_rate": 8.171641791044776e-05, "epoch": 7.37, "step": 2050}
{"loss": 1.0476, "learning_rate": 8.059701492537313e-05, "epoch": 7.41, "step": 2060}
{"loss": 1.0496, "learning_rate": 7.94776119402985e-05, "epoch": 7.45, "step": 2070}
{"loss": 0.8074, "learning_rate": 7.835820895522386e-05, "epoch": 7.48, "step": 2080}
{"loss": 0.8995, "learning_rate": 7.723880597014926e-05, "epoch": 7.52, "step": 2090}
{"loss": 1.0888, "learning_rate": 7.611940298507463e-05, "epoch": 7.55, "step": 2100}
{"loss": 1.2066, "learning_rate": 7.5e-05, "epoch": 7.59, "step": 2110}
{"loss": 0.9468, "learning_rate": 7.388059701492536e-05, "epoch": 7.63, "step": 2120}
{"loss": 0.9874, "learning_rate": 7.276119402985074e-05, "epoch": 7.66, "step": 2130}
{"loss": 0.8849, "learning_rate": 7.164179104477611e-05, "epoch": 7.7, "step": 2140}
{"loss": 1.0517, "learning_rate": 7.052238805970149e-05, "epoch": 7.73, "step": 2150}
{"loss": 0.9174, "learning_rate": 6.940298507462686e-05, "epoch": 7.77, "step": 2160}
{"loss": 0.9831, "learning_rate": 6.828358208955223e-05, "epoch": 7.81, "step": 2170}
{"loss": 1.1041, "learning_rate": 6.716417910447761e-05, "epoch": 7.84, "step": 2180}
{"loss": 1.0281, "learning_rate": 6.604477611940298e-05, "epoch": 7.88, "step": 2190}
{"loss": 1.0085, "learning_rate": 6.492537313432835e-05, "epoch": 7.91, "step": 2200}
{"loss": 1.1037, "learning_rate": 6.380597014925373e-05, "epoch": 7.95, "step": 2210}
{"loss": 1.2633, "learning_rate": 6.26865671641791e-05, "epoch": 7.99, "step": 2220}
{"loss": 1.0117, "learning_rate": 6.156716417910448e-05, "epoch": 8.02, "step": 2230}
{"loss": 0.9484, "learning_rate": 6.0447761194029845e-05, "epoch": 8.06, "step": 2240}
{"loss": 0.8574, "learning_rate": 5.932835820895521e-05, "epoch": 8.09, "step": 2250}
{"loss": 1.0723, "learning_rate": 5.8208955223880594e-05, "epoch": 8.13, "step": 2260}
{"loss": 0.7553, "learning_rate": 5.708955223880596e-05, "epoch": 8.17, "step": 2270}
{"loss": 0.9336, "learning_rate": 5.5970149253731344e-05, "epoch": 8.2, "step": 2280}
{"loss": 0.8903, "learning_rate": 5.485074626865671e-05, "epoch": 8.24, "step": 2290}
{"loss": 0.7616, "learning_rate": 5.373134328358208e-05, "epoch": 8.27, "step": 2300}
{"loss": 0.9979, "learning_rate": 5.261194029850746e-05, "epoch": 8.31, "step": 2310}
{"loss": 1.1038, "learning_rate": 5.149253731343283e-05, "epoch": 8.35, "step": 2320}
{"loss": 0.8682, "learning_rate": 5.037313432835821e-05, "epoch": 8.38, "step": 2330}
{"loss": 0.9784, "learning_rate": 4.925373134328358e-05, "epoch": 8.42, "step": 2340}
{"loss": 0.8704, "learning_rate": 4.813432835820895e-05, "epoch": 8.45, "step": 2350}
{"loss": 0.9511, "learning_rate": 4.701492537313433e-05, "epoch": 8.49, "step": 2360}
{"loss": 1.0958, "learning_rate": 4.5895522388059696e-05, "epoch": 8.53, "step": 2370}
{"loss": 0.7955, "learning_rate": 4.4776119402985064e-05, "epoch": 8.56, "step": 2380}
{"loss": 0.9566, "learning_rate": 4.3656716417910446e-05, "epoch": 8.6, "step": 2390}
{"loss": 0.7675, "learning_rate": 4.2537313432835814e-05, "epoch": 8.63, "step": 2400}
{"loss": 0.9047, "learning_rate": 4.1417910447761195e-05, "epoch": 8.67, "step": 2410}
{"loss": 0.8634, "learning_rate": 4.029850746268656e-05, "epoch": 8.71, "step": 2420}
{"loss": 0.7287, "learning_rate": 3.917910447761193e-05, "epoch": 8.74, "step": 2430}
{"loss": 0.9552, "learning_rate": 3.805970149253731e-05, "epoch": 8.78, "step": 2440}
{"loss": 0.8651, "learning_rate": 3.694029850746268e-05, "epoch": 8.81, "step": 2450}
{"loss": 0.9632, "learning_rate": 3.5820895522388055e-05, "epoch": 8.85, "step": 2460}
{"loss": 0.9298, "learning_rate": 3.470149253731343e-05, "epoch": 8.88, "step": 2470}
{"loss": 0.7165, "learning_rate": 3.3582089552238805e-05, "epoch": 8.92, "step": 2480}
{"loss": 0.9407, "learning_rate": 3.246268656716417e-05, "epoch": 8.96, "step": 2490}
{"loss": 0.8694, "learning_rate": 3.134328358208955e-05, "epoch": 8.99, "step": 2500}
{"loss": 0.8516, "learning_rate": 3.0223880597014922e-05, "epoch": 9.03, "step": 2510}
{"loss": 0.9565, "learning_rate": 2.9104477611940297e-05, "epoch": 9.06, "step": 2520}
{"loss": 0.8364, "learning_rate": 2.7985074626865672e-05, "epoch": 9.1, "step": 2530}
{"loss": 0.8792, "learning_rate": 2.686567164179104e-05, "epoch": 9.14, "step": 2540}
{"loss": 0.7395, "learning_rate": 2.5746268656716415e-05, "epoch": 9.17, "step": 2550}
{"loss": 0.7, "learning_rate": 2.462686567164179e-05, "epoch": 9.21, "step": 2560}
{"loss": 0.8797, "learning_rate": 2.3507462686567164e-05, "epoch": 9.24, "step": 2570}
{"loss": 0.8762, "learning_rate": 2.2388059701492532e-05, "epoch": 9.28, "step": 2580}
{"loss": 0.8505, "learning_rate": 2.1268656716417907e-05, "epoch": 9.32, "step": 2590}
{"loss": 0.7759, "learning_rate": 2.014925373134328e-05, "epoch": 9.35, "step": 2600}
{"loss": 0.7994, "learning_rate": 1.9029850746268656e-05, "epoch": 9.39, "step": 2610}
{"loss": 0.9462, "learning_rate": 1.7910447761194028e-05, "epoch": 9.42, "step": 2620}
{"loss": 0.9192, "learning_rate": 1.6791044776119402e-05, "epoch": 9.46, "step": 2630}
{"loss": 0.8138, "learning_rate": 1.5671641791044774e-05, "epoch": 9.5, "step": 2640}
{"loss": 0.7991, "learning_rate": 1.4552238805970149e-05, "epoch": 9.53, "step": 2650}
{"loss": 0.8714, "learning_rate": 1.343283582089552e-05, "epoch": 9.57, "step": 2660}
{"loss": 0.8073, "learning_rate": 1.2313432835820895e-05, "epoch": 9.6, "step": 2670}
{"loss": 0.7959, "learning_rate": 1.1194029850746266e-05, "epoch": 9.64, "step": 2680}
{"loss": 0.7809, "learning_rate": 1.007462686567164e-05, "epoch": 9.68, "step": 2690}
{"loss": 0.5922, "learning_rate": 8.955223880597014e-06, "epoch": 9.71, "step": 2700}
{"loss": 0.8417, "learning_rate": 7.835820895522387e-06, "epoch": 9.75, "step": 2710}
{"loss": 0.8592, "learning_rate": 6.71641791044776e-06, "epoch": 9.78, "step": 2720}
{"loss": 0.9484, "learning_rate": 5.597014925373133e-06, "epoch": 9.82, "step": 2730}
{"loss": 0.824, "learning_rate": 4.477611940298507e-06, "epoch": 9.86, "step": 2740}
{"loss": 0.8203, "learning_rate": 3.35820895522388e-06, "epoch": 9.89, "step": 2750}
{"loss": 0.7591, "learning_rate": 2.2388059701492535e-06, "epoch": 9.93, "step": 2760}
{"loss": 0.7422, "learning_rate": 1.1194029850746267e-06, "epoch": 9.96, "step": 2770}
{"loss": 0.9275, "learning_rate": 0.0, "epoch": 10.0, "step": 2780}
{"train_runtime": 65996.59, "train_samples_per_second": 0.042, "train_steps_per_second": 0.042, "total_flos": 6.64507164033024e+16, "train_loss": 1.4632105468846053, "epoch": 10.0, "step": 2780}