[ { "loss": 0.6476, "learning_rate": 0.0002, "epoch": 0.02, "step": 1 }, { "loss": 0.6668, "learning_rate": 0.0002, "epoch": 0.04, "step": 2 }, { "loss": 0.6615, "learning_rate": 0.0002, "epoch": 0.05, "step": 3 }, { "loss": 0.5895, "learning_rate": 0.0002, "epoch": 0.07, "step": 4 }, { "loss": 0.6124, "learning_rate": 0.0002, "epoch": 0.09, "step": 5 }, { "loss": 0.5953, "learning_rate": 0.0002, "epoch": 0.11, "step": 6 }, { "loss": 0.5356, "learning_rate": 0.0002, "epoch": 0.12, "step": 7 }, { "loss": 0.4604, "learning_rate": 0.0002, "epoch": 0.14, "step": 8 }, { "loss": 0.4143, "learning_rate": 0.0002, "epoch": 0.16, "step": 9 }, { "loss": 0.4553, "learning_rate": 0.0002, "epoch": 0.18, "step": 10 }, { "loss": 0.3538, "learning_rate": 0.0002, "epoch": 0.2, "step": 11 }, { "loss": 0.3389, "learning_rate": 0.0002, "epoch": 0.21, "step": 12 }, { "loss": 0.2639, "learning_rate": 0.0002, "epoch": 0.23, "step": 13 }, { "loss": 0.2096, "learning_rate": 0.0002, "epoch": 0.25, "step": 14 }, { "loss": 0.2002, "learning_rate": 0.0002, "epoch": 0.27, "step": 15 }, { "loss": 0.2374, "learning_rate": 0.0002, "epoch": 0.29, "step": 16 }, { "loss": 0.2475, "learning_rate": 0.0002, "epoch": 0.3, "step": 17 }, { "loss": 0.1994, "learning_rate": 0.0002, "epoch": 0.32, "step": 18 }, { "loss": 0.1406, "learning_rate": 0.0002, "epoch": 0.34, "step": 19 }, { "loss": 0.055, "learning_rate": 0.0002, "epoch": 0.36, "step": 20 }, { "loss": 0.1168, "learning_rate": 0.0002, "epoch": 0.38, "step": 21 }, { "loss": 0.0916, "learning_rate": 0.0002, "epoch": 0.39, "step": 22 }, { "loss": 0.1596, "learning_rate": 0.0002, "epoch": 0.41, "step": 23 }, { "loss": 0.1585, "learning_rate": 0.0002, "epoch": 0.43, "step": 24 }, { "loss": 0.184, "learning_rate": 0.0002, "epoch": 0.45, "step": 25 }, { "eval_math_exam_questions_loss": 0.09979354590177536, "eval_math_exam_questions_score": -0.029867494478821754, "eval_math_exam_questions_brier_score": 0.029867494478821754, "eval_math_exam_questions_average_probability": 0.9305303692817688, "eval_math_exam_questions_accuracy": 0.95, "eval_math_exam_questions_probabilities": [ 0.8511927723884583, 0.8560279011726379, 0.9999574422836304, 0.9483713507652283, 0.2606664001941681, 0.9442870616912842, 0.9947615265846252, 0.9007341265678406, 0.9449180364608765, 0.41139501333236694, 0.9922882318496704, 0.9992316961288452, 0.9993873834609985, 0.9968752861022949, 0.9976814985275269, 0.9999994039535522, 0.9999996423721313, 0.9999994039535522, 0.9571589231491089, 0.995111882686615, 0.979494571685791, 0.574057400226593, 0.7285555601119995, 0.3839860260486603, 0.9998674392700195, 0.9982432126998901, 0.9999611377716064, 0.9999970197677612, 0.9999997615814209, 1.0, 0.9638392329216003, 0.968817949295044, 0.2049836367368698, 0.9999809265136719, 0.9998273253440857, 0.9999629259109497, 0.9595451951026917, 0.9992743134498596, 0.9850507974624634, 0.9250211715698242, 0.840378999710083, 0.8428780436515808, 0.9994524121284485, 0.9991476535797119, 0.9997357726097107, 0.9995108842849731, 0.9998007416725159, 0.9996703863143921, 0.9999984502792358, 0.9999967813491821, 0.999998927116394, 0.9999979734420776, 0.9999997615814209, 0.9999996423721313, 0.9957982897758484, 0.9765607714653015, 0.996933102607727, 0.8950393795967102, 0.9991758465766907, 0.9990474581718445, 0.7260539531707764, 0.8293086290359497, 0.381984144449234, 0.9996906518936157, 0.9999486207962036, 0.9999747276306152, 0.994877815246582, 0.991081714630127, 0.9968804121017456, 0.9998242259025574, 0.9999198913574219, 0.999997615814209, 0.9805970788002014, 0.9670814871788025, 0.8692526817321777, 0.9998607635498047, 0.9999896287918091, 0.999993085861206, 0.9961829781532288, 0.9215685129165649, 0.998610258102417, 0.9952474236488342, 0.7989624738693237, 0.9947852492332458, 0.9998492002487183, 0.9999179840087891, 0.9999663829803467, 0.9998860359191895, 0.9998941421508789, 0.9978526830673218, 0.8546462059020996, 0.8965560793876648, 0.6786884069442749, 0.9975112676620483, 0.9996652603149414, 0.9990561604499817, 0.9986518025398254, 0.9981924891471863, 0.998735249042511, 0.9286666512489319 ], "eval_math_exam_questions_runtime": 58.7461, "eval_math_exam_questions_samples_per_second": 1.702, "eval_math_exam_questions_steps_per_second": 0.068, "epoch": 0.45, "step": 25 }, { "loss": 0.0599, "learning_rate": 0.0002, "epoch": 0.46, "step": 26 }, { "loss": 0.0445, "learning_rate": 0.0002, "epoch": 0.48, "step": 27 }, { "loss": 0.0625, "learning_rate": 0.0002, "epoch": 0.5, "step": 28 }, { "loss": 0.0125, "learning_rate": 0.0002, "epoch": 0.52, "step": 29 }, { "loss": 0.2316, "learning_rate": 0.0002, "epoch": 0.54, "step": 30 }, { "loss": 0.0746, "learning_rate": 0.0002, "epoch": 0.55, "step": 31 }, { "loss": 0.1032, "learning_rate": 0.0002, "epoch": 0.57, "step": 32 }, { "loss": 0.1413, "learning_rate": 0.0002, "epoch": 0.59, "step": 33 }, { "loss": 0.1345, "learning_rate": 0.0002, "epoch": 0.61, "step": 34 }, { "loss": 0.0688, "learning_rate": 0.0002, "epoch": 0.62, "step": 35 }, { "loss": 0.0762, "learning_rate": 0.0002, "epoch": 0.64, "step": 36 }, { "loss": 0.0552, "learning_rate": 0.0002, "epoch": 0.66, "step": 37 }, { "loss": 0.0356, "learning_rate": 0.0002, "epoch": 0.68, "step": 38 }, { "loss": 0.0543, "learning_rate": 0.0002, "epoch": 0.7, "step": 39 }, { "loss": 0.0279, "learning_rate": 0.0002, "epoch": 0.71, "step": 40 }, { "loss": 0.0704, "learning_rate": 0.0002, "epoch": 0.73, "step": 41 }, { "loss": 0.039, "learning_rate": 0.0002, "epoch": 0.75, "step": 42 }, { "loss": 0.0114, "learning_rate": 0.0002, "epoch": 0.77, "step": 43 }, { "loss": 0.0043, "learning_rate": 0.0002, "epoch": 0.79, "step": 44 }, { "loss": 0.1562, "learning_rate": 0.0002, "epoch": 0.8, "step": 45 }, { "loss": 0.0495, "learning_rate": 0.0002, "epoch": 0.82, "step": 46 }, { "loss": 0.1683, "learning_rate": 0.0002, "epoch": 0.84, "step": 47 }, { "loss": 0.0118, "learning_rate": 0.0002, "epoch": 0.86, "step": 48 }, { "loss": 0.0708, "learning_rate": 0.0002, "epoch": 0.88, "step": 49 }, { "loss": 0.0445, "learning_rate": 0.0002, "epoch": 0.89, "step": 50 }, { "eval_math_exam_questions_loss": 0.11060654371976852, "eval_math_exam_questions_score": -0.04164460673928261, "eval_math_exam_questions_brier_score": 0.04164460673928261, "eval_math_exam_questions_average_probability": 0.9377254247665405, "eval_math_exam_questions_accuracy": 0.92, "eval_math_exam_questions_probabilities": [ 0.9878444075584412, 0.9649227261543274, 0.9999626874923706, 0.9997627139091492, 0.41481563448905945, 0.9997966885566711, 0.9999992847442627, 0.9999576807022095, 0.9999995231628418, 0.13421274721622467, 0.9997405409812927, 0.9999984502792358, 0.9999998807907104, 0.9999960660934448, 0.9999959468841553, 1.0, 1.0, 1.0, 0.9999942779541016, 0.9999998807907104, 0.9999129772186279, 0.4325127899646759, 0.19135379791259766, 0.2162252813577652, 1.0, 0.9999597072601318, 1.0, 0.9999998807907104, 1.0, 1.0, 0.9999549388885498, 0.9999803304672241, 0.9946361184120178, 0.9999997615814209, 0.9998887777328491, 0.9999957084655762, 0.9995691180229187, 1.0, 0.99983811378479, 0.9979630708694458, 0.9983990788459778, 0.9632781147956848, 0.9999992847442627, 0.9999983310699463, 0.9999996423721313, 0.9999991655349731, 0.999947190284729, 0.9998934268951416, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999369382858276, 0.9989650249481201, 0.9998283386230469, 0.9974669218063354, 1.0, 1.0, 0.7936062216758728, 0.9307928085327148, 0.27603790163993835, 0.9999816417694092, 0.9999940395355225, 0.9999995231628418, 0.9996922016143799, 0.988324761390686, 0.9975664615631104, 0.9999992847442627, 0.9999995231628418, 1.0, 0.9978724718093872, 0.3203738033771515, 0.3396102786064148, 0.9999998807907104, 1.0, 1.0, 0.9998663663864136, 0.988074004650116, 0.9999923706054688, 0.9999964237213135, 0.9248444437980652, 0.9999958276748657, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999954700469971, 0.9937870502471924, 0.9950788021087646, 0.9376111030578613, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999986886978149, 0.9999997615814209, 0.9989570379257202 ], "eval_math_exam_questions_runtime": 58.7331, "eval_math_exam_questions_samples_per_second": 1.703, "eval_math_exam_questions_steps_per_second": 0.068, "epoch": 0.89, "step": 50 }, { "loss": 0.1862, "learning_rate": 0.0002, "epoch": 0.91, "step": 51 }, { "loss": 0.1376, "learning_rate": 0.0002, "epoch": 0.93, "step": 52 }, { "loss": 0.1167, "learning_rate": 0.0002, "epoch": 0.95, "step": 53 }, { "loss": 0.1163, "learning_rate": 0.0002, "epoch": 0.96, "step": 54 }, { "loss": 0.0308, "learning_rate": 0.0002, "epoch": 0.98, "step": 55 }, { "loss": 0.0515, "learning_rate": 0.0002, "epoch": 1.0, "step": 56 }, { "loss": 0.0876, "learning_rate": 0.0002, "epoch": 1.02, "step": 57 }, { "loss": 0.0215, "learning_rate": 0.0002, "epoch": 1.04, "step": 58 }, { "loss": 0.0454, "learning_rate": 0.0002, "epoch": 1.05, "step": 59 }, { "loss": 0.0335, "learning_rate": 0.0002, "epoch": 1.07, "step": 60 }, { "loss": 0.0248, "learning_rate": 0.0002, "epoch": 1.09, "step": 61 }, { "loss": 0.0028, "learning_rate": 0.0002, "epoch": 1.11, "step": 62 }, { "loss": 0.0175, "learning_rate": 0.0002, "epoch": 1.12, "step": 63 }, { "loss": 0.0078, "learning_rate": 0.0002, "epoch": 1.14, "step": 64 }, { "loss": 0.0113, "learning_rate": 0.0002, "epoch": 1.16, "step": 65 }, { "loss": 0.0119, "learning_rate": 0.0002, "epoch": 1.18, "step": 66 }, { "loss": 0.0235, "learning_rate": 0.0002, "epoch": 1.2, "step": 67 }, { "loss": 0.0116, "learning_rate": 0.0002, "epoch": 1.21, "step": 68 }, { "loss": 0.0018, "learning_rate": 0.0002, "epoch": 1.23, "step": 69 }, { "loss": 0.0024, "learning_rate": 0.0002, "epoch": 1.25, "step": 70 }, { "loss": 0.003, "learning_rate": 0.0002, "epoch": 1.27, "step": 71 }, { "loss": 0.0044, "learning_rate": 0.0002, "epoch": 1.29, "step": 72 }, { "loss": 0.0151, "learning_rate": 0.0002, "epoch": 1.3, "step": 73 }, { "loss": 0.0019, "learning_rate": 0.0002, "epoch": 1.32, "step": 74 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 1.34, "step": 75 }, { "eval_math_exam_questions_loss": 0.05752657726407051, "eval_math_exam_questions_score": -0.019282517954707146, "eval_math_exam_questions_brier_score": 0.019282517954707146, "eval_math_exam_questions_average_probability": 0.9707407355308533, "eval_math_exam_questions_accuracy": 0.97, "eval_math_exam_questions_probabilities": [ 0.9951311349868774, 0.8724949359893799, 0.9999651908874512, 1.0, 0.8811224102973938, 1.0, 1.0, 1.0, 1.0, 0.103812575340271, 0.9999701976776123, 0.9999998807907104, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999974966049194, 0.9944435954093933, 0.9979708790779114, 0.4370083212852478, 1.0, 0.9999988079071045, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 0.14551357924938202, 1.0, 0.999981164932251, 0.9999678134918213, 0.9999961853027344, 0.9987561702728271, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999978542327881, 1.0, 0.999945878982544, 1.0, 1.0, 0.8369179964065552, 0.9991326928138733, 0.8580266237258911, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9998944997787476, 0.9997386336326599, 1.0, 1.0, 1.0, 1.0, 0.9999858140945435, 0.9989979863166809, 0.9989114999771118, 1.0, 1.0, 0.9999665021896362, 0.9914140701293945, 0.9999996423721313, 1.0, 0.9750034809112549, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999885559082031, 0.999998927116394, 0.9905345439910889, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9994863271713257 ], "eval_math_exam_questions_runtime": 58.6753, "eval_math_exam_questions_samples_per_second": 1.704, "eval_math_exam_questions_steps_per_second": 0.068, "epoch": 1.34, "step": 75 }, { "loss": 0.0139, "learning_rate": 0.0002, "epoch": 1.36, "step": 76 }, { "loss": 0.0121, "learning_rate": 0.0002, "epoch": 1.38, "step": 77 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 1.39, "step": 78 }, { "loss": 0.0117, "learning_rate": 0.0002, "epoch": 1.41, "step": 79 }, { "loss": 0.0046, "learning_rate": 0.0002, "epoch": 1.43, "step": 80 }, { "loss": 0.3396, "learning_rate": 0.0002, "epoch": 1.45, "step": 81 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 1.46, "step": 82 }, { "loss": 0.0014, "learning_rate": 0.0002, "epoch": 1.48, "step": 83 }, { "loss": 0.0034, "learning_rate": 0.0002, "epoch": 1.5, "step": 84 }, { "loss": 0.0026, "learning_rate": 0.0002, "epoch": 1.52, "step": 85 }, { "loss": 0.001, "learning_rate": 0.0002, "epoch": 1.54, "step": 86 }, { "loss": 0.0026, "learning_rate": 0.0002, "epoch": 1.55, "step": 87 }, { "loss": 0.006, "learning_rate": 0.0002, "epoch": 1.57, "step": 88 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 1.59, "step": 89 }, { "loss": 0.0024, "learning_rate": 0.0002, "epoch": 1.61, "step": 90 }, { "loss": 0.0031, "learning_rate": 0.0002, "epoch": 1.62, "step": 91 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 1.64, "step": 92 }, { "loss": 0.006, "learning_rate": 0.0002, "epoch": 1.66, "step": 93 }, { "loss": 0.0056, "learning_rate": 0.0002, "epoch": 1.68, "step": 94 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 1.7, "step": 95 }, { "loss": 0.007, "learning_rate": 0.0002, "epoch": 1.71, "step": 96 }, { "loss": 0.1818, "learning_rate": 0.0002, "epoch": 1.73, "step": 97 }, { "loss": 0.0089, "learning_rate": 0.0002, "epoch": 1.75, "step": 98 }, { "loss": 0.0022, "learning_rate": 0.0002, "epoch": 1.77, "step": 99 }, { "loss": 0.0025, "learning_rate": 0.0002, "epoch": 1.79, "step": 100 }, { "eval_math_exam_questions_loss": 0.214411199092865, "eval_math_exam_questions_score": -0.03251197189092636, "eval_math_exam_questions_brier_score": 0.03251197189092636, "eval_math_exam_questions_average_probability": 0.9538560509681702, "eval_math_exam_questions_accuracy": 0.96, "eval_math_exam_questions_probabilities": [ 0.9958956241607666, 0.8318325877189636, 0.9999992847442627, 1.0, 8.109304872050416e-06, 1.0, 1.0, 1.0, 1.0, 0.29970213770866394, 0.9999974966049194, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.9999997615814209, 1.0, 0.999977707862854, 0.999778687953949, 0.9999997615814209, 0.30764925479888916, 1.0, 0.9999996423721313, 1.0, 0.9999992847442627, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.6386927366256714, 1.0, 0.9961808919906616, 0.9999927282333374, 0.9999998807907104, 0.8549012541770935, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999997615814209, 1.0, 1.0, 1.0, 0.9999995231628418, 1.0, 0.9937735199928284, 1.0, 1.0, 0.9984048008918762, 1.0, 0.9988483190536499, 0.9999972581863403, 1.0, 1.0, 1.0, 0.9999980926513672, 0.9998224377632141, 0.9999912977218628, 1.0, 1.0, 0.7528015971183777, 0.003164754481986165, 0.9934700727462769, 0.9953631162643433, 1.0, 1.0, 0.9982792139053345, 0.9575594067573547, 0.9999983310699463, 1.0, 0.7891086935997009, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999996423721313, 0.9999997615814209, 0.9999990463256836, 0.9808980822563171, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9995265007019043 ], "eval_math_exam_questions_runtime": 58.6781, "eval_math_exam_questions_samples_per_second": 1.704, "eval_math_exam_questions_steps_per_second": 0.068, "epoch": 1.79, "step": 100 }, { "train_runtime": 4114.901, "train_samples_per_second": 0.778, "train_steps_per_second": 0.024, "total_flos": 0.0, "train_loss": 0.11991490689004422, "epoch": 1.79, "step": 100 } ]]