[ { "loss": 0.8734, "learning_rate": 0.0002, "epoch": 0.03, "step": 1 }, { "loss": 0.7825, "learning_rate": 0.0002, "epoch": 0.05, "step": 2 }, { "loss": 0.6849, "learning_rate": 0.0002, "epoch": 0.08, "step": 3 }, { "loss": 0.6394, "learning_rate": 0.0002, "epoch": 0.11, "step": 4 }, { "loss": 0.5604, "learning_rate": 0.0002, "epoch": 0.13, "step": 5 }, { "loss": 0.484, "learning_rate": 0.0002, "epoch": 0.16, "step": 6 }, { "loss": 0.4106, "learning_rate": 0.0002, "epoch": 0.18, "step": 7 }, { "loss": 0.315, "learning_rate": 0.0002, "epoch": 0.21, "step": 8 }, { "loss": 0.303, "learning_rate": 0.0002, "epoch": 0.24, "step": 9 }, { "loss": 0.2428, "learning_rate": 0.0002, "epoch": 0.26, "step": 10 }, { "loss": 0.1576, "learning_rate": 0.0002, "epoch": 0.29, "step": 11 }, { "loss": 0.1088, "learning_rate": 0.0002, "epoch": 0.32, "step": 12 }, { "loss": 0.0539, "learning_rate": 0.0002, "epoch": 0.34, "step": 13 }, { "loss": 0.0554, "learning_rate": 0.0002, "epoch": 0.37, "step": 14 }, { "loss": 0.0339, "learning_rate": 0.0002, "epoch": 0.39, "step": 15 }, { "loss": 0.0978, "learning_rate": 0.0002, "epoch": 0.42, "step": 16 }, { "loss": 0.0074, "learning_rate": 0.0002, "epoch": 0.45, "step": 17 }, { "loss": 0.0462, "learning_rate": 0.0002, "epoch": 0.47, "step": 18 }, { "loss": 0.0314, "learning_rate": 0.0002, "epoch": 0.5, "step": 19 }, { "loss": 0.0483, "learning_rate": 0.0002, "epoch": 0.53, "step": 20 }, { "loss": 0.0277, "learning_rate": 0.0002, "epoch": 0.55, "step": 21 }, { "loss": 0.0407, "learning_rate": 0.0002, "epoch": 0.58, "step": 22 }, { "loss": 0.055, "learning_rate": 0.0002, "epoch": 0.61, "step": 23 }, { "loss": 0.0459, "learning_rate": 0.0002, "epoch": 0.63, "step": 24 }, { "loss": 0.0388, "learning_rate": 0.0002, "epoch": 0.66, "step": 25 }, { "eval_creative_writing_loss": 0.10124114900827408, "eval_creative_writing_score": -0.029021821916103363, "eval_creative_writing_brier_score": 0.029021821916103363, "eval_creative_writing_average_probability": 0.949838399887085, "eval_creative_writing_accuracy": 0.96, "eval_creative_writing_probabilities": [ 0.968286395072937, 1.0, 0.9910894632339478, 1.0, 0.7539140582084656, 1.0, 0.9999864101409912, 1.0, 0.10593231767416, 0.9996770620346069, 0.35035908222198486, 1.0, 0.9999998807907104, 1.0, 0.995671272277832, 0.9999988079071045, 0.9999849796295166, 0.9999967813491821, 0.9999188184738159, 1.0, 0.9981185793876648, 0.9999852180480957, 0.999852180480957, 0.9999985694885254, 0.7055116891860962, 0.9584224820137024, 0.9999998807907104, 1.0, 0.0526885986328125, 0.9999996423721313, 0.9999980926513672, 1.0, 0.9999997615814209, 1.0, 0.9999997615814209, 1.0, 0.996961772441864, 1.0, 0.9960450530052185, 0.9987917542457581, 0.9999934434890747, 1.0, 0.9999978542327881, 1.0, 0.9987438321113586, 0.9999847412109375, 0.9999997615814209, 1.0, 0.999990701675415, 0.9999983310699463, 0.9999998807907104, 1.0, 0.968285322189331, 1.0, 0.988308310508728, 0.9999992847442627, 0.9514806270599365, 0.9999994039535522, 0.9962933659553528, 1.0, 0.8887924551963806, 1.0, 1.0, 1.0, 0.9997608065605164, 1.0, 0.4080069065093994, 0.6264576315879822, 0.8959720134735107, 1.0, 0.7741034626960754, 1.0, 1.0, 1.0, 1.0, 1.0, 0.8713729381561279, 1.0, 0.9989833235740662, 0.9999933242797852, 1.0, 1.0, 0.9996505975723267, 1.0, 0.9916742444038391, 0.9989118576049805, 0.9800452589988708, 0.9999994039535522, 0.9998782873153687, 0.9999986886978149, 1.0, 1.0, 1.0, 1.0, 0.9999996423721313, 0.9999991655349731, 0.7797707915306091, 0.9964134097099304, 0.9997926354408264, 1.0 ], "eval_creative_writing_runtime": 31.3856, "eval_creative_writing_samples_per_second": 3.186, "eval_creative_writing_steps_per_second": 0.127, "epoch": 0.66, "step": 25 }, { "eval_biology_with_literary_style_loss": 0.9078262448310852, "eval_biology_with_literary_style_score": -0.22649109363555908, "eval_biology_with_literary_style_brier_score": 0.22649109363555908, "eval_biology_with_literary_style_average_probability": 0.7168461680412292, "eval_biology_with_literary_style_accuracy": 0.72, "eval_biology_with_literary_style_probabilities": [ 0.890451967716217, 0.9884986281394958, 0.4627087116241455, 0.26461559534072876, 0.9440751671791077, 0.9983709454536438, 0.14105352759361267, 0.5504218935966492, 0.0717836245894432, 0.9877713918685913, 0.9968053102493286, 0.8904775977134705, 0.2346506416797638, 0.2469814568758011, 0.6628801822662354, 0.9975651502609253, 0.16669368743896484, 0.5848821401596069, 0.9999884366989136, 0.999921441078186, 0.9999263286590576, 0.9972250461578369, 0.9998313188552856, 0.6406869888305664, 0.9976915121078491, 0.9530547857284546, 0.995725154876709, 0.10443083941936493, 0.023461028933525085, 0.0064667826518416405, 0.9999899864196777, 0.997308611869812, 0.9999022483825684, 0.9999972581863403, 0.9999064207077026, 0.9999868869781494, 0.5541044473648071, 0.9237357378005981, 0.9736377596855164, 0.00014247097715269774, 0.0008552187937311828, 0.0020070422906428576, 0.009259264916181564, 0.09797927737236023, 0.988251268863678, 0.9928971529006958, 0.003707324853166938, 0.9992867112159729, 0.9720431566238403, 0.9831154942512512, 0.9243084192276001, 0.056156225502491, 0.6303812265396118, 0.9999059438705444, 0.900888204574585, 0.9978699684143066, 0.91905677318573, 0.22306089103221893, 0.48004305362701416, 0.02644597738981247, 0.9815545678138733, 0.9921851754188538, 0.9861576557159424, 0.9997566342353821, 0.4922117292881012, 0.9966878294944763, 0.9991987347602844, 0.9990311861038208, 0.9266239404678345, 0.9989709854125977, 0.9725721478462219, 0.9685802459716797, 0.013590201735496521, 0.7605278491973877, 0.9985472559928894, 0.998909592628479, 0.9852558374404907, 0.9983707070350647, 0.12097364664077759, 0.9766552448272705, 0.02587122656404972, 0.9760261178016663, 0.9663914442062378, 0.9991564750671387, 0.9999828338623047, 0.99964439868927, 0.9962877035140991, 0.9912015199661255, 0.9959142804145813, 0.004339241422712803, 0.9495350122451782, 0.9996113181114197, 0.9997592568397522, 0.9140135645866394, 0.9997426867485046, 0.9957128763198853, 0.2625727951526642, 0.3218887746334076, 0.010261102579534054, 0.655005931854248 ], "eval_biology_with_literary_style_runtime": 34.5421, "eval_biology_with_literary_style_samples_per_second": 2.895, "eval_biology_with_literary_style_steps_per_second": 0.116, "epoch": 0.66, "step": 25 }, { "loss": 0.211, "learning_rate": 0.0002, "epoch": 0.68, "step": 26 }, { "loss": 0.128, "learning_rate": 0.0002, "epoch": 0.71, "step": 27 }, { "loss": 0.0735, "learning_rate": 0.0002, "epoch": 0.74, "step": 28 }, { "loss": 0.0765, "learning_rate": 0.0002, "epoch": 0.76, "step": 29 }, { "loss": 0.0729, "learning_rate": 0.0002, "epoch": 0.79, "step": 30 }, { "loss": 0.0643, "learning_rate": 0.0002, "epoch": 0.82, "step": 31 }, { "loss": 0.052, "learning_rate": 0.0002, "epoch": 0.84, "step": 32 }, { "loss": 0.0159, "learning_rate": 0.0002, "epoch": 0.87, "step": 33 }, { "loss": 0.0546, "learning_rate": 0.0002, "epoch": 0.89, "step": 34 }, { "loss": 0.0484, "learning_rate": 0.0002, "epoch": 0.92, "step": 35 }, { "loss": 0.1863, "learning_rate": 0.0002, "epoch": 0.95, "step": 36 }, { "loss": 0.1555, "learning_rate": 0.0002, "epoch": 0.97, "step": 37 }, { "loss": 0.0444, "learning_rate": 0.0002, "epoch": 1.0, "step": 38 }, { "loss": 0.0087, "learning_rate": 0.0002, "epoch": 1.03, "step": 39 }, { "loss": 0.0122, "learning_rate": 0.0002, "epoch": 1.05, "step": 40 }, { "loss": 0.0664, "learning_rate": 0.0002, "epoch": 1.08, "step": 41 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 1.11, "step": 42 }, { "loss": 0.0363, "learning_rate": 0.0002, "epoch": 1.13, "step": 43 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 1.16, "step": 44 }, { "loss": 0.0145, "learning_rate": 0.0002, "epoch": 1.18, "step": 45 }, { "loss": 0.0055, "learning_rate": 0.0002, "epoch": 1.21, "step": 46 }, { "loss": 0.0542, "learning_rate": 0.0002, "epoch": 1.24, "step": 47 }, { "loss": 0.0092, "learning_rate": 0.0002, "epoch": 1.26, "step": 48 }, { "loss": 0.0047, "learning_rate": 0.0002, "epoch": 1.29, "step": 49 }, { "loss": 0.0913, "learning_rate": 0.0002, "epoch": 1.32, "step": 50 }, { "eval_creative_writing_loss": 0.0620737262070179, "eval_creative_writing_score": -0.018390759825706482, "eval_creative_writing_brier_score": 0.018390759825706482, "eval_creative_writing_average_probability": 0.9659796953201294, "eval_creative_writing_accuracy": 0.97, "eval_creative_writing_probabilities": [ 0.9853099584579468, 1.0, 0.9972212314605713, 1.0, 0.9858209490776062, 1.0, 0.9999940395355225, 1.0, 0.26181724667549133, 1.0, 0.4172997772693634, 1.0, 1.0, 1.0, 0.998902440071106, 1.0, 1.0, 1.0, 0.9997408986091614, 1.0, 0.9994969367980957, 0.9999998807907104, 0.9999983310699463, 1.0, 0.8943321108818054, 0.9996473789215088, 1.0, 1.0, 0.1366727352142334, 1.0, 0.9999995231628418, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9997522234916687, 1.0, 0.9999735355377197, 0.9999978542327881, 0.9999889135360718, 1.0, 0.9999996423721313, 1.0, 0.9998144507408142, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.976751983165741, 1.0, 0.997978150844574, 1.0, 0.9721508622169495, 1.0, 0.9091917872428894, 1.0, 0.8016390800476074, 1.0, 1.0, 1.0, 0.9940642714500427, 1.0, 0.7791704535484314, 0.9723653793334961, 0.8304190635681152, 1.0, 0.738132655620575, 1.0, 1.0, 1.0, 1.0, 1.0, 0.997934103012085, 1.0, 0.9999746084213257, 1.0, 1.0, 1.0, 0.9992375373840332, 1.0, 0.9998910427093506, 0.9999988079071045, 0.9967170357704163, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9567482471466064, 0.9999606609344482, 0.9998536109924316, 1.0 ], "eval_creative_writing_runtime": 31.4284, "eval_creative_writing_samples_per_second": 3.182, "eval_creative_writing_steps_per_second": 0.127, "epoch": 1.32, "step": 50 }, { "eval_biology_with_literary_style_loss": 1.3244109153747559, "eval_biology_with_literary_style_score": -0.2915953993797302, "eval_biology_with_literary_style_brier_score": 0.2915953993797302, "eval_biology_with_literary_style_average_probability": 0.6370630860328674, "eval_biology_with_literary_style_accuracy": 0.66, "eval_biology_with_literary_style_probabilities": [ 0.8422331213951111, 0.3563534617424011, 0.20590150356292725, 0.34388160705566406, 0.9738777279853821, 0.9933098554611206, 0.013515126891434193, 0.03492158278822899, 0.0005772275617346168, 0.9946145415306091, 0.9994958639144897, 0.9732575416564941, 0.4653439223766327, 0.2902985215187073, 0.9848002195358276, 0.874828040599823, 0.007957144640386105, 0.10880450904369354, 0.9999992847442627, 0.9999992847442627, 0.9997832179069519, 0.9021045565605164, 0.9993135929107666, 0.09312967211008072, 0.9835945963859558, 0.9038721919059753, 0.9953224062919617, 0.005929848644882441, 0.004466882906854153, 0.034653790295124054, 0.9999308586120605, 0.9367615580558777, 0.9977996945381165, 0.9999979734420776, 0.996374785900116, 0.9997103810310364, 0.19381558895111084, 0.6907597780227661, 0.875636100769043, 8.129484740493353e-06, 6.22305233264342e-05, 0.0003827095788437873, 0.0011861087987199426, 0.013507246039807796, 0.9973799586296082, 0.6868321299552917, 0.04164070263504982, 0.9997265934944153, 0.9688616991043091, 0.9772934317588806, 0.9902143478393555, 0.05315324291586876, 0.7507219910621643, 0.997478187084198, 0.8234730362892151, 0.9983914494514465, 0.5037214159965515, 0.1830606311559677, 0.6088641285896301, 0.056403663009405136, 0.9997255206108093, 0.997403085231781, 0.9995600581169128, 0.9978277087211609, 0.1915532499551773, 0.886441171169281, 0.9874991774559021, 0.9855878353118896, 0.7539471387863159, 0.9771825671195984, 0.5847952961921692, 0.7823383212089539, 0.010838981717824936, 0.7440186142921448, 0.8710861206054688, 0.9924225211143494, 0.41601699590682983, 0.9564679861068726, 0.5351160168647766, 0.9124112725257874, 0.00990450568497181, 0.8436535000801086, 0.45744913816452026, 0.9863516092300415, 0.9999998807907104, 0.99796462059021, 0.984410285949707, 0.8628932237625122, 0.7051854133605957, 0.004238440655171871, 0.6792111396789551, 0.9969097971916199, 0.9998319149017334, 0.12430453300476074, 0.9992156028747559, 0.8144650459289551, 0.07938272505998611, 0.013606518507003784, 0.003284846432507038, 0.8325172066688538 ], "eval_biology_with_literary_style_runtime": 34.5464, "eval_biology_with_literary_style_samples_per_second": 2.895, "eval_biology_with_literary_style_steps_per_second": 0.116, "epoch": 1.32, "step": 50 }, { "loss": 0.0092, "learning_rate": 0.0002, "epoch": 1.34, "step": 51 }, { "loss": 0.0012, "learning_rate": 0.0002, "epoch": 1.37, "step": 52 }, { "loss": 0.0741, "learning_rate": 0.0002, "epoch": 1.39, "step": 53 }, { "loss": 0.0191, "learning_rate": 0.0002, "epoch": 1.42, "step": 54 }, { "loss": 0.0289, "learning_rate": 0.0002, "epoch": 1.45, "step": 55 }, { "loss": 0.008, "learning_rate": 0.0002, "epoch": 1.47, "step": 56 }, { "loss": 0.0091, "learning_rate": 0.0002, "epoch": 1.5, "step": 57 }, { "loss": 0.001, "learning_rate": 0.0002, "epoch": 1.53, "step": 58 }, { "loss": 0.0589, "learning_rate": 0.0002, "epoch": 1.55, "step": 59 }, { "loss": 0.0014, "learning_rate": 0.0002, "epoch": 1.58, "step": 60 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 1.61, "step": 61 }, { "loss": 0.0032, "learning_rate": 0.0002, "epoch": 1.63, "step": 62 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 1.66, "step": 63 }, { "loss": 0.0163, "learning_rate": 0.0002, "epoch": 1.68, "step": 64 }, { "loss": 0.0007, "learning_rate": 0.0002, "epoch": 1.71, "step": 65 }, { "loss": 0.0153, "learning_rate": 0.0002, "epoch": 1.74, "step": 66 }, { "loss": 0.0266, "learning_rate": 0.0002, "epoch": 1.76, "step": 67 }, { "loss": 0.0547, "learning_rate": 0.0002, "epoch": 1.79, "step": 68 }, { "loss": 0.0024, "learning_rate": 0.0002, "epoch": 1.82, "step": 69 }, { "loss": 0.0118, "learning_rate": 0.0002, "epoch": 1.84, "step": 70 }, { "loss": 0.0031, "learning_rate": 0.0002, "epoch": 1.87, "step": 71 }, { "loss": 0.0076, "learning_rate": 0.0002, "epoch": 1.89, "step": 72 }, { "loss": 0.0201, "learning_rate": 0.0002, "epoch": 1.92, "step": 73 }, { "loss": 0.0105, "learning_rate": 0.0002, "epoch": 1.95, "step": 74 }, { "loss": 0.0089, "learning_rate": 0.0002, "epoch": 1.97, "step": 75 }, { "eval_creative_writing_loss": 0.09592155367136002, "eval_creative_writing_score": -0.020631961524486542, "eval_creative_writing_brier_score": 0.020631961524486542, "eval_creative_writing_average_probability": 0.971525251865387, "eval_creative_writing_accuracy": 0.97, "eval_creative_writing_probabilities": [ 0.9997228980064392, 1.0, 0.9999998807907104, 1.0, 0.9996482133865356, 1.0, 0.9999998807907104, 1.0, 0.2378961592912674, 0.9999994039535522, 0.42051035165786743, 1.0, 1.0, 1.0, 0.9999306201934814, 1.0, 0.9999998807907104, 1.0, 0.9999996423721313, 1.0, 0.999981164932251, 0.9999998807907104, 0.9999912977218628, 1.0, 0.9809040427207947, 0.9995905756950378, 1.0, 1.0, 0.0023363870568573475, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999964237213135, 1.0, 0.9986351132392883, 0.9997923970222473, 0.9999994039535522, 1.0, 1.0, 1.0, 0.9998650550842285, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9987547397613525, 1.0, 0.9995219707489014, 1.0, 0.9831072092056274, 1.0, 0.9974205493927002, 1.0, 0.9997766613960266, 1.0, 1.0, 1.0, 0.9999995231628418, 1.0, 0.6153692007064819, 0.9825475811958313, 0.9969502091407776, 1.0, 0.9921615719795227, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998723268508911, 1.0, 0.9999754428863525, 0.9999998807907104, 1.0, 1.0, 0.9998829364776611, 1.0, 0.9971277117729187, 0.9999291896820068, 0.9999527931213379, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.952387273311615, 0.9989917874336243, 0.9999982118606567, 1.0 ], "eval_creative_writing_runtime": 31.3865, "eval_creative_writing_samples_per_second": 3.186, "eval_creative_writing_steps_per_second": 0.127, "epoch": 1.97, "step": 75 }, { "eval_biology_with_literary_style_loss": 1.4261603355407715, "eval_biology_with_literary_style_score": -0.23246526718139648, "eval_biology_with_literary_style_brier_score": 0.23246526718139648, "eval_biology_with_literary_style_average_probability": 0.739600419998169, "eval_biology_with_literary_style_accuracy": 0.75, "eval_biology_with_literary_style_probabilities": [ 0.9998239874839783, 0.9999821186065674, 0.997286319732666, 0.7309690117835999, 0.9989686012268066, 0.9998588562011719, 0.1100221648812294, 0.013960055075585842, 0.0014234904665499926, 0.9999901056289673, 0.9999978542327881, 0.9998490810394287, 0.5011758804321289, 0.24851495027542114, 0.9544274806976318, 0.996453046798706, 0.0021988353691995144, 0.05229213833808899, 1.0, 1.0, 0.9999995231628418, 0.9999839067459106, 0.999997615814209, 0.9901941418647766, 0.9993062019348145, 0.9996980428695679, 0.9998052716255188, 7.921013457234949e-05, 0.00241833901964128, 0.00010733860108302906, 1.0, 0.9999939203262329, 1.0, 1.0, 0.9999995231628418, 1.0, 0.37197402119636536, 0.906427264213562, 0.9985352754592896, 9.472542927824179e-08, 1.8663062292034738e-05, 0.0007226847810670733, 0.0009153155260719359, 0.027196291834115982, 0.996335506439209, 0.9934919476509094, 0.009263264015316963, 0.9999862909317017, 0.9986127614974976, 0.999937891960144, 0.9997109770774841, 0.5540578961372375, 0.9694981575012207, 0.9999996423721313, 0.9991618394851685, 0.9999830722808838, 0.9839572906494141, 0.3787292242050171, 0.7562795877456665, 0.024417594075202942, 0.9996551275253296, 0.9969584941864014, 0.998749852180481, 0.9999865293502808, 0.7677027583122253, 0.9995812773704529, 0.9999984502792358, 0.9999995231628418, 0.9999768733978271, 0.9979830980300903, 0.9924507737159729, 0.9991115927696228, 7.307936175493523e-05, 0.7401599884033203, 0.929680347442627, 0.9999669790267944, 0.8817222118377686, 0.9998908042907715, 0.1284332275390625, 0.928813636302948, 0.0032492727041244507, 0.9993767142295837, 0.9767991900444031, 0.9966356158256531, 1.0, 0.9999995231628418, 0.9999977350234985, 0.9992828965187073, 0.999262273311615, 1.0285020834999159e-05, 0.9999904632568359, 1.0, 1.0, 0.9980078339576721, 0.9999998807907104, 0.999974250793457, 0.034345593303442, 0.024079279974102974, 0.0003281444078311324, 0.9998185038566589 ], "eval_biology_with_literary_style_runtime": 34.5167, "eval_biology_with_literary_style_samples_per_second": 2.897, "eval_biology_with_literary_style_steps_per_second": 0.116, "epoch": 1.97, "step": 75 }, { "loss": 0.0031, "learning_rate": 0.0002, "epoch": 2.0, "step": 76 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 2.03, "step": 77 }, { "loss": 0.0019, "learning_rate": 0.0002, "epoch": 2.05, "step": 78 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.08, "step": 79 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 2.11, "step": 80 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 2.13, "step": 81 }, { "loss": 0.0006, "learning_rate": 0.0002, "epoch": 2.16, "step": 82 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.18, "step": 83 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.21, "step": 84 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.24, "step": 85 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.26, "step": 86 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.29, "step": 87 }, { "loss": 0.0013, "learning_rate": 0.0002, "epoch": 2.32, "step": 88 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.34, "step": 89 }, { "loss": 0.0018, "learning_rate": 0.0002, "epoch": 2.37, "step": 90 }, { "loss": 0.0012, "learning_rate": 0.0002, "epoch": 2.39, "step": 91 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 2.42, "step": 92 }, { "loss": 0.0038, "learning_rate": 0.0002, "epoch": 2.45, "step": 93 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.47, "step": 94 }, { "loss": 0.0007, "learning_rate": 0.0002, "epoch": 2.5, "step": 95 }, { "loss": 0.028, "learning_rate": 0.0002, "epoch": 2.53, "step": 96 }, { "loss": 0.003, "learning_rate": 0.0002, "epoch": 2.55, "step": 97 }, { "loss": 0.0082, "learning_rate": 0.0002, "epoch": 2.58, "step": 98 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.61, "step": 99 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.63, "step": 100 }, { "eval_creative_writing_loss": 0.12560071051120758, "eval_creative_writing_score": -0.02528984844684601, "eval_creative_writing_brier_score": 0.02528984844684601, "eval_creative_writing_average_probability": 0.9707810282707214, "eval_creative_writing_accuracy": 0.97, "eval_creative_writing_probabilities": [ 0.9999761581420898, 1.0, 0.9999992847442627, 1.0, 0.9999547004699707, 1.0, 1.0, 1.0, 0.01668044924736023, 1.0, 0.2576431930065155, 1.0, 1.0, 1.0, 0.9999969005584717, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9979181885719299, 0.9999982118606567, 1.0, 1.0, 0.004256995394825935, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999716281890869, 1.0, 0.9999717473983765, 0.9999995231628418, 0.9999998807907104, 1.0, 1.0, 1.0, 0.999849796295166, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9995261430740356, 1.0, 0.9999653100967407, 1.0, 0.9834521412849426, 1.0, 0.9987533092498779, 1.0, 0.9978016018867493, 1.0, 1.0, 1.0, 0.9999966621398926, 1.0, 0.8659273386001587, 0.9995256662368774, 0.9950549602508545, 1.0, 0.9657167792320251, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999123811721802, 1.0, 0.9999997615814209, 1.0, 1.0, 1.0, 0.9999539852142334, 1.0, 0.9996681213378906, 0.9999990463256836, 0.9996721744537354, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9969596862792969, 0.9999977350234985, 0.9999960660934448, 1.0 ], "eval_creative_writing_runtime": 31.387, "eval_creative_writing_samples_per_second": 3.186, "eval_creative_writing_steps_per_second": 0.127, "epoch": 2.63, "step": 100 }, { "eval_biology_with_literary_style_loss": 1.4950416088104248, "eval_biology_with_literary_style_score": -0.239924356341362, "eval_biology_with_literary_style_brier_score": 0.239924356341362, "eval_biology_with_literary_style_average_probability": 0.7326297760009766, "eval_biology_with_literary_style_accuracy": 0.75, "eval_biology_with_literary_style_probabilities": [ 0.9994196891784668, 0.9992738366127014, 0.983923614025116, 0.9044151902198792, 0.9997815489768982, 0.9998750686645508, 0.006262939423322678, 0.00128739804495126, 8.855006308294833e-05, 0.9999986886978149, 0.9999992847442627, 0.9998844861984253, 0.684658944606781, 0.4036870300769806, 0.9940760135650635, 0.9792241454124451, 0.0024490836076438427, 0.030131032690405846, 1.0, 1.0, 0.9999988079071045, 0.9999747276306152, 0.9999988079071045, 0.9765191674232483, 0.9992048144340515, 0.9988371729850769, 0.9996092915534973, 8.090558549156412e-05, 0.004592935089021921, 0.002051251009106636, 1.0, 0.9999823570251465, 0.9999997615814209, 1.0, 0.9999997615814209, 0.9999996423721313, 0.523539662361145, 0.9182876348495483, 0.9991507530212402, 6.639822203169388e-08, 3.326304067741148e-05, 0.00180066900793463, 0.00022590388834942132, 0.0030361979734152555, 0.9938976168632507, 0.999334990978241, 0.014086072333157063, 0.9999929666519165, 0.9996891021728516, 0.9999194145202637, 0.9998146891593933, 0.5219235420227051, 0.9443055391311646, 0.999993085861206, 0.9992896318435669, 0.9999892711639404, 0.8569433093070984, 0.11361022293567657, 0.6792670488357544, 0.07281597703695297, 0.9995861649513245, 0.9714840054512024, 0.9986534118652344, 0.9999567270278931, 0.7200058102607727, 0.9917659163475037, 0.9999903440475464, 0.9999982118606567, 0.999862551689148, 0.9478113055229187, 0.9273961782455444, 0.9989776611328125, 9.58814489422366e-05, 0.9144469499588013, 0.8570235371589661, 0.999974250793457, 0.27402758598327637, 0.9973011612892151, 0.07933907210826874, 0.9523033499717712, 0.005996616557240486, 0.9999831914901733, 0.9931454658508301, 0.9989646673202515, 1.0, 0.9999984502792358, 0.999990701675415, 0.9977810978889465, 0.9995942711830139, 0.00018957872816827148, 0.9999784231185913, 0.9999998807907104, 1.0, 0.9992635846138, 1.0, 0.9999830722808838, 0.024764718487858772, 0.0030579320155084133, 0.0002520766283851117, 0.9998053908348083 ], "eval_biology_with_literary_style_runtime": 34.5448, "eval_biology_with_literary_style_samples_per_second": 2.895, "eval_biology_with_literary_style_steps_per_second": 0.116, "epoch": 2.63, "step": 100 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.66, "step": 101 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.68, "step": 102 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 2.71, "step": 103 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.74, "step": 104 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.76, "step": 105 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.79, "step": 106 }, { "loss": 0.0012, "learning_rate": 0.0002, "epoch": 2.82, "step": 107 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.84, "step": 108 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 2.87, "step": 109 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 2.89, "step": 110 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.92, "step": 111 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.95, "step": 112 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.97, "step": 113 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 3.0, "step": 114 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 3.03, "step": 115 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.05, "step": 116 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.08, "step": 117 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.11, "step": 118 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.13, "step": 119 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.16, "step": 120 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.18, "step": 121 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.21, "step": 122 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.24, "step": 123 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.26, "step": 124 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 3.29, "step": 125 }, { "eval_creative_writing_loss": 0.12550988793373108, "eval_creative_writing_score": -0.023599309846758842, "eval_creative_writing_brier_score": 0.023599309846758842, "eval_creative_writing_average_probability": 0.9722127318382263, "eval_creative_writing_accuracy": 0.97, "eval_creative_writing_probabilities": [ 0.9999924898147583, 1.0, 0.9999974966049194, 1.0, 0.9999760389328003, 1.0, 1.0, 1.0, 0.004820940550416708, 1.0, 0.38430270552635193, 1.0, 1.0, 1.0, 0.9999996423721313, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9995892643928528, 0.9999997615814209, 1.0, 1.0, 0.012191555462777615, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999853372573853, 1.0, 0.999993085861206, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.9999395608901978, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998890161514282, 1.0, 0.9999903440475464, 1.0, 0.9879515767097473, 1.0, 0.9996850490570068, 1.0, 0.9710411429405212, 1.0, 1.0, 1.0, 0.9999980926513672, 1.0, 0.8841173648834229, 0.999834418296814, 0.9965130686759949, 1.0, 0.9834631085395813, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999334812164307, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9999955892562866, 1.0, 0.9997425675392151, 0.9999978542327881, 0.9998661279678345, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9984672665596008, 0.9999994039535522, 0.9999991655349731, 1.0 ], "eval_creative_writing_runtime": 31.3796, "eval_creative_writing_samples_per_second": 3.187, "eval_creative_writing_steps_per_second": 0.127, "epoch": 3.29, "step": 125 }, { "eval_biology_with_literary_style_loss": 1.4181084632873535, "eval_biology_with_literary_style_score": -0.22596853971481323, "eval_biology_with_literary_style_brier_score": 0.22596853971481323, "eval_biology_with_literary_style_average_probability": 0.7482656836509705, "eval_biology_with_literary_style_accuracy": 0.75, "eval_biology_with_literary_style_probabilities": [ 0.9995549321174622, 0.9985436201095581, 0.9808439612388611, 0.9283376932144165, 0.9999039173126221, 0.9999103546142578, 0.0073650372214615345, 0.0004983382532373071, 3.531386028043926e-05, 0.9999995231628418, 0.9999997615814209, 0.9999420642852783, 0.7384650707244873, 0.46473434567451477, 0.9987931251525879, 0.9886658191680908, 0.008153733797371387, 0.09072274714708328, 1.0, 1.0, 0.9999998807907104, 0.9999436140060425, 0.9999998807907104, 0.9902008175849915, 0.9996306896209717, 0.9992589354515076, 0.9999074935913086, 0.00022981569054536521, 0.020188162103295326, 0.02801305055618286, 1.0, 0.9999865293502808, 0.9999995231628418, 1.0, 0.9999997615814209, 0.9999998807907104, 0.6005551218986511, 0.9702953696250916, 0.9998131394386292, 1.041131092449632e-08, 9.587202839611564e-06, 0.0011823305394500494, 0.0001448177790734917, 0.009138714522123337, 0.9979574680328369, 0.9991582632064819, 0.016484873369336128, 0.9999951124191284, 0.999769389629364, 0.9998552799224854, 0.9998192191123962, 0.8466882705688477, 0.9816719889640808, 0.9999973773956299, 0.9993175268173218, 0.9999885559082031, 0.9391415119171143, 0.1099478080868721, 0.7412080764770508, 0.18274378776550293, 0.999987006187439, 0.9898987412452698, 0.9998290538787842, 0.9999748468399048, 0.776476263999939, 0.9796139001846313, 0.9999961853027344, 0.9999972581863403, 0.9996709823608398, 0.9429153800010681, 0.8567234873771667, 0.9939687252044678, 0.00026437846827320755, 0.9818727374076843, 0.9595638513565063, 0.9999904632568359, 0.45074892044067383, 0.9988915324211121, 0.27005892992019653, 0.9760595560073853, 0.003961615264415741, 0.999995231628418, 0.9971864819526672, 0.9999188184738159, 1.0, 0.9999996423721313, 0.9999929666519165, 0.9993189573287964, 0.9999390840530396, 0.0004752865352202207, 0.9999654293060303, 1.0, 1.0, 0.9996334314346313, 1.0, 0.9999845027923584, 0.012308338657021523, 0.0005812794552184641, 0.0003732819459401071, 0.999724805355072 ], "eval_biology_with_literary_style_runtime": 34.5294, "eval_biology_with_literary_style_samples_per_second": 2.896, "eval_biology_with_literary_style_steps_per_second": 0.116, "epoch": 3.29, "step": 125 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 3.32, "step": 126 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.34, "step": 127 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.37, "step": 128 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 3.39, "step": 129 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 3.42, "step": 130 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.45, "step": 131 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.47, "step": 132 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.5, "step": 133 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.53, "step": 134 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.55, "step": 135 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.58, "step": 136 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.61, "step": 137 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.63, "step": 138 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.66, "step": 139 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.68, "step": 140 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.71, "step": 141 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 3.74, "step": 142 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.76, "step": 143 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.79, "step": 144 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.82, "step": 145 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.84, "step": 146 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.87, "step": 147 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.89, "step": 148 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.92, "step": 149 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.95, "step": 150 }, { "eval_creative_writing_loss": 0.12696059048175812, "eval_creative_writing_score": -0.023474015295505524, "eval_creative_writing_brier_score": 0.023474015295505524, "eval_creative_writing_average_probability": 0.9725755453109741, "eval_creative_writing_accuracy": 0.97, "eval_creative_writing_probabilities": [ 0.9999969005584717, 1.0, 0.9999990463256836, 1.0, 0.9999603033065796, 1.0, 1.0, 1.0, 0.0047756098210811615, 1.0, 0.3958245515823364, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9996107220649719, 0.9999997615814209, 1.0, 1.0, 0.009845593012869358, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999935626983643, 1.0, 0.999993085861206, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.9999793767929077, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999181032180786, 1.0, 0.9999945163726807, 1.0, 0.9935240745544434, 1.0, 0.9997866749763489, 1.0, 0.9811118245124817, 1.0, 1.0, 1.0, 0.9999995231628418, 1.0, 0.8962162137031555, 0.9998764991760254, 0.9970778226852417, 1.0, 0.9822664856910706, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999604225158691, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9999983310699463, 1.0, 0.9996516704559326, 0.9999957084655762, 0.9999384880065918, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9982588887214661, 0.9999991655349731, 0.9999997615814209, 1.0 ], "eval_creative_writing_runtime": 31.3872, "eval_creative_writing_samples_per_second": 3.186, "eval_creative_writing_steps_per_second": 0.127, "epoch": 3.95, "step": 150 }, { "eval_biology_with_literary_style_loss": 1.508569598197937, "eval_biology_with_literary_style_score": -0.223800390958786, "eval_biology_with_literary_style_brier_score": 0.223800390958786, "eval_biology_with_literary_style_average_probability": 0.753645658493042, "eval_biology_with_literary_style_accuracy": 0.77, "eval_biology_with_literary_style_probabilities": [ 0.9997276663780212, 0.9992485642433167, 0.9895316362380981, 0.9514719843864441, 0.9999614953994751, 0.9999442100524902, 0.004651335533708334, 0.0001869379193522036, 1.4444511180045083e-05, 0.9999998807907104, 1.0, 0.9999759197235107, 0.79854816198349, 0.5046471357345581, 0.9990099668502808, 0.99369215965271, 0.0043740225955843925, 0.05449388176202774, 1.0, 1.0, 1.0, 0.9999809265136719, 1.0, 0.99128657579422, 0.9998859167098999, 0.999728262424469, 0.9999839067459106, 0.00016595126362517476, 0.02103896625339985, 0.024976782500743866, 1.0, 0.9999954700469971, 0.9999998807907104, 1.0, 1.0, 1.0, 0.6514801383018494, 0.9780262112617493, 0.9999254941940308, 2.189738612656811e-09, 3.2669267966412008e-06, 0.0008265993092209101, 9.858178964350373e-05, 0.007948733866214752, 0.9988512992858887, 0.9993112087249756, 0.007991177029907703, 0.9999977350234985, 0.9998981952667236, 0.9999642372131348, 0.9999281167984009, 0.8989755511283875, 0.9909796714782715, 0.9999992847442627, 0.9996218681335449, 0.9999951124191284, 0.9505088925361633, 0.1275900900363922, 0.8378230929374695, 0.1916273832321167, 0.9999955892562866, 0.9941998720169067, 0.9999337196350098, 0.9999921321868896, 0.7730574607849121, 0.9855798482894897, 0.9999992847442627, 0.9999995231628418, 0.9999063014984131, 0.9654306769371033, 0.9086850881576538, 0.9970625042915344, 0.00011868889123434201, 0.9871053695678711, 0.962754487991333, 0.999996542930603, 0.5310143828392029, 0.9996671676635742, 0.28350409865379333, 0.9845860600471497, 0.002773666987195611, 0.9999983310699463, 0.9990894794464111, 0.999972939491272, 1.0, 0.9999998807907104, 0.9999982118606567, 0.9997745156288147, 0.9999819993972778, 0.00044673527008853853, 0.9999911785125732, 1.0, 1.0, 0.9998553991317749, 1.0, 0.9999963045120239, 0.01165656466037035, 0.0004307126218918711, 0.00019968363631051034, 0.9999244213104248 ], "eval_biology_with_literary_style_runtime": 34.5257, "eval_biology_with_literary_style_samples_per_second": 2.896, "eval_biology_with_literary_style_steps_per_second": 0.116, "epoch": 3.95, "step": 150 }, { "train_runtime": 3937.8753, "train_samples_per_second": 1.219, "train_steps_per_second": 0.038, "total_flos": 0.0, "train_loss": 0.053941556412501084, "epoch": 3.95, "step": 150 } ]