|
[ |
|
{ |
|
"loss": 0.8734, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.03, |
|
"step": 1 |
|
}, |
|
{ |
|
"loss": 0.7825, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.05, |
|
"step": 2 |
|
}, |
|
{ |
|
"loss": 0.6849, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.08, |
|
"step": 3 |
|
}, |
|
{ |
|
"loss": 0.6394, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.11, |
|
"step": 4 |
|
}, |
|
{ |
|
"loss": 0.5604, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.13, |
|
"step": 5 |
|
}, |
|
{ |
|
"loss": 0.484, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.16, |
|
"step": 6 |
|
}, |
|
{ |
|
"loss": 0.4106, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.18, |
|
"step": 7 |
|
}, |
|
{ |
|
"loss": 0.315, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.21, |
|
"step": 8 |
|
}, |
|
{ |
|
"loss": 0.303, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.24, |
|
"step": 9 |
|
}, |
|
{ |
|
"loss": 0.2428, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.26, |
|
"step": 10 |
|
}, |
|
{ |
|
"loss": 0.1576, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.29, |
|
"step": 11 |
|
}, |
|
{ |
|
"loss": 0.1088, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.32, |
|
"step": 12 |
|
}, |
|
{ |
|
"loss": 0.0539, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.34, |
|
"step": 13 |
|
}, |
|
{ |
|
"loss": 0.0554, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.37, |
|
"step": 14 |
|
}, |
|
{ |
|
"loss": 0.0339, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.39, |
|
"step": 15 |
|
}, |
|
{ |
|
"loss": 0.0978, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.42, |
|
"step": 16 |
|
}, |
|
{ |
|
"loss": 0.0074, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.45, |
|
"step": 17 |
|
}, |
|
{ |
|
"loss": 0.0462, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.47, |
|
"step": 18 |
|
}, |
|
{ |
|
"loss": 0.0314, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.5, |
|
"step": 19 |
|
}, |
|
{ |
|
"loss": 0.0483, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.53, |
|
"step": 20 |
|
}, |
|
{ |
|
"loss": 0.0277, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.55, |
|
"step": 21 |
|
}, |
|
{ |
|
"loss": 0.0407, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.58, |
|
"step": 22 |
|
}, |
|
{ |
|
"loss": 0.055, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.61, |
|
"step": 23 |
|
}, |
|
{ |
|
"loss": 0.0459, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.63, |
|
"step": 24 |
|
}, |
|
{ |
|
"loss": 0.0388, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.66, |
|
"step": 25 |
|
}, |
|
{ |
|
"eval_creative_writing_loss": 0.10124114900827408, |
|
"eval_creative_writing_score": -0.029021821916103363, |
|
"eval_creative_writing_brier_score": 0.029021821916103363, |
|
"eval_creative_writing_average_probability": 0.949838399887085, |
|
"eval_creative_writing_accuracy": 0.96, |
|
"eval_creative_writing_probabilities": [ |
|
0.968286395072937, |
|
1.0, |
|
0.9910894632339478, |
|
1.0, |
|
0.7539140582084656, |
|
1.0, |
|
0.9999864101409912, |
|
1.0, |
|
0.10593231767416, |
|
0.9996770620346069, |
|
0.35035908222198486, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
0.995671272277832, |
|
0.9999988079071045, |
|
0.9999849796295166, |
|
0.9999967813491821, |
|
0.9999188184738159, |
|
1.0, |
|
0.9981185793876648, |
|
0.9999852180480957, |
|
0.999852180480957, |
|
0.9999985694885254, |
|
0.7055116891860962, |
|
0.9584224820137024, |
|
0.9999998807907104, |
|
1.0, |
|
0.0526885986328125, |
|
0.9999996423721313, |
|
0.9999980926513672, |
|
1.0, |
|
0.9999997615814209, |
|
1.0, |
|
0.9999997615814209, |
|
1.0, |
|
0.996961772441864, |
|
1.0, |
|
0.9960450530052185, |
|
0.9987917542457581, |
|
0.9999934434890747, |
|
1.0, |
|
0.9999978542327881, |
|
1.0, |
|
0.9987438321113586, |
|
0.9999847412109375, |
|
0.9999997615814209, |
|
1.0, |
|
0.999990701675415, |
|
0.9999983310699463, |
|
0.9999998807907104, |
|
1.0, |
|
0.968285322189331, |
|
1.0, |
|
0.988308310508728, |
|
0.9999992847442627, |
|
0.9514806270599365, |
|
0.9999994039535522, |
|
0.9962933659553528, |
|
1.0, |
|
0.8887924551963806, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9997608065605164, |
|
1.0, |
|
0.4080069065093994, |
|
0.6264576315879822, |
|
0.8959720134735107, |
|
1.0, |
|
0.7741034626960754, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.8713729381561279, |
|
1.0, |
|
0.9989833235740662, |
|
0.9999933242797852, |
|
1.0, |
|
1.0, |
|
0.9996505975723267, |
|
1.0, |
|
0.9916742444038391, |
|
0.9989118576049805, |
|
0.9800452589988708, |
|
0.9999994039535522, |
|
0.9998782873153687, |
|
0.9999986886978149, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999996423721313, |
|
0.9999991655349731, |
|
0.7797707915306091, |
|
0.9964134097099304, |
|
0.9997926354408264, |
|
1.0 |
|
], |
|
"eval_creative_writing_runtime": 31.3856, |
|
"eval_creative_writing_samples_per_second": 3.186, |
|
"eval_creative_writing_steps_per_second": 0.127, |
|
"epoch": 0.66, |
|
"step": 25 |
|
}, |
|
{ |
|
"eval_biology_with_literary_style_loss": 0.9078262448310852, |
|
"eval_biology_with_literary_style_score": -0.22649109363555908, |
|
"eval_biology_with_literary_style_brier_score": 0.22649109363555908, |
|
"eval_biology_with_literary_style_average_probability": 0.7168461680412292, |
|
"eval_biology_with_literary_style_accuracy": 0.72, |
|
"eval_biology_with_literary_style_probabilities": [ |
|
0.890451967716217, |
|
0.9884986281394958, |
|
0.4627087116241455, |
|
0.26461559534072876, |
|
0.9440751671791077, |
|
0.9983709454536438, |
|
0.14105352759361267, |
|
0.5504218935966492, |
|
0.0717836245894432, |
|
0.9877713918685913, |
|
0.9968053102493286, |
|
0.8904775977134705, |
|
0.2346506416797638, |
|
0.2469814568758011, |
|
0.6628801822662354, |
|
0.9975651502609253, |
|
0.16669368743896484, |
|
0.5848821401596069, |
|
0.9999884366989136, |
|
0.999921441078186, |
|
0.9999263286590576, |
|
0.9972250461578369, |
|
0.9998313188552856, |
|
0.6406869888305664, |
|
0.9976915121078491, |
|
0.9530547857284546, |
|
0.995725154876709, |
|
0.10443083941936493, |
|
0.023461028933525085, |
|
0.0064667826518416405, |
|
0.9999899864196777, |
|
0.997308611869812, |
|
0.9999022483825684, |
|
0.9999972581863403, |
|
0.9999064207077026, |
|
0.9999868869781494, |
|
0.5541044473648071, |
|
0.9237357378005981, |
|
0.9736377596855164, |
|
0.00014247097715269774, |
|
0.0008552187937311828, |
|
0.0020070422906428576, |
|
0.009259264916181564, |
|
0.09797927737236023, |
|
0.988251268863678, |
|
0.9928971529006958, |
|
0.003707324853166938, |
|
0.9992867112159729, |
|
0.9720431566238403, |
|
0.9831154942512512, |
|
0.9243084192276001, |
|
0.056156225502491, |
|
0.6303812265396118, |
|
0.9999059438705444, |
|
0.900888204574585, |
|
0.9978699684143066, |
|
0.91905677318573, |
|
0.22306089103221893, |
|
0.48004305362701416, |
|
0.02644597738981247, |
|
0.9815545678138733, |
|
0.9921851754188538, |
|
0.9861576557159424, |
|
0.9997566342353821, |
|
0.4922117292881012, |
|
0.9966878294944763, |
|
0.9991987347602844, |
|
0.9990311861038208, |
|
0.9266239404678345, |
|
0.9989709854125977, |
|
0.9725721478462219, |
|
0.9685802459716797, |
|
0.013590201735496521, |
|
0.7605278491973877, |
|
0.9985472559928894, |
|
0.998909592628479, |
|
0.9852558374404907, |
|
0.9983707070350647, |
|
0.12097364664077759, |
|
0.9766552448272705, |
|
0.02587122656404972, |
|
0.9760261178016663, |
|
0.9663914442062378, |
|
0.9991564750671387, |
|
0.9999828338623047, |
|
0.99964439868927, |
|
0.9962877035140991, |
|
0.9912015199661255, |
|
0.9959142804145813, |
|
0.004339241422712803, |
|
0.9495350122451782, |
|
0.9996113181114197, |
|
0.9997592568397522, |
|
0.9140135645866394, |
|
0.9997426867485046, |
|
0.9957128763198853, |
|
0.2625727951526642, |
|
0.3218887746334076, |
|
0.010261102579534054, |
|
0.655005931854248 |
|
], |
|
"eval_biology_with_literary_style_runtime": 34.5421, |
|
"eval_biology_with_literary_style_samples_per_second": 2.895, |
|
"eval_biology_with_literary_style_steps_per_second": 0.116, |
|
"epoch": 0.66, |
|
"step": 25 |
|
}, |
|
{ |
|
"loss": 0.211, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.68, |
|
"step": 26 |
|
}, |
|
{ |
|
"loss": 0.128, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.71, |
|
"step": 27 |
|
}, |
|
{ |
|
"loss": 0.0735, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.74, |
|
"step": 28 |
|
}, |
|
{ |
|
"loss": 0.0765, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.76, |
|
"step": 29 |
|
}, |
|
{ |
|
"loss": 0.0729, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.79, |
|
"step": 30 |
|
}, |
|
{ |
|
"loss": 0.0643, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.82, |
|
"step": 31 |
|
}, |
|
{ |
|
"loss": 0.052, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.84, |
|
"step": 32 |
|
}, |
|
{ |
|
"loss": 0.0159, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.87, |
|
"step": 33 |
|
}, |
|
{ |
|
"loss": 0.0546, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.89, |
|
"step": 34 |
|
}, |
|
{ |
|
"loss": 0.0484, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.92, |
|
"step": 35 |
|
}, |
|
{ |
|
"loss": 0.1863, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.95, |
|
"step": 36 |
|
}, |
|
{ |
|
"loss": 0.1555, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.97, |
|
"step": 37 |
|
}, |
|
{ |
|
"loss": 0.0444, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"loss": 0.0087, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.03, |
|
"step": 39 |
|
}, |
|
{ |
|
"loss": 0.0122, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.05, |
|
"step": 40 |
|
}, |
|
{ |
|
"loss": 0.0664, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.08, |
|
"step": 41 |
|
}, |
|
{ |
|
"loss": 0.0011, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.11, |
|
"step": 42 |
|
}, |
|
{ |
|
"loss": 0.0363, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.13, |
|
"step": 43 |
|
}, |
|
{ |
|
"loss": 0.0011, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.16, |
|
"step": 44 |
|
}, |
|
{ |
|
"loss": 0.0145, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.18, |
|
"step": 45 |
|
}, |
|
{ |
|
"loss": 0.0055, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.21, |
|
"step": 46 |
|
}, |
|
{ |
|
"loss": 0.0542, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.24, |
|
"step": 47 |
|
}, |
|
{ |
|
"loss": 0.0092, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.26, |
|
"step": 48 |
|
}, |
|
{ |
|
"loss": 0.0047, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.29, |
|
"step": 49 |
|
}, |
|
{ |
|
"loss": 0.0913, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.32, |
|
"step": 50 |
|
}, |
|
{ |
|
"eval_creative_writing_loss": 0.0620737262070179, |
|
"eval_creative_writing_score": -0.018390759825706482, |
|
"eval_creative_writing_brier_score": 0.018390759825706482, |
|
"eval_creative_writing_average_probability": 0.9659796953201294, |
|
"eval_creative_writing_accuracy": 0.97, |
|
"eval_creative_writing_probabilities": [ |
|
0.9853099584579468, |
|
1.0, |
|
0.9972212314605713, |
|
1.0, |
|
0.9858209490776062, |
|
1.0, |
|
0.9999940395355225, |
|
1.0, |
|
0.26181724667549133, |
|
1.0, |
|
0.4172997772693634, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.998902440071106, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9997408986091614, |
|
1.0, |
|
0.9994969367980957, |
|
0.9999998807907104, |
|
0.9999983310699463, |
|
1.0, |
|
0.8943321108818054, |
|
0.9996473789215088, |
|
1.0, |
|
1.0, |
|
0.1366727352142334, |
|
1.0, |
|
0.9999995231628418, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9997522234916687, |
|
1.0, |
|
0.9999735355377197, |
|
0.9999978542327881, |
|
0.9999889135360718, |
|
1.0, |
|
0.9999996423721313, |
|
1.0, |
|
0.9998144507408142, |
|
0.9999997615814209, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.976751983165741, |
|
1.0, |
|
0.997978150844574, |
|
1.0, |
|
0.9721508622169495, |
|
1.0, |
|
0.9091917872428894, |
|
1.0, |
|
0.8016390800476074, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9940642714500427, |
|
1.0, |
|
0.7791704535484314, |
|
0.9723653793334961, |
|
0.8304190635681152, |
|
1.0, |
|
0.738132655620575, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.997934103012085, |
|
1.0, |
|
0.9999746084213257, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9992375373840332, |
|
1.0, |
|
0.9998910427093506, |
|
0.9999988079071045, |
|
0.9967170357704163, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9567482471466064, |
|
0.9999606609344482, |
|
0.9998536109924316, |
|
1.0 |
|
], |
|
"eval_creative_writing_runtime": 31.4284, |
|
"eval_creative_writing_samples_per_second": 3.182, |
|
"eval_creative_writing_steps_per_second": 0.127, |
|
"epoch": 1.32, |
|
"step": 50 |
|
}, |
|
{ |
|
"eval_biology_with_literary_style_loss": 1.3244109153747559, |
|
"eval_biology_with_literary_style_score": -0.2915953993797302, |
|
"eval_biology_with_literary_style_brier_score": 0.2915953993797302, |
|
"eval_biology_with_literary_style_average_probability": 0.6370630860328674, |
|
"eval_biology_with_literary_style_accuracy": 0.66, |
|
"eval_biology_with_literary_style_probabilities": [ |
|
0.8422331213951111, |
|
0.3563534617424011, |
|
0.20590150356292725, |
|
0.34388160705566406, |
|
0.9738777279853821, |
|
0.9933098554611206, |
|
0.013515126891434193, |
|
0.03492158278822899, |
|
0.0005772275617346168, |
|
0.9946145415306091, |
|
0.9994958639144897, |
|
0.9732575416564941, |
|
0.4653439223766327, |
|
0.2902985215187073, |
|
0.9848002195358276, |
|
0.874828040599823, |
|
0.007957144640386105, |
|
0.10880450904369354, |
|
0.9999992847442627, |
|
0.9999992847442627, |
|
0.9997832179069519, |
|
0.9021045565605164, |
|
0.9993135929107666, |
|
0.09312967211008072, |
|
0.9835945963859558, |
|
0.9038721919059753, |
|
0.9953224062919617, |
|
0.005929848644882441, |
|
0.004466882906854153, |
|
0.034653790295124054, |
|
0.9999308586120605, |
|
0.9367615580558777, |
|
0.9977996945381165, |
|
0.9999979734420776, |
|
0.996374785900116, |
|
0.9997103810310364, |
|
0.19381558895111084, |
|
0.6907597780227661, |
|
0.875636100769043, |
|
8.129484740493353e-06, |
|
6.22305233264342e-05, |
|
0.0003827095788437873, |
|
0.0011861087987199426, |
|
0.013507246039807796, |
|
0.9973799586296082, |
|
0.6868321299552917, |
|
0.04164070263504982, |
|
0.9997265934944153, |
|
0.9688616991043091, |
|
0.9772934317588806, |
|
0.9902143478393555, |
|
0.05315324291586876, |
|
0.7507219910621643, |
|
0.997478187084198, |
|
0.8234730362892151, |
|
0.9983914494514465, |
|
0.5037214159965515, |
|
0.1830606311559677, |
|
0.6088641285896301, |
|
0.056403663009405136, |
|
0.9997255206108093, |
|
0.997403085231781, |
|
0.9995600581169128, |
|
0.9978277087211609, |
|
0.1915532499551773, |
|
0.886441171169281, |
|
0.9874991774559021, |
|
0.9855878353118896, |
|
0.7539471387863159, |
|
0.9771825671195984, |
|
0.5847952961921692, |
|
0.7823383212089539, |
|
0.010838981717824936, |
|
0.7440186142921448, |
|
0.8710861206054688, |
|
0.9924225211143494, |
|
0.41601699590682983, |
|
0.9564679861068726, |
|
0.5351160168647766, |
|
0.9124112725257874, |
|
0.00990450568497181, |
|
0.8436535000801086, |
|
0.45744913816452026, |
|
0.9863516092300415, |
|
0.9999998807907104, |
|
0.99796462059021, |
|
0.984410285949707, |
|
0.8628932237625122, |
|
0.7051854133605957, |
|
0.004238440655171871, |
|
0.6792111396789551, |
|
0.9969097971916199, |
|
0.9998319149017334, |
|
0.12430453300476074, |
|
0.9992156028747559, |
|
0.8144650459289551, |
|
0.07938272505998611, |
|
0.013606518507003784, |
|
0.003284846432507038, |
|
0.8325172066688538 |
|
], |
|
"eval_biology_with_literary_style_runtime": 34.5464, |
|
"eval_biology_with_literary_style_samples_per_second": 2.895, |
|
"eval_biology_with_literary_style_steps_per_second": 0.116, |
|
"epoch": 1.32, |
|
"step": 50 |
|
}, |
|
{ |
|
"loss": 0.0092, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.34, |
|
"step": 51 |
|
}, |
|
{ |
|
"loss": 0.0012, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.37, |
|
"step": 52 |
|
}, |
|
{ |
|
"loss": 0.0741, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.39, |
|
"step": 53 |
|
}, |
|
{ |
|
"loss": 0.0191, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.42, |
|
"step": 54 |
|
}, |
|
{ |
|
"loss": 0.0289, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.45, |
|
"step": 55 |
|
}, |
|
{ |
|
"loss": 0.008, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.47, |
|
"step": 56 |
|
}, |
|
{ |
|
"loss": 0.0091, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.5, |
|
"step": 57 |
|
}, |
|
{ |
|
"loss": 0.001, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.53, |
|
"step": 58 |
|
}, |
|
{ |
|
"loss": 0.0589, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.55, |
|
"step": 59 |
|
}, |
|
{ |
|
"loss": 0.0014, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.58, |
|
"step": 60 |
|
}, |
|
{ |
|
"loss": 0.0009, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.61, |
|
"step": 61 |
|
}, |
|
{ |
|
"loss": 0.0032, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.63, |
|
"step": 62 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.66, |
|
"step": 63 |
|
}, |
|
{ |
|
"loss": 0.0163, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.68, |
|
"step": 64 |
|
}, |
|
{ |
|
"loss": 0.0007, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.71, |
|
"step": 65 |
|
}, |
|
{ |
|
"loss": 0.0153, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.74, |
|
"step": 66 |
|
}, |
|
{ |
|
"loss": 0.0266, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.76, |
|
"step": 67 |
|
}, |
|
{ |
|
"loss": 0.0547, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.79, |
|
"step": 68 |
|
}, |
|
{ |
|
"loss": 0.0024, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.82, |
|
"step": 69 |
|
}, |
|
{ |
|
"loss": 0.0118, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.84, |
|
"step": 70 |
|
}, |
|
{ |
|
"loss": 0.0031, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.87, |
|
"step": 71 |
|
}, |
|
{ |
|
"loss": 0.0076, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.89, |
|
"step": 72 |
|
}, |
|
{ |
|
"loss": 0.0201, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.92, |
|
"step": 73 |
|
}, |
|
{ |
|
"loss": 0.0105, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.95, |
|
"step": 74 |
|
}, |
|
{ |
|
"loss": 0.0089, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.97, |
|
"step": 75 |
|
}, |
|
{ |
|
"eval_creative_writing_loss": 0.09592155367136002, |
|
"eval_creative_writing_score": -0.020631961524486542, |
|
"eval_creative_writing_brier_score": 0.020631961524486542, |
|
"eval_creative_writing_average_probability": 0.971525251865387, |
|
"eval_creative_writing_accuracy": 0.97, |
|
"eval_creative_writing_probabilities": [ |
|
0.9997228980064392, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
0.9996482133865356, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
0.2378961592912674, |
|
0.9999994039535522, |
|
0.42051035165786743, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999306201934814, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
0.9999996423721313, |
|
1.0, |
|
0.999981164932251, |
|
0.9999998807907104, |
|
0.9999912977218628, |
|
1.0, |
|
0.9809040427207947, |
|
0.9995905756950378, |
|
1.0, |
|
1.0, |
|
0.0023363870568573475, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999964237213135, |
|
1.0, |
|
0.9986351132392883, |
|
0.9997923970222473, |
|
0.9999994039535522, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9998650550842285, |
|
0.9999997615814209, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9987547397613525, |
|
1.0, |
|
0.9995219707489014, |
|
1.0, |
|
0.9831072092056274, |
|
1.0, |
|
0.9974205493927002, |
|
1.0, |
|
0.9997766613960266, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999995231628418, |
|
1.0, |
|
0.6153692007064819, |
|
0.9825475811958313, |
|
0.9969502091407776, |
|
1.0, |
|
0.9921615719795227, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9998723268508911, |
|
1.0, |
|
0.9999754428863525, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
0.9998829364776611, |
|
1.0, |
|
0.9971277117729187, |
|
0.9999291896820068, |
|
0.9999527931213379, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.952387273311615, |
|
0.9989917874336243, |
|
0.9999982118606567, |
|
1.0 |
|
], |
|
"eval_creative_writing_runtime": 31.3865, |
|
"eval_creative_writing_samples_per_second": 3.186, |
|
"eval_creative_writing_steps_per_second": 0.127, |
|
"epoch": 1.97, |
|
"step": 75 |
|
}, |
|
{ |
|
"eval_biology_with_literary_style_loss": 1.4261603355407715, |
|
"eval_biology_with_literary_style_score": -0.23246526718139648, |
|
"eval_biology_with_literary_style_brier_score": 0.23246526718139648, |
|
"eval_biology_with_literary_style_average_probability": 0.739600419998169, |
|
"eval_biology_with_literary_style_accuracy": 0.75, |
|
"eval_biology_with_literary_style_probabilities": [ |
|
0.9998239874839783, |
|
0.9999821186065674, |
|
0.997286319732666, |
|
0.7309690117835999, |
|
0.9989686012268066, |
|
0.9998588562011719, |
|
0.1100221648812294, |
|
0.013960055075585842, |
|
0.0014234904665499926, |
|
0.9999901056289673, |
|
0.9999978542327881, |
|
0.9998490810394287, |
|
0.5011758804321289, |
|
0.24851495027542114, |
|
0.9544274806976318, |
|
0.996453046798706, |
|
0.0021988353691995144, |
|
0.05229213833808899, |
|
1.0, |
|
1.0, |
|
0.9999995231628418, |
|
0.9999839067459106, |
|
0.999997615814209, |
|
0.9901941418647766, |
|
0.9993062019348145, |
|
0.9996980428695679, |
|
0.9998052716255188, |
|
7.921013457234949e-05, |
|
0.00241833901964128, |
|
0.00010733860108302906, |
|
1.0, |
|
0.9999939203262329, |
|
1.0, |
|
1.0, |
|
0.9999995231628418, |
|
1.0, |
|
0.37197402119636536, |
|
0.906427264213562, |
|
0.9985352754592896, |
|
9.472542927824179e-08, |
|
1.8663062292034738e-05, |
|
0.0007226847810670733, |
|
0.0009153155260719359, |
|
0.027196291834115982, |
|
0.996335506439209, |
|
0.9934919476509094, |
|
0.009263264015316963, |
|
0.9999862909317017, |
|
0.9986127614974976, |
|
0.999937891960144, |
|
0.9997109770774841, |
|
0.5540578961372375, |
|
0.9694981575012207, |
|
0.9999996423721313, |
|
0.9991618394851685, |
|
0.9999830722808838, |
|
0.9839572906494141, |
|
0.3787292242050171, |
|
0.7562795877456665, |
|
0.024417594075202942, |
|
0.9996551275253296, |
|
0.9969584941864014, |
|
0.998749852180481, |
|
0.9999865293502808, |
|
0.7677027583122253, |
|
0.9995812773704529, |
|
0.9999984502792358, |
|
0.9999995231628418, |
|
0.9999768733978271, |
|
0.9979830980300903, |
|
0.9924507737159729, |
|
0.9991115927696228, |
|
7.307936175493523e-05, |
|
0.7401599884033203, |
|
0.929680347442627, |
|
0.9999669790267944, |
|
0.8817222118377686, |
|
0.9998908042907715, |
|
0.1284332275390625, |
|
0.928813636302948, |
|
0.0032492727041244507, |
|
0.9993767142295837, |
|
0.9767991900444031, |
|
0.9966356158256531, |
|
1.0, |
|
0.9999995231628418, |
|
0.9999977350234985, |
|
0.9992828965187073, |
|
0.999262273311615, |
|
1.0285020834999159e-05, |
|
0.9999904632568359, |
|
1.0, |
|
1.0, |
|
0.9980078339576721, |
|
0.9999998807907104, |
|
0.999974250793457, |
|
0.034345593303442, |
|
0.024079279974102974, |
|
0.0003281444078311324, |
|
0.9998185038566589 |
|
], |
|
"eval_biology_with_literary_style_runtime": 34.5167, |
|
"eval_biology_with_literary_style_samples_per_second": 2.897, |
|
"eval_biology_with_literary_style_steps_per_second": 0.116, |
|
"epoch": 1.97, |
|
"step": 75 |
|
}, |
|
{ |
|
"loss": 0.0031, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"loss": 0.0008, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.03, |
|
"step": 77 |
|
}, |
|
{ |
|
"loss": 0.0019, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.05, |
|
"step": 78 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.08, |
|
"step": 79 |
|
}, |
|
{ |
|
"loss": 0.0009, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.11, |
|
"step": 80 |
|
}, |
|
{ |
|
"loss": 0.0005, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.13, |
|
"step": 81 |
|
}, |
|
{ |
|
"loss": 0.0006, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.16, |
|
"step": 82 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.18, |
|
"step": 83 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.21, |
|
"step": 84 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.24, |
|
"step": 85 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.26, |
|
"step": 86 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.29, |
|
"step": 87 |
|
}, |
|
{ |
|
"loss": 0.0013, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.32, |
|
"step": 88 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.34, |
|
"step": 89 |
|
}, |
|
{ |
|
"loss": 0.0018, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.37, |
|
"step": 90 |
|
}, |
|
{ |
|
"loss": 0.0012, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.39, |
|
"step": 91 |
|
}, |
|
{ |
|
"loss": 0.0004, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.42, |
|
"step": 92 |
|
}, |
|
{ |
|
"loss": 0.0038, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.45, |
|
"step": 93 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.47, |
|
"step": 94 |
|
}, |
|
{ |
|
"loss": 0.0007, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.5, |
|
"step": 95 |
|
}, |
|
{ |
|
"loss": 0.028, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.53, |
|
"step": 96 |
|
}, |
|
{ |
|
"loss": 0.003, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.55, |
|
"step": 97 |
|
}, |
|
{ |
|
"loss": 0.0082, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.58, |
|
"step": 98 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.61, |
|
"step": 99 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.63, |
|
"step": 100 |
|
}, |
|
{ |
|
"eval_creative_writing_loss": 0.12560071051120758, |
|
"eval_creative_writing_score": -0.02528984844684601, |
|
"eval_creative_writing_brier_score": 0.02528984844684601, |
|
"eval_creative_writing_average_probability": 0.9707810282707214, |
|
"eval_creative_writing_accuracy": 0.97, |
|
"eval_creative_writing_probabilities": [ |
|
0.9999761581420898, |
|
1.0, |
|
0.9999992847442627, |
|
1.0, |
|
0.9999547004699707, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.01668044924736023, |
|
1.0, |
|
0.2576431930065155, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999969005584717, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9979181885719299, |
|
0.9999982118606567, |
|
1.0, |
|
1.0, |
|
0.004256995394825935, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999716281890869, |
|
1.0, |
|
0.9999717473983765, |
|
0.9999995231628418, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.999849796295166, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9995261430740356, |
|
1.0, |
|
0.9999653100967407, |
|
1.0, |
|
0.9834521412849426, |
|
1.0, |
|
0.9987533092498779, |
|
1.0, |
|
0.9978016018867493, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999966621398926, |
|
1.0, |
|
0.8659273386001587, |
|
0.9995256662368774, |
|
0.9950549602508545, |
|
1.0, |
|
0.9657167792320251, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999123811721802, |
|
1.0, |
|
0.9999997615814209, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999539852142334, |
|
1.0, |
|
0.9996681213378906, |
|
0.9999990463256836, |
|
0.9996721744537354, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9969596862792969, |
|
0.9999977350234985, |
|
0.9999960660934448, |
|
1.0 |
|
], |
|
"eval_creative_writing_runtime": 31.387, |
|
"eval_creative_writing_samples_per_second": 3.186, |
|
"eval_creative_writing_steps_per_second": 0.127, |
|
"epoch": 2.63, |
|
"step": 100 |
|
}, |
|
{ |
|
"eval_biology_with_literary_style_loss": 1.4950416088104248, |
|
"eval_biology_with_literary_style_score": -0.239924356341362, |
|
"eval_biology_with_literary_style_brier_score": 0.239924356341362, |
|
"eval_biology_with_literary_style_average_probability": 0.7326297760009766, |
|
"eval_biology_with_literary_style_accuracy": 0.75, |
|
"eval_biology_with_literary_style_probabilities": [ |
|
0.9994196891784668, |
|
0.9992738366127014, |
|
0.983923614025116, |
|
0.9044151902198792, |
|
0.9997815489768982, |
|
0.9998750686645508, |
|
0.006262939423322678, |
|
0.00128739804495126, |
|
8.855006308294833e-05, |
|
0.9999986886978149, |
|
0.9999992847442627, |
|
0.9998844861984253, |
|
0.684658944606781, |
|
0.4036870300769806, |
|
0.9940760135650635, |
|
0.9792241454124451, |
|
0.0024490836076438427, |
|
0.030131032690405846, |
|
1.0, |
|
1.0, |
|
0.9999988079071045, |
|
0.9999747276306152, |
|
0.9999988079071045, |
|
0.9765191674232483, |
|
0.9992048144340515, |
|
0.9988371729850769, |
|
0.9996092915534973, |
|
8.090558549156412e-05, |
|
0.004592935089021921, |
|
0.002051251009106636, |
|
1.0, |
|
0.9999823570251465, |
|
0.9999997615814209, |
|
1.0, |
|
0.9999997615814209, |
|
0.9999996423721313, |
|
0.523539662361145, |
|
0.9182876348495483, |
|
0.9991507530212402, |
|
6.639822203169388e-08, |
|
3.326304067741148e-05, |
|
0.00180066900793463, |
|
0.00022590388834942132, |
|
0.0030361979734152555, |
|
0.9938976168632507, |
|
0.999334990978241, |
|
0.014086072333157063, |
|
0.9999929666519165, |
|
0.9996891021728516, |
|
0.9999194145202637, |
|
0.9998146891593933, |
|
0.5219235420227051, |
|
0.9443055391311646, |
|
0.999993085861206, |
|
0.9992896318435669, |
|
0.9999892711639404, |
|
0.8569433093070984, |
|
0.11361022293567657, |
|
0.6792670488357544, |
|
0.07281597703695297, |
|
0.9995861649513245, |
|
0.9714840054512024, |
|
0.9986534118652344, |
|
0.9999567270278931, |
|
0.7200058102607727, |
|
0.9917659163475037, |
|
0.9999903440475464, |
|
0.9999982118606567, |
|
0.999862551689148, |
|
0.9478113055229187, |
|
0.9273961782455444, |
|
0.9989776611328125, |
|
9.58814489422366e-05, |
|
0.9144469499588013, |
|
0.8570235371589661, |
|
0.999974250793457, |
|
0.27402758598327637, |
|
0.9973011612892151, |
|
0.07933907210826874, |
|
0.9523033499717712, |
|
0.005996616557240486, |
|
0.9999831914901733, |
|
0.9931454658508301, |
|
0.9989646673202515, |
|
1.0, |
|
0.9999984502792358, |
|
0.999990701675415, |
|
0.9977810978889465, |
|
0.9995942711830139, |
|
0.00018957872816827148, |
|
0.9999784231185913, |
|
0.9999998807907104, |
|
1.0, |
|
0.9992635846138, |
|
1.0, |
|
0.9999830722808838, |
|
0.024764718487858772, |
|
0.0030579320155084133, |
|
0.0002520766283851117, |
|
0.9998053908348083 |
|
], |
|
"eval_biology_with_literary_style_runtime": 34.5448, |
|
"eval_biology_with_literary_style_samples_per_second": 2.895, |
|
"eval_biology_with_literary_style_steps_per_second": 0.116, |
|
"epoch": 2.63, |
|
"step": 100 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.66, |
|
"step": 101 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.68, |
|
"step": 102 |
|
}, |
|
{ |
|
"loss": 0.0008, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.71, |
|
"step": 103 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.74, |
|
"step": 104 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.76, |
|
"step": 105 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.79, |
|
"step": 106 |
|
}, |
|
{ |
|
"loss": 0.0012, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.82, |
|
"step": 107 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.84, |
|
"step": 108 |
|
}, |
|
{ |
|
"loss": 0.0003, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.87, |
|
"step": 109 |
|
}, |
|
{ |
|
"loss": 0.0004, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.89, |
|
"step": 110 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.92, |
|
"step": 111 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.95, |
|
"step": 112 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 2.97, |
|
"step": 113 |
|
}, |
|
{ |
|
"loss": 0.0005, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.03, |
|
"step": 115 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.05, |
|
"step": 116 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.08, |
|
"step": 117 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.11, |
|
"step": 118 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.13, |
|
"step": 119 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.16, |
|
"step": 120 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.18, |
|
"step": 121 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.21, |
|
"step": 122 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.24, |
|
"step": 123 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.26, |
|
"step": 124 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.29, |
|
"step": 125 |
|
}, |
|
{ |
|
"eval_creative_writing_loss": 0.12550988793373108, |
|
"eval_creative_writing_score": -0.023599309846758842, |
|
"eval_creative_writing_brier_score": 0.023599309846758842, |
|
"eval_creative_writing_average_probability": 0.9722127318382263, |
|
"eval_creative_writing_accuracy": 0.97, |
|
"eval_creative_writing_probabilities": [ |
|
0.9999924898147583, |
|
1.0, |
|
0.9999974966049194, |
|
1.0, |
|
0.9999760389328003, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.004820940550416708, |
|
1.0, |
|
0.38430270552635193, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999996423721313, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9995892643928528, |
|
0.9999997615814209, |
|
1.0, |
|
1.0, |
|
0.012191555462777615, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999853372573853, |
|
1.0, |
|
0.999993085861206, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999395608901978, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9998890161514282, |
|
1.0, |
|
0.9999903440475464, |
|
1.0, |
|
0.9879515767097473, |
|
1.0, |
|
0.9996850490570068, |
|
1.0, |
|
0.9710411429405212, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999980926513672, |
|
1.0, |
|
0.8841173648834229, |
|
0.999834418296814, |
|
0.9965130686759949, |
|
1.0, |
|
0.9834631085395813, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999334812164307, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999955892562866, |
|
1.0, |
|
0.9997425675392151, |
|
0.9999978542327881, |
|
0.9998661279678345, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9984672665596008, |
|
0.9999994039535522, |
|
0.9999991655349731, |
|
1.0 |
|
], |
|
"eval_creative_writing_runtime": 31.3796, |
|
"eval_creative_writing_samples_per_second": 3.187, |
|
"eval_creative_writing_steps_per_second": 0.127, |
|
"epoch": 3.29, |
|
"step": 125 |
|
}, |
|
{ |
|
"eval_biology_with_literary_style_loss": 1.4181084632873535, |
|
"eval_biology_with_literary_style_score": -0.22596853971481323, |
|
"eval_biology_with_literary_style_brier_score": 0.22596853971481323, |
|
"eval_biology_with_literary_style_average_probability": 0.7482656836509705, |
|
"eval_biology_with_literary_style_accuracy": 0.75, |
|
"eval_biology_with_literary_style_probabilities": [ |
|
0.9995549321174622, |
|
0.9985436201095581, |
|
0.9808439612388611, |
|
0.9283376932144165, |
|
0.9999039173126221, |
|
0.9999103546142578, |
|
0.0073650372214615345, |
|
0.0004983382532373071, |
|
3.531386028043926e-05, |
|
0.9999995231628418, |
|
0.9999997615814209, |
|
0.9999420642852783, |
|
0.7384650707244873, |
|
0.46473434567451477, |
|
0.9987931251525879, |
|
0.9886658191680908, |
|
0.008153733797371387, |
|
0.09072274714708328, |
|
1.0, |
|
1.0, |
|
0.9999998807907104, |
|
0.9999436140060425, |
|
0.9999998807907104, |
|
0.9902008175849915, |
|
0.9996306896209717, |
|
0.9992589354515076, |
|
0.9999074935913086, |
|
0.00022981569054536521, |
|
0.020188162103295326, |
|
0.02801305055618286, |
|
1.0, |
|
0.9999865293502808, |
|
0.9999995231628418, |
|
1.0, |
|
0.9999997615814209, |
|
0.9999998807907104, |
|
0.6005551218986511, |
|
0.9702953696250916, |
|
0.9998131394386292, |
|
1.041131092449632e-08, |
|
9.587202839611564e-06, |
|
0.0011823305394500494, |
|
0.0001448177790734917, |
|
0.009138714522123337, |
|
0.9979574680328369, |
|
0.9991582632064819, |
|
0.016484873369336128, |
|
0.9999951124191284, |
|
0.999769389629364, |
|
0.9998552799224854, |
|
0.9998192191123962, |
|
0.8466882705688477, |
|
0.9816719889640808, |
|
0.9999973773956299, |
|
0.9993175268173218, |
|
0.9999885559082031, |
|
0.9391415119171143, |
|
0.1099478080868721, |
|
0.7412080764770508, |
|
0.18274378776550293, |
|
0.999987006187439, |
|
0.9898987412452698, |
|
0.9998290538787842, |
|
0.9999748468399048, |
|
0.776476263999939, |
|
0.9796139001846313, |
|
0.9999961853027344, |
|
0.9999972581863403, |
|
0.9996709823608398, |
|
0.9429153800010681, |
|
0.8567234873771667, |
|
0.9939687252044678, |
|
0.00026437846827320755, |
|
0.9818727374076843, |
|
0.9595638513565063, |
|
0.9999904632568359, |
|
0.45074892044067383, |
|
0.9988915324211121, |
|
0.27005892992019653, |
|
0.9760595560073853, |
|
0.003961615264415741, |
|
0.999995231628418, |
|
0.9971864819526672, |
|
0.9999188184738159, |
|
1.0, |
|
0.9999996423721313, |
|
0.9999929666519165, |
|
0.9993189573287964, |
|
0.9999390840530396, |
|
0.0004752865352202207, |
|
0.9999654293060303, |
|
1.0, |
|
1.0, |
|
0.9996334314346313, |
|
1.0, |
|
0.9999845027923584, |
|
0.012308338657021523, |
|
0.0005812794552184641, |
|
0.0003732819459401071, |
|
0.999724805355072 |
|
], |
|
"eval_biology_with_literary_style_runtime": 34.5294, |
|
"eval_biology_with_literary_style_samples_per_second": 2.896, |
|
"eval_biology_with_literary_style_steps_per_second": 0.116, |
|
"epoch": 3.29, |
|
"step": 125 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.32, |
|
"step": 126 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.34, |
|
"step": 127 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.37, |
|
"step": 128 |
|
}, |
|
{ |
|
"loss": 0.0008, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.39, |
|
"step": 129 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.42, |
|
"step": 130 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.45, |
|
"step": 131 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.47, |
|
"step": 132 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.5, |
|
"step": 133 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.53, |
|
"step": 134 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.55, |
|
"step": 135 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.58, |
|
"step": 136 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.61, |
|
"step": 137 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.63, |
|
"step": 138 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.66, |
|
"step": 139 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.68, |
|
"step": 140 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.71, |
|
"step": 141 |
|
}, |
|
{ |
|
"loss": 0.0002, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.74, |
|
"step": 142 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.76, |
|
"step": 143 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.79, |
|
"step": 144 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.82, |
|
"step": 145 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.84, |
|
"step": 146 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.87, |
|
"step": 147 |
|
}, |
|
{ |
|
"loss": 0.0001, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.89, |
|
"step": 148 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.92, |
|
"step": 149 |
|
}, |
|
{ |
|
"loss": 0.0, |
|
"learning_rate": 0.0002, |
|
"epoch": 3.95, |
|
"step": 150 |
|
}, |
|
{ |
|
"eval_creative_writing_loss": 0.12696059048175812, |
|
"eval_creative_writing_score": -0.023474015295505524, |
|
"eval_creative_writing_brier_score": 0.023474015295505524, |
|
"eval_creative_writing_average_probability": 0.9725755453109741, |
|
"eval_creative_writing_accuracy": 0.97, |
|
"eval_creative_writing_probabilities": [ |
|
0.9999969005584717, |
|
1.0, |
|
0.9999990463256836, |
|
1.0, |
|
0.9999603033065796, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.0047756098210811615, |
|
1.0, |
|
0.3958245515823364, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9996107220649719, |
|
0.9999997615814209, |
|
1.0, |
|
1.0, |
|
0.009845593012869358, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999935626983643, |
|
1.0, |
|
0.999993085861206, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999793767929077, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999181032180786, |
|
1.0, |
|
0.9999945163726807, |
|
1.0, |
|
0.9935240745544434, |
|
1.0, |
|
0.9997866749763489, |
|
1.0, |
|
0.9811118245124817, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999995231628418, |
|
1.0, |
|
0.8962162137031555, |
|
0.9998764991760254, |
|
0.9970778226852417, |
|
1.0, |
|
0.9822664856910706, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999604225158691, |
|
1.0, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999983310699463, |
|
1.0, |
|
0.9996516704559326, |
|
0.9999957084655762, |
|
0.9999384880065918, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9982588887214661, |
|
0.9999991655349731, |
|
0.9999997615814209, |
|
1.0 |
|
], |
|
"eval_creative_writing_runtime": 31.3872, |
|
"eval_creative_writing_samples_per_second": 3.186, |
|
"eval_creative_writing_steps_per_second": 0.127, |
|
"epoch": 3.95, |
|
"step": 150 |
|
}, |
|
{ |
|
"eval_biology_with_literary_style_loss": 1.508569598197937, |
|
"eval_biology_with_literary_style_score": -0.223800390958786, |
|
"eval_biology_with_literary_style_brier_score": 0.223800390958786, |
|
"eval_biology_with_literary_style_average_probability": 0.753645658493042, |
|
"eval_biology_with_literary_style_accuracy": 0.77, |
|
"eval_biology_with_literary_style_probabilities": [ |
|
0.9997276663780212, |
|
0.9992485642433167, |
|
0.9895316362380981, |
|
0.9514719843864441, |
|
0.9999614953994751, |
|
0.9999442100524902, |
|
0.004651335533708334, |
|
0.0001869379193522036, |
|
1.4444511180045083e-05, |
|
0.9999998807907104, |
|
1.0, |
|
0.9999759197235107, |
|
0.79854816198349, |
|
0.5046471357345581, |
|
0.9990099668502808, |
|
0.99369215965271, |
|
0.0043740225955843925, |
|
0.05449388176202774, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.9999809265136719, |
|
1.0, |
|
0.99128657579422, |
|
0.9998859167098999, |
|
0.999728262424469, |
|
0.9999839067459106, |
|
0.00016595126362517476, |
|
0.02103896625339985, |
|
0.024976782500743866, |
|
1.0, |
|
0.9999954700469971, |
|
0.9999998807907104, |
|
1.0, |
|
1.0, |
|
1.0, |
|
0.6514801383018494, |
|
0.9780262112617493, |
|
0.9999254941940308, |
|
2.189738612656811e-09, |
|
3.2669267966412008e-06, |
|
0.0008265993092209101, |
|
9.858178964350373e-05, |
|
0.007948733866214752, |
|
0.9988512992858887, |
|
0.9993112087249756, |
|
0.007991177029907703, |
|
0.9999977350234985, |
|
0.9998981952667236, |
|
0.9999642372131348, |
|
0.9999281167984009, |
|
0.8989755511283875, |
|
0.9909796714782715, |
|
0.9999992847442627, |
|
0.9996218681335449, |
|
0.9999951124191284, |
|
0.9505088925361633, |
|
0.1275900900363922, |
|
0.8378230929374695, |
|
0.1916273832321167, |
|
0.9999955892562866, |
|
0.9941998720169067, |
|
0.9999337196350098, |
|
0.9999921321868896, |
|
0.7730574607849121, |
|
0.9855798482894897, |
|
0.9999992847442627, |
|
0.9999995231628418, |
|
0.9999063014984131, |
|
0.9654306769371033, |
|
0.9086850881576538, |
|
0.9970625042915344, |
|
0.00011868889123434201, |
|
0.9871053695678711, |
|
0.962754487991333, |
|
0.999996542930603, |
|
0.5310143828392029, |
|
0.9996671676635742, |
|
0.28350409865379333, |
|
0.9845860600471497, |
|
0.002773666987195611, |
|
0.9999983310699463, |
|
0.9990894794464111, |
|
0.999972939491272, |
|
1.0, |
|
0.9999998807907104, |
|
0.9999982118606567, |
|
0.9997745156288147, |
|
0.9999819993972778, |
|
0.00044673527008853853, |
|
0.9999911785125732, |
|
1.0, |
|
1.0, |
|
0.9998553991317749, |
|
1.0, |
|
0.9999963045120239, |
|
0.01165656466037035, |
|
0.0004307126218918711, |
|
0.00019968363631051034, |
|
0.9999244213104248 |
|
], |
|
"eval_biology_with_literary_style_runtime": 34.5257, |
|
"eval_biology_with_literary_style_samples_per_second": 2.896, |
|
"eval_biology_with_literary_style_steps_per_second": 0.116, |
|
"epoch": 3.95, |
|
"step": 150 |
|
}, |
|
{ |
|
"train_runtime": 3937.8753, |
|
"train_samples_per_second": 1.219, |
|
"train_steps_per_second": 0.038, |
|
"total_flos": 0.0, |
|
"train_loss": 0.053941556412501084, |
|
"epoch": 3.95, |
|
"step": 150 |
|
} |
|
] |