llama-13b-creative_writing / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
9066c9a
[
{
"loss": 0.8734,
"learning_rate": 0.0002,
"epoch": 0.03,
"step": 1
},
{
"loss": 0.7825,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 2
},
{
"loss": 0.6849,
"learning_rate": 0.0002,
"epoch": 0.08,
"step": 3
},
{
"loss": 0.6394,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 4
},
{
"loss": 0.5604,
"learning_rate": 0.0002,
"epoch": 0.13,
"step": 5
},
{
"loss": 0.484,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 6
},
{
"loss": 0.4106,
"learning_rate": 0.0002,
"epoch": 0.18,
"step": 7
},
{
"loss": 0.315,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 8
},
{
"loss": 0.303,
"learning_rate": 0.0002,
"epoch": 0.24,
"step": 9
},
{
"loss": 0.2428,
"learning_rate": 0.0002,
"epoch": 0.26,
"step": 10
},
{
"loss": 0.1576,
"learning_rate": 0.0002,
"epoch": 0.29,
"step": 11
},
{
"loss": 0.1088,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 12
},
{
"loss": 0.0539,
"learning_rate": 0.0002,
"epoch": 0.34,
"step": 13
},
{
"loss": 0.0554,
"learning_rate": 0.0002,
"epoch": 0.37,
"step": 14
},
{
"loss": 0.0339,
"learning_rate": 0.0002,
"epoch": 0.39,
"step": 15
},
{
"loss": 0.0978,
"learning_rate": 0.0002,
"epoch": 0.42,
"step": 16
},
{
"loss": 0.0074,
"learning_rate": 0.0002,
"epoch": 0.45,
"step": 17
},
{
"loss": 0.0462,
"learning_rate": 0.0002,
"epoch": 0.47,
"step": 18
},
{
"loss": 0.0314,
"learning_rate": 0.0002,
"epoch": 0.5,
"step": 19
},
{
"loss": 0.0483,
"learning_rate": 0.0002,
"epoch": 0.53,
"step": 20
},
{
"loss": 0.0277,
"learning_rate": 0.0002,
"epoch": 0.55,
"step": 21
},
{
"loss": 0.0407,
"learning_rate": 0.0002,
"epoch": 0.58,
"step": 22
},
{
"loss": 0.055,
"learning_rate": 0.0002,
"epoch": 0.61,
"step": 23
},
{
"loss": 0.0459,
"learning_rate": 0.0002,
"epoch": 0.63,
"step": 24
},
{
"loss": 0.0388,
"learning_rate": 0.0002,
"epoch": 0.66,
"step": 25
},
{
"eval_creative_writing_loss": 0.10124114900827408,
"eval_creative_writing_score": -0.029021821916103363,
"eval_creative_writing_brier_score": 0.029021821916103363,
"eval_creative_writing_average_probability": 0.949838399887085,
"eval_creative_writing_accuracy": 0.96,
"eval_creative_writing_probabilities": [
0.968286395072937,
1.0,
0.9910894632339478,
1.0,
0.7539140582084656,
1.0,
0.9999864101409912,
1.0,
0.10593231767416,
0.9996770620346069,
0.35035908222198486,
1.0,
0.9999998807907104,
1.0,
0.995671272277832,
0.9999988079071045,
0.9999849796295166,
0.9999967813491821,
0.9999188184738159,
1.0,
0.9981185793876648,
0.9999852180480957,
0.999852180480957,
0.9999985694885254,
0.7055116891860962,
0.9584224820137024,
0.9999998807907104,
1.0,
0.0526885986328125,
0.9999996423721313,
0.9999980926513672,
1.0,
0.9999997615814209,
1.0,
0.9999997615814209,
1.0,
0.996961772441864,
1.0,
0.9960450530052185,
0.9987917542457581,
0.9999934434890747,
1.0,
0.9999978542327881,
1.0,
0.9987438321113586,
0.9999847412109375,
0.9999997615814209,
1.0,
0.999990701675415,
0.9999983310699463,
0.9999998807907104,
1.0,
0.968285322189331,
1.0,
0.988308310508728,
0.9999992847442627,
0.9514806270599365,
0.9999994039535522,
0.9962933659553528,
1.0,
0.8887924551963806,
1.0,
1.0,
1.0,
0.9997608065605164,
1.0,
0.4080069065093994,
0.6264576315879822,
0.8959720134735107,
1.0,
0.7741034626960754,
1.0,
1.0,
1.0,
1.0,
1.0,
0.8713729381561279,
1.0,
0.9989833235740662,
0.9999933242797852,
1.0,
1.0,
0.9996505975723267,
1.0,
0.9916742444038391,
0.9989118576049805,
0.9800452589988708,
0.9999994039535522,
0.9998782873153687,
0.9999986886978149,
1.0,
1.0,
1.0,
1.0,
0.9999996423721313,
0.9999991655349731,
0.7797707915306091,
0.9964134097099304,
0.9997926354408264,
1.0
],
"eval_creative_writing_runtime": 31.3856,
"eval_creative_writing_samples_per_second": 3.186,
"eval_creative_writing_steps_per_second": 0.127,
"epoch": 0.66,
"step": 25
},
{
"eval_biology_with_literary_style_loss": 0.9078262448310852,
"eval_biology_with_literary_style_score": -0.22649109363555908,
"eval_biology_with_literary_style_brier_score": 0.22649109363555908,
"eval_biology_with_literary_style_average_probability": 0.7168461680412292,
"eval_biology_with_literary_style_accuracy": 0.72,
"eval_biology_with_literary_style_probabilities": [
0.890451967716217,
0.9884986281394958,
0.4627087116241455,
0.26461559534072876,
0.9440751671791077,
0.9983709454536438,
0.14105352759361267,
0.5504218935966492,
0.0717836245894432,
0.9877713918685913,
0.9968053102493286,
0.8904775977134705,
0.2346506416797638,
0.2469814568758011,
0.6628801822662354,
0.9975651502609253,
0.16669368743896484,
0.5848821401596069,
0.9999884366989136,
0.999921441078186,
0.9999263286590576,
0.9972250461578369,
0.9998313188552856,
0.6406869888305664,
0.9976915121078491,
0.9530547857284546,
0.995725154876709,
0.10443083941936493,
0.023461028933525085,
0.0064667826518416405,
0.9999899864196777,
0.997308611869812,
0.9999022483825684,
0.9999972581863403,
0.9999064207077026,
0.9999868869781494,
0.5541044473648071,
0.9237357378005981,
0.9736377596855164,
0.00014247097715269774,
0.0008552187937311828,
0.0020070422906428576,
0.009259264916181564,
0.09797927737236023,
0.988251268863678,
0.9928971529006958,
0.003707324853166938,
0.9992867112159729,
0.9720431566238403,
0.9831154942512512,
0.9243084192276001,
0.056156225502491,
0.6303812265396118,
0.9999059438705444,
0.900888204574585,
0.9978699684143066,
0.91905677318573,
0.22306089103221893,
0.48004305362701416,
0.02644597738981247,
0.9815545678138733,
0.9921851754188538,
0.9861576557159424,
0.9997566342353821,
0.4922117292881012,
0.9966878294944763,
0.9991987347602844,
0.9990311861038208,
0.9266239404678345,
0.9989709854125977,
0.9725721478462219,
0.9685802459716797,
0.013590201735496521,
0.7605278491973877,
0.9985472559928894,
0.998909592628479,
0.9852558374404907,
0.9983707070350647,
0.12097364664077759,
0.9766552448272705,
0.02587122656404972,
0.9760261178016663,
0.9663914442062378,
0.9991564750671387,
0.9999828338623047,
0.99964439868927,
0.9962877035140991,
0.9912015199661255,
0.9959142804145813,
0.004339241422712803,
0.9495350122451782,
0.9996113181114197,
0.9997592568397522,
0.9140135645866394,
0.9997426867485046,
0.9957128763198853,
0.2625727951526642,
0.3218887746334076,
0.010261102579534054,
0.655005931854248
],
"eval_biology_with_literary_style_runtime": 34.5421,
"eval_biology_with_literary_style_samples_per_second": 2.895,
"eval_biology_with_literary_style_steps_per_second": 0.116,
"epoch": 0.66,
"step": 25
},
{
"loss": 0.211,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 26
},
{
"loss": 0.128,
"learning_rate": 0.0002,
"epoch": 0.71,
"step": 27
},
{
"loss": 0.0735,
"learning_rate": 0.0002,
"epoch": 0.74,
"step": 28
},
{
"loss": 0.0765,
"learning_rate": 0.0002,
"epoch": 0.76,
"step": 29
},
{
"loss": 0.0729,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 30
},
{
"loss": 0.0643,
"learning_rate": 0.0002,
"epoch": 0.82,
"step": 31
},
{
"loss": 0.052,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 32
},
{
"loss": 0.0159,
"learning_rate": 0.0002,
"epoch": 0.87,
"step": 33
},
{
"loss": 0.0546,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 34
},
{
"loss": 0.0484,
"learning_rate": 0.0002,
"epoch": 0.92,
"step": 35
},
{
"loss": 0.1863,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 36
},
{
"loss": 0.1555,
"learning_rate": 0.0002,
"epoch": 0.97,
"step": 37
},
{
"loss": 0.0444,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 38
},
{
"loss": 0.0087,
"learning_rate": 0.0002,
"epoch": 1.03,
"step": 39
},
{
"loss": 0.0122,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 40
},
{
"loss": 0.0664,
"learning_rate": 0.0002,
"epoch": 1.08,
"step": 41
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 42
},
{
"loss": 0.0363,
"learning_rate": 0.0002,
"epoch": 1.13,
"step": 43
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 44
},
{
"loss": 0.0145,
"learning_rate": 0.0002,
"epoch": 1.18,
"step": 45
},
{
"loss": 0.0055,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 46
},
{
"loss": 0.0542,
"learning_rate": 0.0002,
"epoch": 1.24,
"step": 47
},
{
"loss": 0.0092,
"learning_rate": 0.0002,
"epoch": 1.26,
"step": 48
},
{
"loss": 0.0047,
"learning_rate": 0.0002,
"epoch": 1.29,
"step": 49
},
{
"loss": 0.0913,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 50
},
{
"eval_creative_writing_loss": 0.0620737262070179,
"eval_creative_writing_score": -0.018390759825706482,
"eval_creative_writing_brier_score": 0.018390759825706482,
"eval_creative_writing_average_probability": 0.9659796953201294,
"eval_creative_writing_accuracy": 0.97,
"eval_creative_writing_probabilities": [
0.9853099584579468,
1.0,
0.9972212314605713,
1.0,
0.9858209490776062,
1.0,
0.9999940395355225,
1.0,
0.26181724667549133,
1.0,
0.4172997772693634,
1.0,
1.0,
1.0,
0.998902440071106,
1.0,
1.0,
1.0,
0.9997408986091614,
1.0,
0.9994969367980957,
0.9999998807907104,
0.9999983310699463,
1.0,
0.8943321108818054,
0.9996473789215088,
1.0,
1.0,
0.1366727352142334,
1.0,
0.9999995231628418,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9997522234916687,
1.0,
0.9999735355377197,
0.9999978542327881,
0.9999889135360718,
1.0,
0.9999996423721313,
1.0,
0.9998144507408142,
0.9999997615814209,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.976751983165741,
1.0,
0.997978150844574,
1.0,
0.9721508622169495,
1.0,
0.9091917872428894,
1.0,
0.8016390800476074,
1.0,
1.0,
1.0,
0.9940642714500427,
1.0,
0.7791704535484314,
0.9723653793334961,
0.8304190635681152,
1.0,
0.738132655620575,
1.0,
1.0,
1.0,
1.0,
1.0,
0.997934103012085,
1.0,
0.9999746084213257,
1.0,
1.0,
1.0,
0.9992375373840332,
1.0,
0.9998910427093506,
0.9999988079071045,
0.9967170357704163,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9567482471466064,
0.9999606609344482,
0.9998536109924316,
1.0
],
"eval_creative_writing_runtime": 31.4284,
"eval_creative_writing_samples_per_second": 3.182,
"eval_creative_writing_steps_per_second": 0.127,
"epoch": 1.32,
"step": 50
},
{
"eval_biology_with_literary_style_loss": 1.3244109153747559,
"eval_biology_with_literary_style_score": -0.2915953993797302,
"eval_biology_with_literary_style_brier_score": 0.2915953993797302,
"eval_biology_with_literary_style_average_probability": 0.6370630860328674,
"eval_biology_with_literary_style_accuracy": 0.66,
"eval_biology_with_literary_style_probabilities": [
0.8422331213951111,
0.3563534617424011,
0.20590150356292725,
0.34388160705566406,
0.9738777279853821,
0.9933098554611206,
0.013515126891434193,
0.03492158278822899,
0.0005772275617346168,
0.9946145415306091,
0.9994958639144897,
0.9732575416564941,
0.4653439223766327,
0.2902985215187073,
0.9848002195358276,
0.874828040599823,
0.007957144640386105,
0.10880450904369354,
0.9999992847442627,
0.9999992847442627,
0.9997832179069519,
0.9021045565605164,
0.9993135929107666,
0.09312967211008072,
0.9835945963859558,
0.9038721919059753,
0.9953224062919617,
0.005929848644882441,
0.004466882906854153,
0.034653790295124054,
0.9999308586120605,
0.9367615580558777,
0.9977996945381165,
0.9999979734420776,
0.996374785900116,
0.9997103810310364,
0.19381558895111084,
0.6907597780227661,
0.875636100769043,
8.129484740493353e-06,
6.22305233264342e-05,
0.0003827095788437873,
0.0011861087987199426,
0.013507246039807796,
0.9973799586296082,
0.6868321299552917,
0.04164070263504982,
0.9997265934944153,
0.9688616991043091,
0.9772934317588806,
0.9902143478393555,
0.05315324291586876,
0.7507219910621643,
0.997478187084198,
0.8234730362892151,
0.9983914494514465,
0.5037214159965515,
0.1830606311559677,
0.6088641285896301,
0.056403663009405136,
0.9997255206108093,
0.997403085231781,
0.9995600581169128,
0.9978277087211609,
0.1915532499551773,
0.886441171169281,
0.9874991774559021,
0.9855878353118896,
0.7539471387863159,
0.9771825671195984,
0.5847952961921692,
0.7823383212089539,
0.010838981717824936,
0.7440186142921448,
0.8710861206054688,
0.9924225211143494,
0.41601699590682983,
0.9564679861068726,
0.5351160168647766,
0.9124112725257874,
0.00990450568497181,
0.8436535000801086,
0.45744913816452026,
0.9863516092300415,
0.9999998807907104,
0.99796462059021,
0.984410285949707,
0.8628932237625122,
0.7051854133605957,
0.004238440655171871,
0.6792111396789551,
0.9969097971916199,
0.9998319149017334,
0.12430453300476074,
0.9992156028747559,
0.8144650459289551,
0.07938272505998611,
0.013606518507003784,
0.003284846432507038,
0.8325172066688538
],
"eval_biology_with_literary_style_runtime": 34.5464,
"eval_biology_with_literary_style_samples_per_second": 2.895,
"eval_biology_with_literary_style_steps_per_second": 0.116,
"epoch": 1.32,
"step": 50
},
{
"loss": 0.0092,
"learning_rate": 0.0002,
"epoch": 1.34,
"step": 51
},
{
"loss": 0.0012,
"learning_rate": 0.0002,
"epoch": 1.37,
"step": 52
},
{
"loss": 0.0741,
"learning_rate": 0.0002,
"epoch": 1.39,
"step": 53
},
{
"loss": 0.0191,
"learning_rate": 0.0002,
"epoch": 1.42,
"step": 54
},
{
"loss": 0.0289,
"learning_rate": 0.0002,
"epoch": 1.45,
"step": 55
},
{
"loss": 0.008,
"learning_rate": 0.0002,
"epoch": 1.47,
"step": 56
},
{
"loss": 0.0091,
"learning_rate": 0.0002,
"epoch": 1.5,
"step": 57
},
{
"loss": 0.001,
"learning_rate": 0.0002,
"epoch": 1.53,
"step": 58
},
{
"loss": 0.0589,
"learning_rate": 0.0002,
"epoch": 1.55,
"step": 59
},
{
"loss": 0.0014,
"learning_rate": 0.0002,
"epoch": 1.58,
"step": 60
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 1.61,
"step": 61
},
{
"loss": 0.0032,
"learning_rate": 0.0002,
"epoch": 1.63,
"step": 62
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.66,
"step": 63
},
{
"loss": 0.0163,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 64
},
{
"loss": 0.0007,
"learning_rate": 0.0002,
"epoch": 1.71,
"step": 65
},
{
"loss": 0.0153,
"learning_rate": 0.0002,
"epoch": 1.74,
"step": 66
},
{
"loss": 0.0266,
"learning_rate": 0.0002,
"epoch": 1.76,
"step": 67
},
{
"loss": 0.0547,
"learning_rate": 0.0002,
"epoch": 1.79,
"step": 68
},
{
"loss": 0.0024,
"learning_rate": 0.0002,
"epoch": 1.82,
"step": 69
},
{
"loss": 0.0118,
"learning_rate": 0.0002,
"epoch": 1.84,
"step": 70
},
{
"loss": 0.0031,
"learning_rate": 0.0002,
"epoch": 1.87,
"step": 71
},
{
"loss": 0.0076,
"learning_rate": 0.0002,
"epoch": 1.89,
"step": 72
},
{
"loss": 0.0201,
"learning_rate": 0.0002,
"epoch": 1.92,
"step": 73
},
{
"loss": 0.0105,
"learning_rate": 0.0002,
"epoch": 1.95,
"step": 74
},
{
"loss": 0.0089,
"learning_rate": 0.0002,
"epoch": 1.97,
"step": 75
},
{
"eval_creative_writing_loss": 0.09592155367136002,
"eval_creative_writing_score": -0.020631961524486542,
"eval_creative_writing_brier_score": 0.020631961524486542,
"eval_creative_writing_average_probability": 0.971525251865387,
"eval_creative_writing_accuracy": 0.97,
"eval_creative_writing_probabilities": [
0.9997228980064392,
1.0,
0.9999998807907104,
1.0,
0.9996482133865356,
1.0,
0.9999998807907104,
1.0,
0.2378961592912674,
0.9999994039535522,
0.42051035165786743,
1.0,
1.0,
1.0,
0.9999306201934814,
1.0,
0.9999998807907104,
1.0,
0.9999996423721313,
1.0,
0.999981164932251,
0.9999998807907104,
0.9999912977218628,
1.0,
0.9809040427207947,
0.9995905756950378,
1.0,
1.0,
0.0023363870568573475,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999964237213135,
1.0,
0.9986351132392883,
0.9997923970222473,
0.9999994039535522,
1.0,
1.0,
1.0,
0.9998650550842285,
0.9999997615814209,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9987547397613525,
1.0,
0.9995219707489014,
1.0,
0.9831072092056274,
1.0,
0.9974205493927002,
1.0,
0.9997766613960266,
1.0,
1.0,
1.0,
0.9999995231628418,
1.0,
0.6153692007064819,
0.9825475811958313,
0.9969502091407776,
1.0,
0.9921615719795227,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9998723268508911,
1.0,
0.9999754428863525,
0.9999998807907104,
1.0,
1.0,
0.9998829364776611,
1.0,
0.9971277117729187,
0.9999291896820068,
0.9999527931213379,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.952387273311615,
0.9989917874336243,
0.9999982118606567,
1.0
],
"eval_creative_writing_runtime": 31.3865,
"eval_creative_writing_samples_per_second": 3.186,
"eval_creative_writing_steps_per_second": 0.127,
"epoch": 1.97,
"step": 75
},
{
"eval_biology_with_literary_style_loss": 1.4261603355407715,
"eval_biology_with_literary_style_score": -0.23246526718139648,
"eval_biology_with_literary_style_brier_score": 0.23246526718139648,
"eval_biology_with_literary_style_average_probability": 0.739600419998169,
"eval_biology_with_literary_style_accuracy": 0.75,
"eval_biology_with_literary_style_probabilities": [
0.9998239874839783,
0.9999821186065674,
0.997286319732666,
0.7309690117835999,
0.9989686012268066,
0.9998588562011719,
0.1100221648812294,
0.013960055075585842,
0.0014234904665499926,
0.9999901056289673,
0.9999978542327881,
0.9998490810394287,
0.5011758804321289,
0.24851495027542114,
0.9544274806976318,
0.996453046798706,
0.0021988353691995144,
0.05229213833808899,
1.0,
1.0,
0.9999995231628418,
0.9999839067459106,
0.999997615814209,
0.9901941418647766,
0.9993062019348145,
0.9996980428695679,
0.9998052716255188,
7.921013457234949e-05,
0.00241833901964128,
0.00010733860108302906,
1.0,
0.9999939203262329,
1.0,
1.0,
0.9999995231628418,
1.0,
0.37197402119636536,
0.906427264213562,
0.9985352754592896,
9.472542927824179e-08,
1.8663062292034738e-05,
0.0007226847810670733,
0.0009153155260719359,
0.027196291834115982,
0.996335506439209,
0.9934919476509094,
0.009263264015316963,
0.9999862909317017,
0.9986127614974976,
0.999937891960144,
0.9997109770774841,
0.5540578961372375,
0.9694981575012207,
0.9999996423721313,
0.9991618394851685,
0.9999830722808838,
0.9839572906494141,
0.3787292242050171,
0.7562795877456665,
0.024417594075202942,
0.9996551275253296,
0.9969584941864014,
0.998749852180481,
0.9999865293502808,
0.7677027583122253,
0.9995812773704529,
0.9999984502792358,
0.9999995231628418,
0.9999768733978271,
0.9979830980300903,
0.9924507737159729,
0.9991115927696228,
7.307936175493523e-05,
0.7401599884033203,
0.929680347442627,
0.9999669790267944,
0.8817222118377686,
0.9998908042907715,
0.1284332275390625,
0.928813636302948,
0.0032492727041244507,
0.9993767142295837,
0.9767991900444031,
0.9966356158256531,
1.0,
0.9999995231628418,
0.9999977350234985,
0.9992828965187073,
0.999262273311615,
1.0285020834999159e-05,
0.9999904632568359,
1.0,
1.0,
0.9980078339576721,
0.9999998807907104,
0.999974250793457,
0.034345593303442,
0.024079279974102974,
0.0003281444078311324,
0.9998185038566589
],
"eval_biology_with_literary_style_runtime": 34.5167,
"eval_biology_with_literary_style_samples_per_second": 2.897,
"eval_biology_with_literary_style_steps_per_second": 0.116,
"epoch": 1.97,
"step": 75
},
{
"loss": 0.0031,
"learning_rate": 0.0002,
"epoch": 2.0,
"step": 76
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 2.03,
"step": 77
},
{
"loss": 0.0019,
"learning_rate": 0.0002,
"epoch": 2.05,
"step": 78
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.08,
"step": 79
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 2.11,
"step": 80
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 2.13,
"step": 81
},
{
"loss": 0.0006,
"learning_rate": 0.0002,
"epoch": 2.16,
"step": 82
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.18,
"step": 83
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 2.21,
"step": 84
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 2.24,
"step": 85
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 2.26,
"step": 86
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 2.29,
"step": 87
},
{
"loss": 0.0013,
"learning_rate": 0.0002,
"epoch": 2.32,
"step": 88
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.34,
"step": 89
},
{
"loss": 0.0018,
"learning_rate": 0.0002,
"epoch": 2.37,
"step": 90
},
{
"loss": 0.0012,
"learning_rate": 0.0002,
"epoch": 2.39,
"step": 91
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 2.42,
"step": 92
},
{
"loss": 0.0038,
"learning_rate": 0.0002,
"epoch": 2.45,
"step": 93
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.47,
"step": 94
},
{
"loss": 0.0007,
"learning_rate": 0.0002,
"epoch": 2.5,
"step": 95
},
{
"loss": 0.028,
"learning_rate": 0.0002,
"epoch": 2.53,
"step": 96
},
{
"loss": 0.003,
"learning_rate": 0.0002,
"epoch": 2.55,
"step": 97
},
{
"loss": 0.0082,
"learning_rate": 0.0002,
"epoch": 2.58,
"step": 98
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.61,
"step": 99
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 2.63,
"step": 100
},
{
"eval_creative_writing_loss": 0.12560071051120758,
"eval_creative_writing_score": -0.02528984844684601,
"eval_creative_writing_brier_score": 0.02528984844684601,
"eval_creative_writing_average_probability": 0.9707810282707214,
"eval_creative_writing_accuracy": 0.97,
"eval_creative_writing_probabilities": [
0.9999761581420898,
1.0,
0.9999992847442627,
1.0,
0.9999547004699707,
1.0,
1.0,
1.0,
0.01668044924736023,
1.0,
0.2576431930065155,
1.0,
1.0,
1.0,
0.9999969005584717,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
0.9979181885719299,
0.9999982118606567,
1.0,
1.0,
0.004256995394825935,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999716281890869,
1.0,
0.9999717473983765,
0.9999995231628418,
0.9999998807907104,
1.0,
1.0,
1.0,
0.999849796295166,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9995261430740356,
1.0,
0.9999653100967407,
1.0,
0.9834521412849426,
1.0,
0.9987533092498779,
1.0,
0.9978016018867493,
1.0,
1.0,
1.0,
0.9999966621398926,
1.0,
0.8659273386001587,
0.9995256662368774,
0.9950549602508545,
1.0,
0.9657167792320251,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999123811721802,
1.0,
0.9999997615814209,
1.0,
1.0,
1.0,
0.9999539852142334,
1.0,
0.9996681213378906,
0.9999990463256836,
0.9996721744537354,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9969596862792969,
0.9999977350234985,
0.9999960660934448,
1.0
],
"eval_creative_writing_runtime": 31.387,
"eval_creative_writing_samples_per_second": 3.186,
"eval_creative_writing_steps_per_second": 0.127,
"epoch": 2.63,
"step": 100
},
{
"eval_biology_with_literary_style_loss": 1.4950416088104248,
"eval_biology_with_literary_style_score": -0.239924356341362,
"eval_biology_with_literary_style_brier_score": 0.239924356341362,
"eval_biology_with_literary_style_average_probability": 0.7326297760009766,
"eval_biology_with_literary_style_accuracy": 0.75,
"eval_biology_with_literary_style_probabilities": [
0.9994196891784668,
0.9992738366127014,
0.983923614025116,
0.9044151902198792,
0.9997815489768982,
0.9998750686645508,
0.006262939423322678,
0.00128739804495126,
8.855006308294833e-05,
0.9999986886978149,
0.9999992847442627,
0.9998844861984253,
0.684658944606781,
0.4036870300769806,
0.9940760135650635,
0.9792241454124451,
0.0024490836076438427,
0.030131032690405846,
1.0,
1.0,
0.9999988079071045,
0.9999747276306152,
0.9999988079071045,
0.9765191674232483,
0.9992048144340515,
0.9988371729850769,
0.9996092915534973,
8.090558549156412e-05,
0.004592935089021921,
0.002051251009106636,
1.0,
0.9999823570251465,
0.9999997615814209,
1.0,
0.9999997615814209,
0.9999996423721313,
0.523539662361145,
0.9182876348495483,
0.9991507530212402,
6.639822203169388e-08,
3.326304067741148e-05,
0.00180066900793463,
0.00022590388834942132,
0.0030361979734152555,
0.9938976168632507,
0.999334990978241,
0.014086072333157063,
0.9999929666519165,
0.9996891021728516,
0.9999194145202637,
0.9998146891593933,
0.5219235420227051,
0.9443055391311646,
0.999993085861206,
0.9992896318435669,
0.9999892711639404,
0.8569433093070984,
0.11361022293567657,
0.6792670488357544,
0.07281597703695297,
0.9995861649513245,
0.9714840054512024,
0.9986534118652344,
0.9999567270278931,
0.7200058102607727,
0.9917659163475037,
0.9999903440475464,
0.9999982118606567,
0.999862551689148,
0.9478113055229187,
0.9273961782455444,
0.9989776611328125,
9.58814489422366e-05,
0.9144469499588013,
0.8570235371589661,
0.999974250793457,
0.27402758598327637,
0.9973011612892151,
0.07933907210826874,
0.9523033499717712,
0.005996616557240486,
0.9999831914901733,
0.9931454658508301,
0.9989646673202515,
1.0,
0.9999984502792358,
0.999990701675415,
0.9977810978889465,
0.9995942711830139,
0.00018957872816827148,
0.9999784231185913,
0.9999998807907104,
1.0,
0.9992635846138,
1.0,
0.9999830722808838,
0.024764718487858772,
0.0030579320155084133,
0.0002520766283851117,
0.9998053908348083
],
"eval_biology_with_literary_style_runtime": 34.5448,
"eval_biology_with_literary_style_samples_per_second": 2.895,
"eval_biology_with_literary_style_steps_per_second": 0.116,
"epoch": 2.63,
"step": 100
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.66,
"step": 101
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.68,
"step": 102
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 2.71,
"step": 103
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.74,
"step": 104
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 2.76,
"step": 105
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.79,
"step": 106
},
{
"loss": 0.0012,
"learning_rate": 0.0002,
"epoch": 2.82,
"step": 107
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 2.84,
"step": 108
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 2.87,
"step": 109
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 2.89,
"step": 110
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.92,
"step": 111
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.95,
"step": 112
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.97,
"step": 113
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 3.0,
"step": 114
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 3.03,
"step": 115
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.05,
"step": 116
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.08,
"step": 117
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.11,
"step": 118
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.13,
"step": 119
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.16,
"step": 120
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.18,
"step": 121
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.21,
"step": 122
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.24,
"step": 123
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.26,
"step": 124
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 3.29,
"step": 125
},
{
"eval_creative_writing_loss": 0.12550988793373108,
"eval_creative_writing_score": -0.023599309846758842,
"eval_creative_writing_brier_score": 0.023599309846758842,
"eval_creative_writing_average_probability": 0.9722127318382263,
"eval_creative_writing_accuracy": 0.97,
"eval_creative_writing_probabilities": [
0.9999924898147583,
1.0,
0.9999974966049194,
1.0,
0.9999760389328003,
1.0,
1.0,
1.0,
0.004820940550416708,
1.0,
0.38430270552635193,
1.0,
1.0,
1.0,
0.9999996423721313,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9995892643928528,
0.9999997615814209,
1.0,
1.0,
0.012191555462777615,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999853372573853,
1.0,
0.999993085861206,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
0.9999395608901978,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9998890161514282,
1.0,
0.9999903440475464,
1.0,
0.9879515767097473,
1.0,
0.9996850490570068,
1.0,
0.9710411429405212,
1.0,
1.0,
1.0,
0.9999980926513672,
1.0,
0.8841173648834229,
0.999834418296814,
0.9965130686759949,
1.0,
0.9834631085395813,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999334812164307,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
0.9999955892562866,
1.0,
0.9997425675392151,
0.9999978542327881,
0.9998661279678345,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9984672665596008,
0.9999994039535522,
0.9999991655349731,
1.0
],
"eval_creative_writing_runtime": 31.3796,
"eval_creative_writing_samples_per_second": 3.187,
"eval_creative_writing_steps_per_second": 0.127,
"epoch": 3.29,
"step": 125
},
{
"eval_biology_with_literary_style_loss": 1.4181084632873535,
"eval_biology_with_literary_style_score": -0.22596853971481323,
"eval_biology_with_literary_style_brier_score": 0.22596853971481323,
"eval_biology_with_literary_style_average_probability": 0.7482656836509705,
"eval_biology_with_literary_style_accuracy": 0.75,
"eval_biology_with_literary_style_probabilities": [
0.9995549321174622,
0.9985436201095581,
0.9808439612388611,
0.9283376932144165,
0.9999039173126221,
0.9999103546142578,
0.0073650372214615345,
0.0004983382532373071,
3.531386028043926e-05,
0.9999995231628418,
0.9999997615814209,
0.9999420642852783,
0.7384650707244873,
0.46473434567451477,
0.9987931251525879,
0.9886658191680908,
0.008153733797371387,
0.09072274714708328,
1.0,
1.0,
0.9999998807907104,
0.9999436140060425,
0.9999998807907104,
0.9902008175849915,
0.9996306896209717,
0.9992589354515076,
0.9999074935913086,
0.00022981569054536521,
0.020188162103295326,
0.02801305055618286,
1.0,
0.9999865293502808,
0.9999995231628418,
1.0,
0.9999997615814209,
0.9999998807907104,
0.6005551218986511,
0.9702953696250916,
0.9998131394386292,
1.041131092449632e-08,
9.587202839611564e-06,
0.0011823305394500494,
0.0001448177790734917,
0.009138714522123337,
0.9979574680328369,
0.9991582632064819,
0.016484873369336128,
0.9999951124191284,
0.999769389629364,
0.9998552799224854,
0.9998192191123962,
0.8466882705688477,
0.9816719889640808,
0.9999973773956299,
0.9993175268173218,
0.9999885559082031,
0.9391415119171143,
0.1099478080868721,
0.7412080764770508,
0.18274378776550293,
0.999987006187439,
0.9898987412452698,
0.9998290538787842,
0.9999748468399048,
0.776476263999939,
0.9796139001846313,
0.9999961853027344,
0.9999972581863403,
0.9996709823608398,
0.9429153800010681,
0.8567234873771667,
0.9939687252044678,
0.00026437846827320755,
0.9818727374076843,
0.9595638513565063,
0.9999904632568359,
0.45074892044067383,
0.9988915324211121,
0.27005892992019653,
0.9760595560073853,
0.003961615264415741,
0.999995231628418,
0.9971864819526672,
0.9999188184738159,
1.0,
0.9999996423721313,
0.9999929666519165,
0.9993189573287964,
0.9999390840530396,
0.0004752865352202207,
0.9999654293060303,
1.0,
1.0,
0.9996334314346313,
1.0,
0.9999845027923584,
0.012308338657021523,
0.0005812794552184641,
0.0003732819459401071,
0.999724805355072
],
"eval_biology_with_literary_style_runtime": 34.5294,
"eval_biology_with_literary_style_samples_per_second": 2.896,
"eval_biology_with_literary_style_steps_per_second": 0.116,
"epoch": 3.29,
"step": 125
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 3.32,
"step": 126
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.34,
"step": 127
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.37,
"step": 128
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 3.39,
"step": 129
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 3.42,
"step": 130
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.45,
"step": 131
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.47,
"step": 132
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.5,
"step": 133
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.53,
"step": 134
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.55,
"step": 135
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.58,
"step": 136
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.61,
"step": 137
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.63,
"step": 138
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.66,
"step": 139
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.68,
"step": 140
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.71,
"step": 141
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 3.74,
"step": 142
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.76,
"step": 143
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.79,
"step": 144
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.82,
"step": 145
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.84,
"step": 146
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.87,
"step": 147
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.89,
"step": 148
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.92,
"step": 149
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.95,
"step": 150
},
{
"eval_creative_writing_loss": 0.12696059048175812,
"eval_creative_writing_score": -0.023474015295505524,
"eval_creative_writing_brier_score": 0.023474015295505524,
"eval_creative_writing_average_probability": 0.9725755453109741,
"eval_creative_writing_accuracy": 0.97,
"eval_creative_writing_probabilities": [
0.9999969005584717,
1.0,
0.9999990463256836,
1.0,
0.9999603033065796,
1.0,
1.0,
1.0,
0.0047756098210811615,
1.0,
0.3958245515823364,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9996107220649719,
0.9999997615814209,
1.0,
1.0,
0.009845593012869358,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999935626983643,
1.0,
0.999993085861206,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
0.9999793767929077,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999181032180786,
1.0,
0.9999945163726807,
1.0,
0.9935240745544434,
1.0,
0.9997866749763489,
1.0,
0.9811118245124817,
1.0,
1.0,
1.0,
0.9999995231628418,
1.0,
0.8962162137031555,
0.9998764991760254,
0.9970778226852417,
1.0,
0.9822664856910706,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999604225158691,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
0.9999983310699463,
1.0,
0.9996516704559326,
0.9999957084655762,
0.9999384880065918,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9982588887214661,
0.9999991655349731,
0.9999997615814209,
1.0
],
"eval_creative_writing_runtime": 31.3872,
"eval_creative_writing_samples_per_second": 3.186,
"eval_creative_writing_steps_per_second": 0.127,
"epoch": 3.95,
"step": 150
},
{
"eval_biology_with_literary_style_loss": 1.508569598197937,
"eval_biology_with_literary_style_score": -0.223800390958786,
"eval_biology_with_literary_style_brier_score": 0.223800390958786,
"eval_biology_with_literary_style_average_probability": 0.753645658493042,
"eval_biology_with_literary_style_accuracy": 0.77,
"eval_biology_with_literary_style_probabilities": [
0.9997276663780212,
0.9992485642433167,
0.9895316362380981,
0.9514719843864441,
0.9999614953994751,
0.9999442100524902,
0.004651335533708334,
0.0001869379193522036,
1.4444511180045083e-05,
0.9999998807907104,
1.0,
0.9999759197235107,
0.79854816198349,
0.5046471357345581,
0.9990099668502808,
0.99369215965271,
0.0043740225955843925,
0.05449388176202774,
1.0,
1.0,
1.0,
0.9999809265136719,
1.0,
0.99128657579422,
0.9998859167098999,
0.999728262424469,
0.9999839067459106,
0.00016595126362517476,
0.02103896625339985,
0.024976782500743866,
1.0,
0.9999954700469971,
0.9999998807907104,
1.0,
1.0,
1.0,
0.6514801383018494,
0.9780262112617493,
0.9999254941940308,
2.189738612656811e-09,
3.2669267966412008e-06,
0.0008265993092209101,
9.858178964350373e-05,
0.007948733866214752,
0.9988512992858887,
0.9993112087249756,
0.007991177029907703,
0.9999977350234985,
0.9998981952667236,
0.9999642372131348,
0.9999281167984009,
0.8989755511283875,
0.9909796714782715,
0.9999992847442627,
0.9996218681335449,
0.9999951124191284,
0.9505088925361633,
0.1275900900363922,
0.8378230929374695,
0.1916273832321167,
0.9999955892562866,
0.9941998720169067,
0.9999337196350098,
0.9999921321868896,
0.7730574607849121,
0.9855798482894897,
0.9999992847442627,
0.9999995231628418,
0.9999063014984131,
0.9654306769371033,
0.9086850881576538,
0.9970625042915344,
0.00011868889123434201,
0.9871053695678711,
0.962754487991333,
0.999996542930603,
0.5310143828392029,
0.9996671676635742,
0.28350409865379333,
0.9845860600471497,
0.002773666987195611,
0.9999983310699463,
0.9990894794464111,
0.999972939491272,
1.0,
0.9999998807907104,
0.9999982118606567,
0.9997745156288147,
0.9999819993972778,
0.00044673527008853853,
0.9999911785125732,
1.0,
1.0,
0.9998553991317749,
1.0,
0.9999963045120239,
0.01165656466037035,
0.0004307126218918711,
0.00019968363631051034,
0.9999244213104248
],
"eval_biology_with_literary_style_runtime": 34.5257,
"eval_biology_with_literary_style_samples_per_second": 2.896,
"eval_biology_with_literary_style_steps_per_second": 0.116,
"epoch": 3.95,
"step": 150
},
{
"train_runtime": 3937.8753,
"train_samples_per_second": 1.219,
"train_steps_per_second": 0.038,
"total_flos": 0.0,
"train_loss": 0.053941556412501084,
"epoch": 3.95,
"step": 150
}
]