llama-30b-commonsense_qa / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
4b242c9
raw
history blame contribute delete
No virus
41.5 kB
[
{
"loss": 0.8525,
"learning_rate": 0.0002,
"epoch": 0.02,
"step": 1
},
{
"loss": 0.6634,
"learning_rate": 0.0002,
"epoch": 0.04,
"step": 2
},
{
"loss": 0.7302,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 3
},
{
"loss": 0.7507,
"learning_rate": 0.0002,
"epoch": 0.07,
"step": 4
},
{
"loss": 0.7715,
"learning_rate": 0.0002,
"epoch": 0.09,
"step": 5
},
{
"loss": 0.7064,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 6
},
{
"loss": 0.7124,
"learning_rate": 0.0002,
"epoch": 0.12,
"step": 7
},
{
"loss": 0.7223,
"learning_rate": 0.0002,
"epoch": 0.14,
"step": 8
},
{
"loss": 0.6888,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 9
},
{
"loss": 0.7404,
"learning_rate": 0.0002,
"epoch": 0.18,
"step": 10
},
{
"loss": 0.6575,
"learning_rate": 0.0002,
"epoch": 0.19,
"step": 11
},
{
"loss": 0.6896,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 12
},
{
"loss": 0.6336,
"learning_rate": 0.0002,
"epoch": 0.23,
"step": 13
},
{
"loss": 0.6474,
"learning_rate": 0.0002,
"epoch": 0.25,
"step": 14
},
{
"loss": 0.6993,
"learning_rate": 0.0002,
"epoch": 0.26,
"step": 15
},
{
"loss": 0.6219,
"learning_rate": 0.0002,
"epoch": 0.28,
"step": 16
},
{
"loss": 0.7434,
"learning_rate": 0.0002,
"epoch": 0.3,
"step": 17
},
{
"loss": 0.7488,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 18
},
{
"loss": 0.7165,
"learning_rate": 0.0002,
"epoch": 0.33,
"step": 19
},
{
"loss": 0.7253,
"learning_rate": 0.0002,
"epoch": 0.35,
"step": 20
},
{
"loss": 0.6785,
"learning_rate": 0.0002,
"epoch": 0.37,
"step": 21
},
{
"loss": 0.7281,
"learning_rate": 0.0002,
"epoch": 0.39,
"step": 22
},
{
"loss": 0.7451,
"learning_rate": 0.0002,
"epoch": 0.4,
"step": 23
},
{
"loss": 0.6618,
"learning_rate": 0.0002,
"epoch": 0.42,
"step": 24
},
{
"loss": 0.6452,
"learning_rate": 0.0002,
"epoch": 0.44,
"step": 25
},
{
"eval_commonsense_qa_loss": 0.7914362549781799,
"eval_commonsense_qa_score": -0.29111722111701965,
"eval_commonsense_qa_brier_score": 0.29111722111701965,
"eval_commonsense_qa_average_probability": 0.47185489535331726,
"eval_commonsense_qa_accuracy": 0.39,
"eval_commonsense_qa_probabilities": [
0.3736628592014313,
0.41951867938041687,
0.40974757075309753,
0.43108245730400085,
0.39417126774787903,
0.36827391386032104,
0.3217296004295349,
0.4527994692325592,
0.44031405448913574,
0.5761461853981018,
0.6323458552360535,
0.5307877659797668,
0.3072277307510376,
0.35649847984313965,
0.24212150275707245,
0.4968399107456207,
0.7714088559150696,
0.4102341830730438,
0.5640316605567932,
0.5279660820960999,
0.26524466276168823,
0.46642741560935974,
0.667880654335022,
0.5137766003608704,
0.7010161280632019,
0.5712583065032959,
0.5321078300476074,
0.4304255247116089,
0.25722262263298035,
0.49681854248046875,
0.39940145611763,
0.38804712891578674,
0.413534939289093,
0.36740759015083313,
0.3980674743652344,
0.3686825931072235,
0.45741069316864014,
0.4793001115322113,
0.5210141539573669,
0.5084275603294373,
0.5377230644226074,
0.4399715065956116,
0.5001020431518555,
0.4058927297592163,
0.4787653684616089,
0.35228681564331055,
0.3852904140949249,
0.38124364614486694,
0.49779626727104187,
0.47046318650245667,
0.45446330308914185,
0.49293678998947144,
0.5469354391098022,
0.4111570715904236,
0.6435064077377319,
0.3415040075778961,
0.557502269744873,
0.6114356517791748,
0.4695909917354584,
0.3526020348072052,
0.6928711533546448,
0.688827633857727,
0.6876643896102905,
0.4979974031448364,
0.5267980098724365,
0.5480849146842957,
0.5159767866134644,
0.6435903310775757,
0.48293471336364746,
0.4974452555179596,
0.39486047625541687,
0.3298123776912689,
0.39702242612838745,
0.2585730254650116,
0.5650836229324341,
0.6514415144920349,
0.567798376083374,
0.5929319858551025,
0.35160502791404724,
0.39691317081451416,
0.589564859867096,
0.4809286296367645,
0.3265831768512726,
0.34647417068481445,
0.29951584339141846,
0.540679931640625,
0.477059006690979,
0.42304784059524536,
0.3258381187915802,
0.33986833691596985,
0.6030049920082092,
0.5700676441192627,
0.5453739166259766,
0.5489950180053711,
0.5006292462348938,
0.5012415647506714,
0.4897501468658447,
0.405569463968277,
0.41794145107269287,
0.6035709977149963
],
"eval_commonsense_qa_runtime": 5.072,
"eval_commonsense_qa_samples_per_second": 19.716,
"eval_commonsense_qa_steps_per_second": 0.789,
"epoch": 0.44,
"step": 25
},
{
"eval_trivia_qa_loss": 0.7058285474777222,
"eval_trivia_qa_score": -0.2509709298610687,
"eval_trivia_qa_brier_score": 0.2509709298610687,
"eval_trivia_qa_average_probability": 0.508730411529541,
"eval_trivia_qa_accuracy": 0.56,
"eval_trivia_qa_probabilities": [
0.6108027696609497,
0.47548747062683105,
0.5868535041809082,
0.32189613580703735,
0.4967602789402008,
0.5013786554336548,
0.582642138004303,
0.5476276278495789,
0.6525075435638428,
0.3721744418144226,
0.46289870142936707,
0.5139896273612976,
0.6440446376800537,
0.5662510395050049,
0.5801640748977661,
0.27492260932922363,
0.5860297679901123,
0.4890085756778717,
0.6448793411254883,
0.5299046039581299,
0.3838443160057068,
0.2839445173740387,
0.5129914879798889,
0.6394293308258057,
0.5504795908927917,
0.6178440451622009,
0.35971587896347046,
0.45417338609695435,
0.42903023958206177,
0.559572160243988,
0.5979811549186707,
0.4994199275970459,
0.5579876899719238,
0.7093907594680786,
0.4760594666004181,
0.4218994379043579,
0.5057868361473083,
0.6060559153556824,
0.5037559866905212,
0.5046707391738892,
0.42048025131225586,
0.6409589648246765,
0.5305330753326416,
0.3996846079826355,
0.4467884302139282,
0.6522667407989502,
0.4101843535900116,
0.3745155334472656,
0.5663012266159058,
0.48798033595085144,
0.6159585118293762,
0.47460001707077026,
0.6789429187774658,
0.3534046411514282,
0.5315006375312805,
0.4756757915019989,
0.6186851263046265,
0.5154322981834412,
0.37286680936813354,
0.5532978177070618,
0.45223331451416016,
0.41775307059288025,
0.5951501727104187,
0.640890896320343,
0.4332258701324463,
0.6166451573371887,
0.626221239566803,
0.545464277267456,
0.36058875918388367,
0.4321788251399994,
0.4153941571712494,
0.5391730070114136,
0.396107017993927,
0.5979107618331909,
0.5979316830635071,
0.40809130668640137,
0.6226964592933655,
0.38245290517807007,
0.587360680103302,
0.44519785046577454,
0.659287691116333,
0.570533812046051,
0.3212454915046692,
0.5290343761444092,
0.3304256200790405,
0.5141531229019165,
0.4628876745700836,
0.4035484790802002,
0.5565099716186523,
0.5873599052429199,
0.5347287058830261,
0.4785170257091522,
0.44041702151298523,
0.4931771457195282,
0.5670301914215088,
0.5181891322135925,
0.6944608688354492,
0.4809003174304962,
0.5023355484008789,
0.3813191056251526
],
"eval_trivia_qa_runtime": 6.9532,
"eval_trivia_qa_samples_per_second": 14.382,
"eval_trivia_qa_steps_per_second": 0.575,
"epoch": 0.44,
"step": 25
},
{
"loss": 0.6417,
"learning_rate": 0.0002,
"epoch": 0.46,
"step": 26
},
{
"loss": 0.696,
"learning_rate": 0.0002,
"epoch": 0.47,
"step": 27
},
{
"loss": 0.688,
"learning_rate": 0.0002,
"epoch": 0.49,
"step": 28
},
{
"loss": 0.6599,
"learning_rate": 0.0002,
"epoch": 0.51,
"step": 29
},
{
"loss": 0.726,
"learning_rate": 0.0002,
"epoch": 0.53,
"step": 30
},
{
"loss": 0.6367,
"learning_rate": 0.0002,
"epoch": 0.54,
"step": 31
},
{
"loss": 0.7377,
"learning_rate": 0.0002,
"epoch": 0.56,
"step": 32
},
{
"loss": 0.5971,
"learning_rate": 0.0002,
"epoch": 0.58,
"step": 33
},
{
"loss": 0.7561,
"learning_rate": 0.0002,
"epoch": 0.6,
"step": 34
},
{
"loss": 0.6453,
"learning_rate": 0.0002,
"epoch": 0.61,
"step": 35
},
{
"loss": 0.6648,
"learning_rate": 0.0002,
"epoch": 0.63,
"step": 36
},
{
"loss": 0.6106,
"learning_rate": 0.0002,
"epoch": 0.65,
"step": 37
},
{
"loss": 0.6421,
"learning_rate": 0.0002,
"epoch": 0.67,
"step": 38
},
{
"loss": 0.6576,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 39
},
{
"loss": 0.6426,
"learning_rate": 0.0002,
"epoch": 0.7,
"step": 40
},
{
"loss": 0.6443,
"learning_rate": 0.0002,
"epoch": 0.72,
"step": 41
},
{
"loss": 0.6842,
"learning_rate": 0.0002,
"epoch": 0.74,
"step": 42
},
{
"loss": 0.6407,
"learning_rate": 0.0002,
"epoch": 0.75,
"step": 43
},
{
"loss": 0.7051,
"learning_rate": 0.0002,
"epoch": 0.77,
"step": 44
},
{
"loss": 0.6729,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 45
},
{
"loss": 0.6876,
"learning_rate": 0.0002,
"epoch": 0.81,
"step": 46
},
{
"loss": 0.6711,
"learning_rate": 0.0002,
"epoch": 0.82,
"step": 47
},
{
"loss": 0.6613,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 48
},
{
"loss": 0.6044,
"learning_rate": 0.0002,
"epoch": 0.86,
"step": 49
},
{
"loss": 0.5769,
"learning_rate": 0.0002,
"epoch": 0.88,
"step": 50
},
{
"eval_commonsense_qa_loss": 0.7653241157531738,
"eval_commonsense_qa_score": -0.2791268527507782,
"eval_commonsense_qa_brier_score": 0.2791268527507782,
"eval_commonsense_qa_average_probability": 0.48683643341064453,
"eval_commonsense_qa_accuracy": 0.42,
"eval_commonsense_qa_probabilities": [
0.4020081162452698,
0.48435282707214355,
0.43516698479652405,
0.4614918529987335,
0.4766950011253357,
0.4323585629463196,
0.30255478620529175,
0.29709967970848083,
0.545942485332489,
0.5383279323577881,
0.6857153177261353,
0.48378315567970276,
0.16650518774986267,
0.2533150315284729,
0.19820035994052887,
0.44554558396339417,
0.7646204233169556,
0.6391469240188599,
0.5909577012062073,
0.5142195224761963,
0.46391570568084717,
0.4665536880493164,
0.5840488076210022,
0.43470215797424316,
0.7169751524925232,
0.7758315205574036,
0.5787749886512756,
0.5941563844680786,
0.46830788254737854,
0.5470129251480103,
0.46815431118011475,
0.526231050491333,
0.5080574750900269,
0.29635000228881836,
0.47670942544937134,
0.32638877630233765,
0.4723879396915436,
0.6098498702049255,
0.4592856168746948,
0.5818286538124084,
0.5597203373908997,
0.3799402415752411,
0.3624171018600464,
0.40649276971817017,
0.42219263315200806,
0.3799329996109009,
0.3508457839488983,
0.46101444959640503,
0.4670948088169098,
0.6448565721511841,
0.4881596565246582,
0.44159600138664246,
0.6058750748634338,
0.590934157371521,
0.46451979875564575,
0.37275078892707825,
0.5106647610664368,
0.6377049684524536,
0.5288455486297607,
0.4052494764328003,
0.6817584037780762,
0.682870090007782,
0.7456340193748474,
0.4100780189037323,
0.5996410846710205,
0.46194082498550415,
0.4774899482727051,
0.6287940740585327,
0.442749559879303,
0.38106194138526917,
0.3868009150028229,
0.34707480669021606,
0.5333090424537659,
0.4054989218711853,
0.6355715990066528,
0.5217822790145874,
0.3845539689064026,
0.45426151156425476,
0.43503573536872864,
0.4364105463027954,
0.6591715812683105,
0.5792595744132996,
0.22389499843120575,
0.4098465144634247,
0.3524221181869507,
0.42123866081237793,
0.48706841468811035,
0.41214796900749207,
0.22283338010311127,
0.28352028131484985,
0.688373863697052,
0.5961286425590515,
0.6101290583610535,
0.5565056204795837,
0.5037069320678711,
0.41246306896209717,
0.6470851302146912,
0.5888364315032959,
0.4137541353702545,
0.6086077690124512
],
"eval_commonsense_qa_runtime": 5.0497,
"eval_commonsense_qa_samples_per_second": 19.803,
"eval_commonsense_qa_steps_per_second": 0.792,
"epoch": 0.88,
"step": 50
},
{
"eval_trivia_qa_loss": 0.6967568397521973,
"eval_trivia_qa_score": -0.24871157109737396,
"eval_trivia_qa_brier_score": 0.24871157109737396,
"eval_trivia_qa_average_probability": 0.5072451829910278,
"eval_trivia_qa_accuracy": 0.5,
"eval_trivia_qa_probabilities": [
0.5316422581672668,
0.432894766330719,
0.5510445833206177,
0.4671865999698639,
0.5174410343170166,
0.5549404621124268,
0.4334378242492676,
0.39600223302841187,
0.528852641582489,
0.42653071880340576,
0.5234686136245728,
0.42779213190078735,
0.5071923732757568,
0.4844723641872406,
0.5264027714729309,
0.439650297164917,
0.553399384021759,
0.5268545746803284,
0.42666733264923096,
0.47586703300476074,
0.4965582489967346,
0.34971049427986145,
0.6940780878067017,
0.5421137809753418,
0.48094817996025085,
0.43245548009872437,
0.5597365498542786,
0.4554421305656433,
0.4893034100532532,
0.5171124935150146,
0.5146493911743164,
0.495728462934494,
0.4713417887687683,
0.6222578287124634,
0.498948872089386,
0.40657901763916016,
0.527847409248352,
0.7099897265434265,
0.5247665047645569,
0.4212891161441803,
0.5828162431716919,
0.540741503238678,
0.5784919857978821,
0.5161374807357788,
0.4517030119895935,
0.6503217220306396,
0.5777159929275513,
0.36965277791023254,
0.4844502806663513,
0.5828574299812317,
0.6145827174186707,
0.3802032172679901,
0.7286924123764038,
0.4977825880050659,
0.5343342423439026,
0.5175969004631042,
0.48933130502700806,
0.5237868428230286,
0.39064183831214905,
0.5096392035484314,
0.5159491896629333,
0.3979681730270386,
0.6354355812072754,
0.5407276153564453,
0.4297018051147461,
0.5711097121238708,
0.6063168048858643,
0.4898560345172882,
0.43474793434143066,
0.5227797627449036,
0.4176207184791565,
0.6359185576438904,
0.4207601547241211,
0.4994364082813263,
0.5145835876464844,
0.37604933977127075,
0.5790051817893982,
0.45702680945396423,
0.48608124256134033,
0.47564682364463806,
0.6444876194000244,
0.4628547132015228,
0.4236738979816437,
0.5284913778305054,
0.3784801661968231,
0.6344289779663086,
0.523414671421051,
0.4968854784965515,
0.4976916015148163,
0.4860800504684448,
0.47529691457748413,
0.4533490836620331,
0.4189370274543762,
0.6194345355033875,
0.484923779964447,
0.5832501649856567,
0.6392970681190491,
0.509278416633606,
0.5340873003005981,
0.4593735337257385
],
"eval_trivia_qa_runtime": 6.9799,
"eval_trivia_qa_samples_per_second": 14.327,
"eval_trivia_qa_steps_per_second": 0.573,
"epoch": 0.88,
"step": 50
},
{
"loss": 0.6601,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 51
},
{
"loss": 0.6454,
"learning_rate": 0.0002,
"epoch": 0.91,
"step": 52
},
{
"loss": 0.6547,
"learning_rate": 0.0002,
"epoch": 0.93,
"step": 53
},
{
"loss": 0.7068,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 54
},
{
"loss": 0.6629,
"learning_rate": 0.0002,
"epoch": 0.96,
"step": 55
},
{
"loss": 0.7136,
"learning_rate": 0.0002,
"epoch": 0.98,
"step": 56
},
{
"loss": 0.6437,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 57
},
{
"loss": 0.5198,
"learning_rate": 0.0002,
"epoch": 1.02,
"step": 58
},
{
"loss": 0.4605,
"learning_rate": 0.0002,
"epoch": 1.04,
"step": 59
},
{
"loss": 0.4861,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 60
},
{
"loss": 0.483,
"learning_rate": 0.0002,
"epoch": 1.07,
"step": 61
},
{
"loss": 0.5581,
"learning_rate": 0.0002,
"epoch": 1.09,
"step": 62
},
{
"loss": 0.4564,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 63
},
{
"loss": 0.4056,
"learning_rate": 0.0002,
"epoch": 1.12,
"step": 64
},
{
"loss": 0.4167,
"learning_rate": 0.0002,
"epoch": 1.14,
"step": 65
},
{
"loss": 0.7253,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 66
},
{
"loss": 0.4609,
"learning_rate": 0.0002,
"epoch": 1.18,
"step": 67
},
{
"loss": 0.5302,
"learning_rate": 0.0002,
"epoch": 1.19,
"step": 68
},
{
"loss": 0.6734,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 69
},
{
"loss": 0.5224,
"learning_rate": 0.0002,
"epoch": 1.23,
"step": 70
},
{
"loss": 0.4326,
"learning_rate": 0.0002,
"epoch": 1.25,
"step": 71
},
{
"loss": 0.5751,
"learning_rate": 0.0002,
"epoch": 1.26,
"step": 72
},
{
"loss": 0.5653,
"learning_rate": 0.0002,
"epoch": 1.28,
"step": 73
},
{
"loss": 0.4586,
"learning_rate": 0.0002,
"epoch": 1.3,
"step": 74
},
{
"loss": 0.4466,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 75
},
{
"eval_commonsense_qa_loss": 1.0953351259231567,
"eval_commonsense_qa_score": -0.3630787134170532,
"eval_commonsense_qa_brier_score": 0.3630787134170532,
"eval_commonsense_qa_average_probability": 0.46727100014686584,
"eval_commonsense_qa_accuracy": 0.47,
"eval_commonsense_qa_probabilities": [
0.6413354277610779,
0.6861792206764221,
0.9139598608016968,
0.3129615783691406,
0.4858136773109436,
0.3958660662174225,
0.015349932946264744,
0.03121619112789631,
0.3732204735279083,
0.5287988781929016,
0.9410275220870972,
0.4859994351863861,
0.00484914006665349,
0.06006177142262459,
0.016101302579045296,
0.25591790676116943,
0.9382115006446838,
0.7453911304473877,
0.49844464659690857,
0.11961045861244202,
0.21799755096435547,
0.22540347278118134,
0.6641191840171814,
0.3415297865867615,
0.9549143314361572,
0.9577696323394775,
0.3804933726787567,
0.5659024715423584,
0.6399015188217163,
0.6054954528808594,
0.16932412981987,
0.24352750182151794,
0.5880881547927856,
0.10066776722669601,
0.2192084938287735,
0.08628320693969727,
0.33604246377944946,
0.27943921089172363,
0.3816390931606293,
0.7643809914588928,
0.7210124731063843,
0.5266180038452148,
0.204672709107399,
0.5603741407394409,
0.6822576522827148,
0.19128814339637756,
0.20498374104499817,
0.09368855506181717,
0.6106529235839844,
0.8263741135597229,
0.6096042394638062,
0.3612224757671356,
0.3293110430240631,
0.4423503577709198,
0.1176837608218193,
0.1601405143737793,
0.3888700008392334,
0.8851404786109924,
0.578056275844574,
0.2645527720451355,
0.9431173205375671,
0.8939391374588013,
0.9614054560661316,
0.15922978520393372,
0.5092063546180725,
0.7479695081710815,
0.2555106580257416,
0.9339343905448914,
0.5020483136177063,
0.2890869677066803,
0.25056713819503784,
0.07140284031629562,
0.6142622828483582,
0.6608532667160034,
0.8618139028549194,
0.8793162107467651,
0.5890896916389465,
0.8838966488838196,
0.4716782569885254,
0.398629367351532,
0.8436529040336609,
0.3973071277141571,
0.06621242314577103,
0.5546140670776367,
0.16331911087036133,
0.606509804725647,
0.6033067107200623,
0.2906716763973236,
0.009353392757475376,
0.061065930873155594,
0.8538415431976318,
0.5957357883453369,
0.7623599767684937,
0.6353998780250549,
0.6718450784683228,
0.06562834978103638,
0.4774560332298279,
0.20875515043735504,
0.2653869092464447,
0.7864195108413696
],
"eval_commonsense_qa_runtime": 5.0892,
"eval_commonsense_qa_samples_per_second": 19.649,
"eval_commonsense_qa_steps_per_second": 0.786,
"epoch": 1.32,
"step": 75
},
{
"eval_trivia_qa_loss": 0.7178796529769897,
"eval_trivia_qa_score": -0.25323230028152466,
"eval_trivia_qa_brier_score": 0.25323230028152466,
"eval_trivia_qa_average_probability": 0.5258346796035767,
"eval_trivia_qa_accuracy": 0.56,
"eval_trivia_qa_probabilities": [
0.6001328825950623,
0.43153026700019836,
0.7303735613822937,
0.4660419523715973,
0.6144695281982422,
0.44857344031333923,
0.13023461401462555,
0.21497581899166107,
0.7541935443878174,
0.3930618166923523,
0.5923050045967102,
0.24356049299240112,
0.5985594391822815,
0.44693607091903687,
0.5665600299835205,
0.37776947021484375,
0.6056777834892273,
0.6600034832954407,
0.506766676902771,
0.3960486650466919,
0.6760654449462891,
0.325588583946228,
0.84026700258255,
0.6310024857521057,
0.41549575328826904,
0.48468682169914246,
0.30185914039611816,
0.4738370478153229,
0.40581652522087097,
0.538021445274353,
0.40905657410621643,
0.5703331232070923,
0.5543002486228943,
0.6837395429611206,
0.7139797806739807,
0.47817718982696533,
0.5026704668998718,
0.6653541922569275,
0.5143362283706665,
0.33803310990333557,
0.5520848035812378,
0.7011743783950806,
0.8836812973022461,
0.6092294454574585,
0.2784689962863922,
0.7969092726707458,
0.6553284525871277,
0.5456470251083374,
0.44695183634757996,
0.548007071018219,
0.5680496692657471,
0.41621971130371094,
0.830963134765625,
0.7895110845565796,
0.4964308738708496,
0.3645930290222168,
0.5637221932411194,
0.44506263732910156,
0.2878129184246063,
0.5505443811416626,
0.494486927986145,
0.36506953835487366,
0.8300395011901855,
0.7011266946792603,
0.348209947347641,
0.626815140247345,
0.7101113796234131,
0.6857610940933228,
0.4028257131576538,
0.6243658065795898,
0.15364059805870056,
0.7464989423751831,
0.5398872494697571,
0.4613312780857086,
0.40898871421813965,
0.2252740114927292,
0.7652521133422852,
0.2925339937210083,
0.4388081431388855,
0.5058109164237976,
0.8158372044563293,
0.7546953558921814,
0.2097131758928299,
0.5340847373008728,
0.25506791472435,
0.537187397480011,
0.592171311378479,
0.5829126238822937,
0.7708747982978821,
0.43876922130584717,
0.45161038637161255,
0.4637928605079651,
0.32175856828689575,
0.6439905762672424,
0.35225629806518555,
0.5903196930885315,
0.8497107625007629,
0.530751645565033,
0.5443508625030518,
0.38598567247390747
],
"eval_trivia_qa_runtime": 6.9586,
"eval_trivia_qa_samples_per_second": 14.371,
"eval_trivia_qa_steps_per_second": 0.575,
"epoch": 1.32,
"step": 75
},
{
"loss": 0.6169,
"learning_rate": 0.0002,
"epoch": 1.33,
"step": 76
},
{
"loss": 0.4915,
"learning_rate": 0.0002,
"epoch": 1.35,
"step": 77
},
{
"loss": 0.3843,
"learning_rate": 0.0002,
"epoch": 1.37,
"step": 78
},
{
"loss": 0.3157,
"learning_rate": 0.0002,
"epoch": 1.39,
"step": 79
},
{
"loss": 0.4288,
"learning_rate": 0.0002,
"epoch": 1.4,
"step": 80
},
{
"loss": 0.4981,
"learning_rate": 0.0002,
"epoch": 1.42,
"step": 81
},
{
"loss": 0.3743,
"learning_rate": 0.0002,
"epoch": 1.44,
"step": 82
},
{
"loss": 0.4731,
"learning_rate": 0.0002,
"epoch": 1.46,
"step": 83
},
{
"loss": 0.5496,
"learning_rate": 0.0002,
"epoch": 1.47,
"step": 84
},
{
"loss": 0.5248,
"learning_rate": 0.0002,
"epoch": 1.49,
"step": 85
},
{
"loss": 0.3161,
"learning_rate": 0.0002,
"epoch": 1.51,
"step": 86
},
{
"loss": 0.4111,
"learning_rate": 0.0002,
"epoch": 1.53,
"step": 87
},
{
"loss": 0.6771,
"learning_rate": 0.0002,
"epoch": 1.54,
"step": 88
},
{
"loss": 0.3828,
"learning_rate": 0.0002,
"epoch": 1.56,
"step": 89
},
{
"loss": 0.5683,
"learning_rate": 0.0002,
"epoch": 1.58,
"step": 90
},
{
"loss": 0.3922,
"learning_rate": 0.0002,
"epoch": 1.6,
"step": 91
},
{
"loss": 0.3031,
"learning_rate": 0.0002,
"epoch": 1.61,
"step": 92
},
{
"loss": 0.4393,
"learning_rate": 0.0002,
"epoch": 1.63,
"step": 93
},
{
"loss": 0.5812,
"learning_rate": 0.0002,
"epoch": 1.65,
"step": 94
},
{
"loss": 0.3824,
"learning_rate": 0.0002,
"epoch": 1.67,
"step": 95
},
{
"loss": 0.438,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 96
},
{
"loss": 0.4176,
"learning_rate": 0.0002,
"epoch": 1.7,
"step": 97
},
{
"loss": 0.6267,
"learning_rate": 0.0002,
"epoch": 1.72,
"step": 98
},
{
"loss": 0.3414,
"learning_rate": 0.0002,
"epoch": 1.74,
"step": 99
},
{
"loss": 0.347,
"learning_rate": 0.0002,
"epoch": 1.75,
"step": 100
},
{
"eval_commonsense_qa_loss": 1.1523878574371338,
"eval_commonsense_qa_score": -0.396799772977829,
"eval_commonsense_qa_brier_score": 0.396799772977829,
"eval_commonsense_qa_average_probability": 0.4321339428424835,
"eval_commonsense_qa_accuracy": 0.37,
"eval_commonsense_qa_probabilities": [
0.5133974552154541,
0.19955140352249146,
0.12726318836212158,
0.2381817102432251,
0.3061620891094208,
0.4511687159538269,
0.031751181930303574,
0.04943360388278961,
0.0996626764535904,
0.7441174387931824,
0.9300025105476379,
0.7207792401313782,
0.00975093338638544,
0.05811166390776634,
0.01687975972890854,
0.6953206658363342,
0.8796400427818298,
0.8870823979377747,
0.7761392593383789,
0.2514343857765198,
0.7701127529144287,
0.8666774034500122,
0.9593339562416077,
0.39567831158638,
0.9251853227615356,
0.956170380115509,
0.5061752796173096,
0.3293822705745697,
0.29882335662841797,
0.24122630059719086,
0.23124100267887115,
0.27979356050491333,
0.7157328724861145,
0.08215536922216415,
0.899575412273407,
0.2522304952144623,
0.3705737888813019,
0.41751977801322937,
0.28646501898765564,
0.7097967267036438,
0.5926461219787598,
0.5077208280563354,
0.1959858238697052,
0.39983221888542175,
0.27750301361083984,
0.40524882078170776,
0.383944571018219,
0.677962064743042,
0.36190852522850037,
0.9803575873374939,
0.7406795620918274,
0.16878190636634827,
0.12593944370746613,
0.17838409543037415,
0.3623591661453247,
0.13244767487049103,
0.3018617331981659,
0.8912862539291382,
0.5757622122764587,
0.3221542537212372,
0.871790885925293,
0.8111267685890198,
0.7601278424263,
0.2897421419620514,
0.7055788040161133,
0.5388709306716919,
0.40989163517951965,
0.4253596067428589,
0.0980086550116539,
0.38305872678756714,
0.3864794969558716,
0.19545888900756836,
0.6149375438690186,
0.4320893883705139,
0.8303354382514954,
0.24265244603157043,
0.13059014081954956,
0.2409209907054901,
0.16863232851028442,
0.040597084909677505,
0.37653404474258423,
0.1172540932893753,
0.21813638508319855,
0.34870657324790955,
0.1515800803899765,
0.12445370852947235,
0.37387892603874207,
0.2328016608953476,
0.5161333680152893,
0.2136731743812561,
0.8099603056907654,
0.08686374127864838,
0.3295922577381134,
0.4922294318675995,
0.5317037105560303,
0.21833769977092743,
0.40150973200798035,
0.6546261310577393,
0.5003038048744202,
0.8764225840568542
],
"eval_commonsense_qa_runtime": 5.0489,
"eval_commonsense_qa_samples_per_second": 19.806,
"eval_commonsense_qa_steps_per_second": 0.792,
"epoch": 1.75,
"step": 100
},
{
"eval_trivia_qa_loss": 0.6934054493904114,
"eval_trivia_qa_score": -0.24079853296279907,
"eval_trivia_qa_brier_score": 0.24079853296279907,
"eval_trivia_qa_average_probability": 0.5418477654457092,
"eval_trivia_qa_accuracy": 0.65,
"eval_trivia_qa_probabilities": [
0.6125680208206177,
0.5168354511260986,
0.8149361610412598,
0.6358698010444641,
0.6971873641014099,
0.43288537859916687,
0.07869447767734528,
0.3557625114917755,
0.5231017470359802,
0.6738269925117493,
0.6492495536804199,
0.2834418714046478,
0.32658106088638306,
0.5844005942344666,
0.6492470502853394,
0.7314375042915344,
0.6042511463165283,
0.6276711821556091,
0.576631486415863,
0.386127233505249,
0.6349095702171326,
0.5288182497024536,
0.729836642742157,
0.9058018326759338,
0.5136997103691101,
0.5128685832023621,
0.3874431252479553,
0.5315768122673035,
0.5235913991928101,
0.3415122330188751,
0.6356922388076782,
0.6705965995788574,
0.6255914568901062,
0.5862753987312317,
0.7883551716804504,
0.44843554496765137,
0.7369633913040161,
0.661395788192749,
0.4538457691669464,
0.30895760655403137,
0.7524657249450684,
0.7141547799110413,
0.7967507839202881,
0.5967062711715698,
0.43285977840423584,
0.7750150561332703,
0.5969040393829346,
0.4807274043560028,
0.5844646096229553,
0.33085906505584717,
0.3553105592727661,
0.43021368980407715,
0.6605834364891052,
0.7358336448669434,
0.5644637942314148,
0.49804893136024475,
0.2971603572368622,
0.5024909377098083,
0.3082197606563568,
0.707648515701294,
0.4596785306930542,
0.28572654724121094,
0.7529811859130859,
0.7141095995903015,
0.38188186287879944,
0.5586990714073181,
0.7269975543022156,
0.5075417757034302,
0.48148077726364136,
0.6394890546798706,
0.09881781041622162,
0.7859747409820557,
0.5184196829795837,
0.19931602478027344,
0.6356649398803711,
0.21822792291641235,
0.6228802800178528,
0.13568221032619476,
0.3445551097393036,
0.5373251438140869,
0.830248236656189,
0.7611830830574036,
0.4023577868938446,
0.537299394607544,
0.2600027620792389,
0.6060169339179993,
0.6491380333900452,
0.4489743113517761,
0.7347836494445801,
0.3445013165473938,
0.5936704874038696,
0.6110560297966003,
0.36522528529167175,
0.6722674369812012,
0.40544307231903076,
0.5686450004577637,
0.9408103823661804,
0.5205425024032593,
0.5648188591003418,
0.35458648204803467
],
"eval_trivia_qa_runtime": 6.9847,
"eval_trivia_qa_samples_per_second": 14.317,
"eval_trivia_qa_steps_per_second": 0.573,
"epoch": 1.75,
"step": 100
},
{
"train_runtime": 538.1504,
"train_samples_per_second": 5.946,
"train_steps_per_second": 0.186,
"total_flos": 0.0,
"train_loss": 0.593766241967678,
"epoch": 1.75,
"step": 100
}
]]