[ { "loss": 0.8525, "learning_rate": 0.0002, "epoch": 0.02, "step": 1 }, { "loss": 0.6634, "learning_rate": 0.0002, "epoch": 0.04, "step": 2 }, { "loss": 0.7302, "learning_rate": 0.0002, "epoch": 0.05, "step": 3 }, { "loss": 0.7507, "learning_rate": 0.0002, "epoch": 0.07, "step": 4 }, { "loss": 0.7715, "learning_rate": 0.0002, "epoch": 0.09, "step": 5 }, { "loss": 0.7064, "learning_rate": 0.0002, "epoch": 0.11, "step": 6 }, { "loss": 0.7124, "learning_rate": 0.0002, "epoch": 0.12, "step": 7 }, { "loss": 0.7223, "learning_rate": 0.0002, "epoch": 0.14, "step": 8 }, { "loss": 0.6888, "learning_rate": 0.0002, "epoch": 0.16, "step": 9 }, { "loss": 0.7404, "learning_rate": 0.0002, "epoch": 0.18, "step": 10 }, { "loss": 0.6575, "learning_rate": 0.0002, "epoch": 0.19, "step": 11 }, { "loss": 0.6896, "learning_rate": 0.0002, "epoch": 0.21, "step": 12 }, { "loss": 0.6336, "learning_rate": 0.0002, "epoch": 0.23, "step": 13 }, { "loss": 0.6474, "learning_rate": 0.0002, "epoch": 0.25, "step": 14 }, { "loss": 0.6993, "learning_rate": 0.0002, "epoch": 0.26, "step": 15 }, { "loss": 0.6219, "learning_rate": 0.0002, "epoch": 0.28, "step": 16 }, { "loss": 0.7434, "learning_rate": 0.0002, "epoch": 0.3, "step": 17 }, { "loss": 0.7488, "learning_rate": 0.0002, "epoch": 0.32, "step": 18 }, { "loss": 0.7165, "learning_rate": 0.0002, "epoch": 0.33, "step": 19 }, { "loss": 0.7253, "learning_rate": 0.0002, "epoch": 0.35, "step": 20 }, { "loss": 0.6785, "learning_rate": 0.0002, "epoch": 0.37, "step": 21 }, { "loss": 0.7281, "learning_rate": 0.0002, "epoch": 0.39, "step": 22 }, { "loss": 0.7451, "learning_rate": 0.0002, "epoch": 0.4, "step": 23 }, { "loss": 0.6618, "learning_rate": 0.0002, "epoch": 0.42, "step": 24 }, { "loss": 0.6452, "learning_rate": 0.0002, "epoch": 0.44, "step": 25 }, { "eval_commonsense_qa_loss": 0.7914362549781799, "eval_commonsense_qa_score": -0.29111722111701965, "eval_commonsense_qa_brier_score": 0.29111722111701965, "eval_commonsense_qa_average_probability": 0.47185489535331726, "eval_commonsense_qa_accuracy": 0.39, "eval_commonsense_qa_probabilities": [ 0.3736628592014313, 0.41951867938041687, 0.40974757075309753, 0.43108245730400085, 0.39417126774787903, 0.36827391386032104, 0.3217296004295349, 0.4527994692325592, 0.44031405448913574, 0.5761461853981018, 0.6323458552360535, 0.5307877659797668, 0.3072277307510376, 0.35649847984313965, 0.24212150275707245, 0.4968399107456207, 0.7714088559150696, 0.4102341830730438, 0.5640316605567932, 0.5279660820960999, 0.26524466276168823, 0.46642741560935974, 0.667880654335022, 0.5137766003608704, 0.7010161280632019, 0.5712583065032959, 0.5321078300476074, 0.4304255247116089, 0.25722262263298035, 0.49681854248046875, 0.39940145611763, 0.38804712891578674, 0.413534939289093, 0.36740759015083313, 0.3980674743652344, 0.3686825931072235, 0.45741069316864014, 0.4793001115322113, 0.5210141539573669, 0.5084275603294373, 0.5377230644226074, 0.4399715065956116, 0.5001020431518555, 0.4058927297592163, 0.4787653684616089, 0.35228681564331055, 0.3852904140949249, 0.38124364614486694, 0.49779626727104187, 0.47046318650245667, 0.45446330308914185, 0.49293678998947144, 0.5469354391098022, 0.4111570715904236, 0.6435064077377319, 0.3415040075778961, 0.557502269744873, 0.6114356517791748, 0.4695909917354584, 0.3526020348072052, 0.6928711533546448, 0.688827633857727, 0.6876643896102905, 0.4979974031448364, 0.5267980098724365, 0.5480849146842957, 0.5159767866134644, 0.6435903310775757, 0.48293471336364746, 0.4974452555179596, 0.39486047625541687, 0.3298123776912689, 0.39702242612838745, 0.2585730254650116, 0.5650836229324341, 0.6514415144920349, 0.567798376083374, 0.5929319858551025, 0.35160502791404724, 0.39691317081451416, 0.589564859867096, 0.4809286296367645, 0.3265831768512726, 0.34647417068481445, 0.29951584339141846, 0.540679931640625, 0.477059006690979, 0.42304784059524536, 0.3258381187915802, 0.33986833691596985, 0.6030049920082092, 0.5700676441192627, 0.5453739166259766, 0.5489950180053711, 0.5006292462348938, 0.5012415647506714, 0.4897501468658447, 0.405569463968277, 0.41794145107269287, 0.6035709977149963 ], "eval_commonsense_qa_runtime": 5.072, "eval_commonsense_qa_samples_per_second": 19.716, "eval_commonsense_qa_steps_per_second": 0.789, "epoch": 0.44, "step": 25 }, { "eval_trivia_qa_loss": 0.7058285474777222, "eval_trivia_qa_score": -0.2509709298610687, "eval_trivia_qa_brier_score": 0.2509709298610687, "eval_trivia_qa_average_probability": 0.508730411529541, "eval_trivia_qa_accuracy": 0.56, "eval_trivia_qa_probabilities": [ 0.6108027696609497, 0.47548747062683105, 0.5868535041809082, 0.32189613580703735, 0.4967602789402008, 0.5013786554336548, 0.582642138004303, 0.5476276278495789, 0.6525075435638428, 0.3721744418144226, 0.46289870142936707, 0.5139896273612976, 0.6440446376800537, 0.5662510395050049, 0.5801640748977661, 0.27492260932922363, 0.5860297679901123, 0.4890085756778717, 0.6448793411254883, 0.5299046039581299, 0.3838443160057068, 0.2839445173740387, 0.5129914879798889, 0.6394293308258057, 0.5504795908927917, 0.6178440451622009, 0.35971587896347046, 0.45417338609695435, 0.42903023958206177, 0.559572160243988, 0.5979811549186707, 0.4994199275970459, 0.5579876899719238, 0.7093907594680786, 0.4760594666004181, 0.4218994379043579, 0.5057868361473083, 0.6060559153556824, 0.5037559866905212, 0.5046707391738892, 0.42048025131225586, 0.6409589648246765, 0.5305330753326416, 0.3996846079826355, 0.4467884302139282, 0.6522667407989502, 0.4101843535900116, 0.3745155334472656, 0.5663012266159058, 0.48798033595085144, 0.6159585118293762, 0.47460001707077026, 0.6789429187774658, 0.3534046411514282, 0.5315006375312805, 0.4756757915019989, 0.6186851263046265, 0.5154322981834412, 0.37286680936813354, 0.5532978177070618, 0.45223331451416016, 0.41775307059288025, 0.5951501727104187, 0.640890896320343, 0.4332258701324463, 0.6166451573371887, 0.626221239566803, 0.545464277267456, 0.36058875918388367, 0.4321788251399994, 0.4153941571712494, 0.5391730070114136, 0.396107017993927, 0.5979107618331909, 0.5979316830635071, 0.40809130668640137, 0.6226964592933655, 0.38245290517807007, 0.587360680103302, 0.44519785046577454, 0.659287691116333, 0.570533812046051, 0.3212454915046692, 0.5290343761444092, 0.3304256200790405, 0.5141531229019165, 0.4628876745700836, 0.4035484790802002, 0.5565099716186523, 0.5873599052429199, 0.5347287058830261, 0.4785170257091522, 0.44041702151298523, 0.4931771457195282, 0.5670301914215088, 0.5181891322135925, 0.6944608688354492, 0.4809003174304962, 0.5023355484008789, 0.3813191056251526 ], "eval_trivia_qa_runtime": 6.9532, "eval_trivia_qa_samples_per_second": 14.382, "eval_trivia_qa_steps_per_second": 0.575, "epoch": 0.44, "step": 25 }, { "loss": 0.6417, "learning_rate": 0.0002, "epoch": 0.46, "step": 26 }, { "loss": 0.696, "learning_rate": 0.0002, "epoch": 0.47, "step": 27 }, { "loss": 0.688, "learning_rate": 0.0002, "epoch": 0.49, "step": 28 }, { "loss": 0.6599, "learning_rate": 0.0002, "epoch": 0.51, "step": 29 }, { "loss": 0.726, "learning_rate": 0.0002, "epoch": 0.53, "step": 30 }, { "loss": 0.6367, "learning_rate": 0.0002, "epoch": 0.54, "step": 31 }, { "loss": 0.7377, "learning_rate": 0.0002, "epoch": 0.56, "step": 32 }, { "loss": 0.5971, "learning_rate": 0.0002, "epoch": 0.58, "step": 33 }, { "loss": 0.7561, "learning_rate": 0.0002, "epoch": 0.6, "step": 34 }, { "loss": 0.6453, "learning_rate": 0.0002, "epoch": 0.61, "step": 35 }, { "loss": 0.6648, "learning_rate": 0.0002, "epoch": 0.63, "step": 36 }, { "loss": 0.6106, "learning_rate": 0.0002, "epoch": 0.65, "step": 37 }, { "loss": 0.6421, "learning_rate": 0.0002, "epoch": 0.67, "step": 38 }, { "loss": 0.6576, "learning_rate": 0.0002, "epoch": 0.68, "step": 39 }, { "loss": 0.6426, "learning_rate": 0.0002, "epoch": 0.7, "step": 40 }, { "loss": 0.6443, "learning_rate": 0.0002, "epoch": 0.72, "step": 41 }, { "loss": 0.6842, "learning_rate": 0.0002, "epoch": 0.74, "step": 42 }, { "loss": 0.6407, "learning_rate": 0.0002, "epoch": 0.75, "step": 43 }, { "loss": 0.7051, "learning_rate": 0.0002, "epoch": 0.77, "step": 44 }, { "loss": 0.6729, "learning_rate": 0.0002, "epoch": 0.79, "step": 45 }, { "loss": 0.6876, "learning_rate": 0.0002, "epoch": 0.81, "step": 46 }, { "loss": 0.6711, "learning_rate": 0.0002, "epoch": 0.82, "step": 47 }, { "loss": 0.6613, "learning_rate": 0.0002, "epoch": 0.84, "step": 48 }, { "loss": 0.6044, "learning_rate": 0.0002, "epoch": 0.86, "step": 49 }, { "loss": 0.5769, "learning_rate": 0.0002, "epoch": 0.88, "step": 50 }, { "eval_commonsense_qa_loss": 0.7653241157531738, "eval_commonsense_qa_score": -0.2791268527507782, "eval_commonsense_qa_brier_score": 0.2791268527507782, "eval_commonsense_qa_average_probability": 0.48683643341064453, "eval_commonsense_qa_accuracy": 0.42, "eval_commonsense_qa_probabilities": [ 0.4020081162452698, 0.48435282707214355, 0.43516698479652405, 0.4614918529987335, 0.4766950011253357, 0.4323585629463196, 0.30255478620529175, 0.29709967970848083, 0.545942485332489, 0.5383279323577881, 0.6857153177261353, 0.48378315567970276, 0.16650518774986267, 0.2533150315284729, 0.19820035994052887, 0.44554558396339417, 0.7646204233169556, 0.6391469240188599, 0.5909577012062073, 0.5142195224761963, 0.46391570568084717, 0.4665536880493164, 0.5840488076210022, 0.43470215797424316, 0.7169751524925232, 0.7758315205574036, 0.5787749886512756, 0.5941563844680786, 0.46830788254737854, 0.5470129251480103, 0.46815431118011475, 0.526231050491333, 0.5080574750900269, 0.29635000228881836, 0.47670942544937134, 0.32638877630233765, 0.4723879396915436, 0.6098498702049255, 0.4592856168746948, 0.5818286538124084, 0.5597203373908997, 0.3799402415752411, 0.3624171018600464, 0.40649276971817017, 0.42219263315200806, 0.3799329996109009, 0.3508457839488983, 0.46101444959640503, 0.4670948088169098, 0.6448565721511841, 0.4881596565246582, 0.44159600138664246, 0.6058750748634338, 0.590934157371521, 0.46451979875564575, 0.37275078892707825, 0.5106647610664368, 0.6377049684524536, 0.5288455486297607, 0.4052494764328003, 0.6817584037780762, 0.682870090007782, 0.7456340193748474, 0.4100780189037323, 0.5996410846710205, 0.46194082498550415, 0.4774899482727051, 0.6287940740585327, 0.442749559879303, 0.38106194138526917, 0.3868009150028229, 0.34707480669021606, 0.5333090424537659, 0.4054989218711853, 0.6355715990066528, 0.5217822790145874, 0.3845539689064026, 0.45426151156425476, 0.43503573536872864, 0.4364105463027954, 0.6591715812683105, 0.5792595744132996, 0.22389499843120575, 0.4098465144634247, 0.3524221181869507, 0.42123866081237793, 0.48706841468811035, 0.41214796900749207, 0.22283338010311127, 0.28352028131484985, 0.688373863697052, 0.5961286425590515, 0.6101290583610535, 0.5565056204795837, 0.5037069320678711, 0.41246306896209717, 0.6470851302146912, 0.5888364315032959, 0.4137541353702545, 0.6086077690124512 ], "eval_commonsense_qa_runtime": 5.0497, "eval_commonsense_qa_samples_per_second": 19.803, "eval_commonsense_qa_steps_per_second": 0.792, "epoch": 0.88, "step": 50 }, { "eval_trivia_qa_loss": 0.6967568397521973, "eval_trivia_qa_score": -0.24871157109737396, "eval_trivia_qa_brier_score": 0.24871157109737396, "eval_trivia_qa_average_probability": 0.5072451829910278, "eval_trivia_qa_accuracy": 0.5, "eval_trivia_qa_probabilities": [ 0.5316422581672668, 0.432894766330719, 0.5510445833206177, 0.4671865999698639, 0.5174410343170166, 0.5549404621124268, 0.4334378242492676, 0.39600223302841187, 0.528852641582489, 0.42653071880340576, 0.5234686136245728, 0.42779213190078735, 0.5071923732757568, 0.4844723641872406, 0.5264027714729309, 0.439650297164917, 0.553399384021759, 0.5268545746803284, 0.42666733264923096, 0.47586703300476074, 0.4965582489967346, 0.34971049427986145, 0.6940780878067017, 0.5421137809753418, 0.48094817996025085, 0.43245548009872437, 0.5597365498542786, 0.4554421305656433, 0.4893034100532532, 0.5171124935150146, 0.5146493911743164, 0.495728462934494, 0.4713417887687683, 0.6222578287124634, 0.498948872089386, 0.40657901763916016, 0.527847409248352, 0.7099897265434265, 0.5247665047645569, 0.4212891161441803, 0.5828162431716919, 0.540741503238678, 0.5784919857978821, 0.5161374807357788, 0.4517030119895935, 0.6503217220306396, 0.5777159929275513, 0.36965277791023254, 0.4844502806663513, 0.5828574299812317, 0.6145827174186707, 0.3802032172679901, 0.7286924123764038, 0.4977825880050659, 0.5343342423439026, 0.5175969004631042, 0.48933130502700806, 0.5237868428230286, 0.39064183831214905, 0.5096392035484314, 0.5159491896629333, 0.3979681730270386, 0.6354355812072754, 0.5407276153564453, 0.4297018051147461, 0.5711097121238708, 0.6063168048858643, 0.4898560345172882, 0.43474793434143066, 0.5227797627449036, 0.4176207184791565, 0.6359185576438904, 0.4207601547241211, 0.4994364082813263, 0.5145835876464844, 0.37604933977127075, 0.5790051817893982, 0.45702680945396423, 0.48608124256134033, 0.47564682364463806, 0.6444876194000244, 0.4628547132015228, 0.4236738979816437, 0.5284913778305054, 0.3784801661968231, 0.6344289779663086, 0.523414671421051, 0.4968854784965515, 0.4976916015148163, 0.4860800504684448, 0.47529691457748413, 0.4533490836620331, 0.4189370274543762, 0.6194345355033875, 0.484923779964447, 0.5832501649856567, 0.6392970681190491, 0.509278416633606, 0.5340873003005981, 0.4593735337257385 ], "eval_trivia_qa_runtime": 6.9799, "eval_trivia_qa_samples_per_second": 14.327, "eval_trivia_qa_steps_per_second": 0.573, "epoch": 0.88, "step": 50 }, { "loss": 0.6601, "learning_rate": 0.0002, "epoch": 0.89, "step": 51 }, { "loss": 0.6454, "learning_rate": 0.0002, "epoch": 0.91, "step": 52 }, { "loss": 0.6547, "learning_rate": 0.0002, "epoch": 0.93, "step": 53 }, { "loss": 0.7068, "learning_rate": 0.0002, "epoch": 0.95, "step": 54 }, { "loss": 0.6629, "learning_rate": 0.0002, "epoch": 0.96, "step": 55 }, { "loss": 0.7136, "learning_rate": 0.0002, "epoch": 0.98, "step": 56 }, { "loss": 0.6437, "learning_rate": 0.0002, "epoch": 1.0, "step": 57 }, { "loss": 0.5198, "learning_rate": 0.0002, "epoch": 1.02, "step": 58 }, { "loss": 0.4605, "learning_rate": 0.0002, "epoch": 1.04, "step": 59 }, { "loss": 0.4861, "learning_rate": 0.0002, "epoch": 1.05, "step": 60 }, { "loss": 0.483, "learning_rate": 0.0002, "epoch": 1.07, "step": 61 }, { "loss": 0.5581, "learning_rate": 0.0002, "epoch": 1.09, "step": 62 }, { "loss": 0.4564, "learning_rate": 0.0002, "epoch": 1.11, "step": 63 }, { "loss": 0.4056, "learning_rate": 0.0002, "epoch": 1.12, "step": 64 }, { "loss": 0.4167, "learning_rate": 0.0002, "epoch": 1.14, "step": 65 }, { "loss": 0.7253, "learning_rate": 0.0002, "epoch": 1.16, "step": 66 }, { "loss": 0.4609, "learning_rate": 0.0002, "epoch": 1.18, "step": 67 }, { "loss": 0.5302, "learning_rate": 0.0002, "epoch": 1.19, "step": 68 }, { "loss": 0.6734, "learning_rate": 0.0002, "epoch": 1.21, "step": 69 }, { "loss": 0.5224, "learning_rate": 0.0002, "epoch": 1.23, "step": 70 }, { "loss": 0.4326, "learning_rate": 0.0002, "epoch": 1.25, "step": 71 }, { "loss": 0.5751, "learning_rate": 0.0002, "epoch": 1.26, "step": 72 }, { "loss": 0.5653, "learning_rate": 0.0002, "epoch": 1.28, "step": 73 }, { "loss": 0.4586, "learning_rate": 0.0002, "epoch": 1.3, "step": 74 }, { "loss": 0.4466, "learning_rate": 0.0002, "epoch": 1.32, "step": 75 }, { "eval_commonsense_qa_loss": 1.0953351259231567, "eval_commonsense_qa_score": -0.3630787134170532, "eval_commonsense_qa_brier_score": 0.3630787134170532, "eval_commonsense_qa_average_probability": 0.46727100014686584, "eval_commonsense_qa_accuracy": 0.47, "eval_commonsense_qa_probabilities": [ 0.6413354277610779, 0.6861792206764221, 0.9139598608016968, 0.3129615783691406, 0.4858136773109436, 0.3958660662174225, 0.015349932946264744, 0.03121619112789631, 0.3732204735279083, 0.5287988781929016, 0.9410275220870972, 0.4859994351863861, 0.00484914006665349, 0.06006177142262459, 0.016101302579045296, 0.25591790676116943, 0.9382115006446838, 0.7453911304473877, 0.49844464659690857, 0.11961045861244202, 0.21799755096435547, 0.22540347278118134, 0.6641191840171814, 0.3415297865867615, 0.9549143314361572, 0.9577696323394775, 0.3804933726787567, 0.5659024715423584, 0.6399015188217163, 0.6054954528808594, 0.16932412981987, 0.24352750182151794, 0.5880881547927856, 0.10066776722669601, 0.2192084938287735, 0.08628320693969727, 0.33604246377944946, 0.27943921089172363, 0.3816390931606293, 0.7643809914588928, 0.7210124731063843, 0.5266180038452148, 0.204672709107399, 0.5603741407394409, 0.6822576522827148, 0.19128814339637756, 0.20498374104499817, 0.09368855506181717, 0.6106529235839844, 0.8263741135597229, 0.6096042394638062, 0.3612224757671356, 0.3293110430240631, 0.4423503577709198, 0.1176837608218193, 0.1601405143737793, 0.3888700008392334, 0.8851404786109924, 0.578056275844574, 0.2645527720451355, 0.9431173205375671, 0.8939391374588013, 0.9614054560661316, 0.15922978520393372, 0.5092063546180725, 0.7479695081710815, 0.2555106580257416, 0.9339343905448914, 0.5020483136177063, 0.2890869677066803, 0.25056713819503784, 0.07140284031629562, 0.6142622828483582, 0.6608532667160034, 0.8618139028549194, 0.8793162107467651, 0.5890896916389465, 0.8838966488838196, 0.4716782569885254, 0.398629367351532, 0.8436529040336609, 0.3973071277141571, 0.06621242314577103, 0.5546140670776367, 0.16331911087036133, 0.606509804725647, 0.6033067107200623, 0.2906716763973236, 0.009353392757475376, 0.061065930873155594, 0.8538415431976318, 0.5957357883453369, 0.7623599767684937, 0.6353998780250549, 0.6718450784683228, 0.06562834978103638, 0.4774560332298279, 0.20875515043735504, 0.2653869092464447, 0.7864195108413696 ], "eval_commonsense_qa_runtime": 5.0892, "eval_commonsense_qa_samples_per_second": 19.649, "eval_commonsense_qa_steps_per_second": 0.786, "epoch": 1.32, "step": 75 }, { "eval_trivia_qa_loss": 0.7178796529769897, "eval_trivia_qa_score": -0.25323230028152466, "eval_trivia_qa_brier_score": 0.25323230028152466, "eval_trivia_qa_average_probability": 0.5258346796035767, "eval_trivia_qa_accuracy": 0.56, "eval_trivia_qa_probabilities": [ 0.6001328825950623, 0.43153026700019836, 0.7303735613822937, 0.4660419523715973, 0.6144695281982422, 0.44857344031333923, 0.13023461401462555, 0.21497581899166107, 0.7541935443878174, 0.3930618166923523, 0.5923050045967102, 0.24356049299240112, 0.5985594391822815, 0.44693607091903687, 0.5665600299835205, 0.37776947021484375, 0.6056777834892273, 0.6600034832954407, 0.506766676902771, 0.3960486650466919, 0.6760654449462891, 0.325588583946228, 0.84026700258255, 0.6310024857521057, 0.41549575328826904, 0.48468682169914246, 0.30185914039611816, 0.4738370478153229, 0.40581652522087097, 0.538021445274353, 0.40905657410621643, 0.5703331232070923, 0.5543002486228943, 0.6837395429611206, 0.7139797806739807, 0.47817718982696533, 0.5026704668998718, 0.6653541922569275, 0.5143362283706665, 0.33803310990333557, 0.5520848035812378, 0.7011743783950806, 0.8836812973022461, 0.6092294454574585, 0.2784689962863922, 0.7969092726707458, 0.6553284525871277, 0.5456470251083374, 0.44695183634757996, 0.548007071018219, 0.5680496692657471, 0.41621971130371094, 0.830963134765625, 0.7895110845565796, 0.4964308738708496, 0.3645930290222168, 0.5637221932411194, 0.44506263732910156, 0.2878129184246063, 0.5505443811416626, 0.494486927986145, 0.36506953835487366, 0.8300395011901855, 0.7011266946792603, 0.348209947347641, 0.626815140247345, 0.7101113796234131, 0.6857610940933228, 0.4028257131576538, 0.6243658065795898, 0.15364059805870056, 0.7464989423751831, 0.5398872494697571, 0.4613312780857086, 0.40898871421813965, 0.2252740114927292, 0.7652521133422852, 0.2925339937210083, 0.4388081431388855, 0.5058109164237976, 0.8158372044563293, 0.7546953558921814, 0.2097131758928299, 0.5340847373008728, 0.25506791472435, 0.537187397480011, 0.592171311378479, 0.5829126238822937, 0.7708747982978821, 0.43876922130584717, 0.45161038637161255, 0.4637928605079651, 0.32175856828689575, 0.6439905762672424, 0.35225629806518555, 0.5903196930885315, 0.8497107625007629, 0.530751645565033, 0.5443508625030518, 0.38598567247390747 ], "eval_trivia_qa_runtime": 6.9586, "eval_trivia_qa_samples_per_second": 14.371, "eval_trivia_qa_steps_per_second": 0.575, "epoch": 1.32, "step": 75 }, { "loss": 0.6169, "learning_rate": 0.0002, "epoch": 1.33, "step": 76 }, { "loss": 0.4915, "learning_rate": 0.0002, "epoch": 1.35, "step": 77 }, { "loss": 0.3843, "learning_rate": 0.0002, "epoch": 1.37, "step": 78 }, { "loss": 0.3157, "learning_rate": 0.0002, "epoch": 1.39, "step": 79 }, { "loss": 0.4288, "learning_rate": 0.0002, "epoch": 1.4, "step": 80 }, { "loss": 0.4981, "learning_rate": 0.0002, "epoch": 1.42, "step": 81 }, { "loss": 0.3743, "learning_rate": 0.0002, "epoch": 1.44, "step": 82 }, { "loss": 0.4731, "learning_rate": 0.0002, "epoch": 1.46, "step": 83 }, { "loss": 0.5496, "learning_rate": 0.0002, "epoch": 1.47, "step": 84 }, { "loss": 0.5248, "learning_rate": 0.0002, "epoch": 1.49, "step": 85 }, { "loss": 0.3161, "learning_rate": 0.0002, "epoch": 1.51, "step": 86 }, { "loss": 0.4111, "learning_rate": 0.0002, "epoch": 1.53, "step": 87 }, { "loss": 0.6771, "learning_rate": 0.0002, "epoch": 1.54, "step": 88 }, { "loss": 0.3828, "learning_rate": 0.0002, "epoch": 1.56, "step": 89 }, { "loss": 0.5683, "learning_rate": 0.0002, "epoch": 1.58, "step": 90 }, { "loss": 0.3922, "learning_rate": 0.0002, "epoch": 1.6, "step": 91 }, { "loss": 0.3031, "learning_rate": 0.0002, "epoch": 1.61, "step": 92 }, { "loss": 0.4393, "learning_rate": 0.0002, "epoch": 1.63, "step": 93 }, { "loss": 0.5812, "learning_rate": 0.0002, "epoch": 1.65, "step": 94 }, { "loss": 0.3824, "learning_rate": 0.0002, "epoch": 1.67, "step": 95 }, { "loss": 0.438, "learning_rate": 0.0002, "epoch": 1.68, "step": 96 }, { "loss": 0.4176, "learning_rate": 0.0002, "epoch": 1.7, "step": 97 }, { "loss": 0.6267, "learning_rate": 0.0002, "epoch": 1.72, "step": 98 }, { "loss": 0.3414, "learning_rate": 0.0002, "epoch": 1.74, "step": 99 }, { "loss": 0.347, "learning_rate": 0.0002, "epoch": 1.75, "step": 100 }, { "eval_commonsense_qa_loss": 1.1523878574371338, "eval_commonsense_qa_score": -0.396799772977829, "eval_commonsense_qa_brier_score": 0.396799772977829, "eval_commonsense_qa_average_probability": 0.4321339428424835, "eval_commonsense_qa_accuracy": 0.37, "eval_commonsense_qa_probabilities": [ 0.5133974552154541, 0.19955140352249146, 0.12726318836212158, 0.2381817102432251, 0.3061620891094208, 0.4511687159538269, 0.031751181930303574, 0.04943360388278961, 0.0996626764535904, 0.7441174387931824, 0.9300025105476379, 0.7207792401313782, 0.00975093338638544, 0.05811166390776634, 0.01687975972890854, 0.6953206658363342, 0.8796400427818298, 0.8870823979377747, 0.7761392593383789, 0.2514343857765198, 0.7701127529144287, 0.8666774034500122, 0.9593339562416077, 0.39567831158638, 0.9251853227615356, 0.956170380115509, 0.5061752796173096, 0.3293822705745697, 0.29882335662841797, 0.24122630059719086, 0.23124100267887115, 0.27979356050491333, 0.7157328724861145, 0.08215536922216415, 0.899575412273407, 0.2522304952144623, 0.3705737888813019, 0.41751977801322937, 0.28646501898765564, 0.7097967267036438, 0.5926461219787598, 0.5077208280563354, 0.1959858238697052, 0.39983221888542175, 0.27750301361083984, 0.40524882078170776, 0.383944571018219, 0.677962064743042, 0.36190852522850037, 0.9803575873374939, 0.7406795620918274, 0.16878190636634827, 0.12593944370746613, 0.17838409543037415, 0.3623591661453247, 0.13244767487049103, 0.3018617331981659, 0.8912862539291382, 0.5757622122764587, 0.3221542537212372, 0.871790885925293, 0.8111267685890198, 0.7601278424263, 0.2897421419620514, 0.7055788040161133, 0.5388709306716919, 0.40989163517951965, 0.4253596067428589, 0.0980086550116539, 0.38305872678756714, 0.3864794969558716, 0.19545888900756836, 0.6149375438690186, 0.4320893883705139, 0.8303354382514954, 0.24265244603157043, 0.13059014081954956, 0.2409209907054901, 0.16863232851028442, 0.040597084909677505, 0.37653404474258423, 0.1172540932893753, 0.21813638508319855, 0.34870657324790955, 0.1515800803899765, 0.12445370852947235, 0.37387892603874207, 0.2328016608953476, 0.5161333680152893, 0.2136731743812561, 0.8099603056907654, 0.08686374127864838, 0.3295922577381134, 0.4922294318675995, 0.5317037105560303, 0.21833769977092743, 0.40150973200798035, 0.6546261310577393, 0.5003038048744202, 0.8764225840568542 ], "eval_commonsense_qa_runtime": 5.0489, "eval_commonsense_qa_samples_per_second": 19.806, "eval_commonsense_qa_steps_per_second": 0.792, "epoch": 1.75, "step": 100 }, { "eval_trivia_qa_loss": 0.6934054493904114, "eval_trivia_qa_score": -0.24079853296279907, "eval_trivia_qa_brier_score": 0.24079853296279907, "eval_trivia_qa_average_probability": 0.5418477654457092, "eval_trivia_qa_accuracy": 0.65, "eval_trivia_qa_probabilities": [ 0.6125680208206177, 0.5168354511260986, 0.8149361610412598, 0.6358698010444641, 0.6971873641014099, 0.43288537859916687, 0.07869447767734528, 0.3557625114917755, 0.5231017470359802, 0.6738269925117493, 0.6492495536804199, 0.2834418714046478, 0.32658106088638306, 0.5844005942344666, 0.6492470502853394, 0.7314375042915344, 0.6042511463165283, 0.6276711821556091, 0.576631486415863, 0.386127233505249, 0.6349095702171326, 0.5288182497024536, 0.729836642742157, 0.9058018326759338, 0.5136997103691101, 0.5128685832023621, 0.3874431252479553, 0.5315768122673035, 0.5235913991928101, 0.3415122330188751, 0.6356922388076782, 0.6705965995788574, 0.6255914568901062, 0.5862753987312317, 0.7883551716804504, 0.44843554496765137, 0.7369633913040161, 0.661395788192749, 0.4538457691669464, 0.30895760655403137, 0.7524657249450684, 0.7141547799110413, 0.7967507839202881, 0.5967062711715698, 0.43285977840423584, 0.7750150561332703, 0.5969040393829346, 0.4807274043560028, 0.5844646096229553, 0.33085906505584717, 0.3553105592727661, 0.43021368980407715, 0.6605834364891052, 0.7358336448669434, 0.5644637942314148, 0.49804893136024475, 0.2971603572368622, 0.5024909377098083, 0.3082197606563568, 0.707648515701294, 0.4596785306930542, 0.28572654724121094, 0.7529811859130859, 0.7141095995903015, 0.38188186287879944, 0.5586990714073181, 0.7269975543022156, 0.5075417757034302, 0.48148077726364136, 0.6394890546798706, 0.09881781041622162, 0.7859747409820557, 0.5184196829795837, 0.19931602478027344, 0.6356649398803711, 0.21822792291641235, 0.6228802800178528, 0.13568221032619476, 0.3445551097393036, 0.5373251438140869, 0.830248236656189, 0.7611830830574036, 0.4023577868938446, 0.537299394607544, 0.2600027620792389, 0.6060169339179993, 0.6491380333900452, 0.4489743113517761, 0.7347836494445801, 0.3445013165473938, 0.5936704874038696, 0.6110560297966003, 0.36522528529167175, 0.6722674369812012, 0.40544307231903076, 0.5686450004577637, 0.9408103823661804, 0.5205425024032593, 0.5648188591003418, 0.35458648204803467 ], "eval_trivia_qa_runtime": 6.9847, "eval_trivia_qa_samples_per_second": 14.317, "eval_trivia_qa_steps_per_second": 0.573, "epoch": 1.75, "step": 100 }, { "train_runtime": 538.1504, "train_samples_per_second": 5.946, "train_steps_per_second": 0.186, "total_flos": 0.0, "train_loss": 0.593766241967678, "epoch": 1.75, "step": 100 } ]]