Muennighoff's picture
Add
2aef930
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.2736384503031292, "bleu_stderr": 0.013761131588040647, "rouge1_fmeasure": 0.0841242506256365, "rouge1_fmeasure_stderr": 0.0018270976968369269, "rouge1_precision": 0.06743672908113135, "rouge1_precision_stderr": 0.002638252273936629, "rouge1_recall": 0.24234907953353363, "rouge1_recall_stderr": 0.004536576899675782, "rouge2_fmeasure": 0.03514871886969164, "rouge2_fmeasure_stderr": 0.0011146408262655305, "rouge2_precision": 0.027017029568202292, "rouge2_precision_stderr": 0.0014828730907298778, "rouge2_recall": 0.10299041387162453, "rouge2_recall_stderr": 0.0029846982659956075, "rougeL_fmeasure": 0.07737030634527048, "rougeL_fmeasure_stderr": 0.0016531878395951427, "rougeL_precision": 0.06194176556417661, "rougeL_precision_stderr": 0.002469243942808823, "rougeL_recall": 0.2264836217634177, "rougeL_recall_stderr": 0.00433268038244018, "rougeLsum_fmeasure": 0.07803274538661605, "rougeLsum_fmeasure_stderr": 0.0016759567284788054, "rougeLsum_precision": 0.06286672427102329, "rougeLsum_precision_stderr": 0.002521739438447608, "rougeLsum_recall": 0.22562189058095142, "rougeLsum_recall_stderr": 0.004207794303094894}}, "1": {"PALM_prompt": {"bleu": 0.3280132335844989, "bleu_stderr": 0.023397817639014704, "rouge1_fmeasure": 0.08616965529606423, "rouge1_fmeasure_stderr": 0.0017333332075431082, "rouge1_precision": 0.0644491309312055, "rouge1_precision_stderr": 0.002168252601283438, "rouge1_recall": 0.26932324402478697, "rouge1_recall_stderr": 0.004869448854108838, "rouge2_fmeasure": 0.0348005109489672, "rouge2_fmeasure_stderr": 0.001057061308651459, "rouge2_precision": 0.026688837851098213, "rouge2_precision_stderr": 0.00144137777314208, "rouge2_recall": 0.11232023057734815, "rouge2_recall_stderr": 0.0031229809372560016, "rougeL_fmeasure": 0.07766746661115345, "rougeL_fmeasure_stderr": 0.0015488126211683285, "rougeL_precision": 0.05835398948355422, "rougeL_precision_stderr": 0.002014413516440926, "rougeL_recall": 0.24510976411983082, "rougeL_recall_stderr": 0.004441288545500777, "rougeLsum_fmeasure": 0.07959776724269638, "rougeLsum_fmeasure_stderr": 0.0015771133465866834, "rougeLsum_precision": 0.05982295830374904, "rougeLsum_precision_stderr": 0.0020397591398194034, "rougeLsum_recall": 0.24879869028857263, "rougeLsum_recall_stderr": 0.004418589131657239}}, "2": {"PALM_prompt": {"bleu": 0.34638515482577353, "bleu_stderr": 0.015291163912320053, "rouge1_fmeasure": 0.0880916672062598, "rouge1_fmeasure_stderr": 0.0018284068254863554, "rouge1_precision": 0.06391106809448908, "rouge1_precision_stderr": 0.0022299798579838675, "rouge1_recall": 0.27425504532607553, "rouge1_recall_stderr": 0.004799759636672703, "rouge2_fmeasure": 0.036072521136247385, "rouge2_fmeasure_stderr": 0.0011506150158658523, "rouge2_precision": 0.026175962829444736, "rouge2_precision_stderr": 0.0013416093394333784, "rouge2_recall": 0.11550668310546741, "rouge2_recall_stderr": 0.003058036972217679, "rougeL_fmeasure": 0.07881332955128942, "rougeL_fmeasure_stderr": 0.001602085026744074, "rougeL_precision": 0.05713027372206721, "rougeL_precision_stderr": 0.001991481877156521, "rougeL_recall": 0.24716315799999466, "rougeL_recall_stderr": 0.0043125243751206115, "rougeLsum_fmeasure": 0.08169534672440366, "rougeLsum_fmeasure_stderr": 0.0016715880286721457, "rougeLsum_precision": 0.059367895890295445, "rougeLsum_precision_stderr": 0.0020685069924175725, "rougeLsum_recall": 0.25477280658969237, "rougeLsum_recall_stderr": 0.004397648802195207}}, "3": {"PALM_prompt": {"bleu": 0.3704624770003497, "bleu_stderr": 0.03638159942943428, "rouge1_fmeasure": 0.08559857536619786, "rouge1_fmeasure_stderr": 0.0016363130752602227, "rouge1_precision": 0.05982634280444436, "rouge1_precision_stderr": 0.0018001262758726789, "rouge1_recall": 0.2777138419634843, "rouge1_recall_stderr": 0.004952916631808108, "rouge2_fmeasure": 0.03505346120993315, "rouge2_fmeasure_stderr": 0.0010090465429173114, "rouge2_precision": 0.02360102097448928, "rouge2_precision_stderr": 0.000854243652157208, "rouge2_recall": 0.12042632696557598, "rouge2_recall_stderr": 0.0032866446467497232, "rougeL_fmeasure": 0.07654509455251524, "rougeL_fmeasure_stderr": 0.0014639780726145254, "rougeL_precision": 0.05383446823399059, "rougeL_precision_stderr": 0.0017041071876981678, "rougeL_recall": 0.2500627435472675, "rougeL_recall_stderr": 0.0044852293234451, "rougeLsum_fmeasure": 0.07947542844329104, "rougeLsum_fmeasure_stderr": 0.0015233699868767465, "rougeLsum_precision": 0.055977403732155415, "rougeLsum_precision_stderr": 0.0017529438408709402, "rougeLsum_recall": 0.25674832287427335, "rougeLsum_recall_stderr": 0.00451833760788113}}, "4": {"PALM_prompt": {"bleu": 0.34948975698170615, "bleu_stderr": 0.023821938767836993, "rouge1_fmeasure": 0.08584051874793835, "rouge1_fmeasure_stderr": 0.0015622002417028488, "rouge1_precision": 0.058510872569050076, "rouge1_precision_stderr": 0.0014977652612347687, "rouge1_recall": 0.27839964342296464, "rouge1_recall_stderr": 0.004925790540801674, "rouge2_fmeasure": 0.03527561542480963, "rouge2_fmeasure_stderr": 0.0009617481477463297, "rouge2_precision": 0.023403315924946395, "rouge2_precision_stderr": 0.0007924389946385025, "rouge2_recall": 0.12173985471099795, "rouge2_recall_stderr": 0.003270953240241298, "rougeL_fmeasure": 0.07682570075935022, "rougeL_fmeasure_stderr": 0.0013946187591393226, "rougeL_precision": 0.052502534519531324, "rougeL_precision_stderr": 0.0013726859531974134, "rougeL_recall": 0.24948504716127445, "rougeL_recall_stderr": 0.004413042414587561, "rougeLsum_fmeasure": 0.079801865581312, "rougeLsum_fmeasure_stderr": 0.0014350152199570983, "rougeLsum_precision": 0.05451930264143289, "rougeLsum_precision_stderr": 0.0014056245713133853, "rougeLsum_recall": 0.25804555072515556, "rougeLsum_recall_stderr": 0.004485018866773874}}, "5": {"PALM_prompt": {"bleu": 0.3520654919870807, "bleu_stderr": 0.027609734870300613, "rouge1_fmeasure": 0.0854739749972663, "rouge1_fmeasure_stderr": 0.0016018030269853596, "rouge1_precision": 0.06066641398573986, "rouge1_precision_stderr": 0.0019391074427159668, "rouge1_recall": 0.27264418002989005, "rouge1_recall_stderr": 0.004798703480922437, "rouge2_fmeasure": 0.034951021231894634, "rouge2_fmeasure_stderr": 0.0009990590843116766, "rouge2_precision": 0.02575665330833014, "rouge2_precision_stderr": 0.0013664015126702937, "rouge2_recall": 0.11728661099632723, "rouge2_recall_stderr": 0.003159030312730633, "rougeL_fmeasure": 0.0757905577263458, "rougeL_fmeasure_stderr": 0.0014241979978356996, "rougeL_precision": 0.05442877601650985, "rougeL_precision_stderr": 0.0018622354837109474, "rougeL_recall": 0.24215142547294755, "rougeL_recall_stderr": 0.0042165672086959295, "rougeLsum_fmeasure": 0.07931691685317459, "rougeLsum_fmeasure_stderr": 0.001477735740337954, "rougeLsum_precision": 0.05680657817739951, "rougeLsum_precision_stderr": 0.0018856743142604026, "rougeLsum_recall": 0.2521970662394448, "rougeLsum_recall_stderr": 0.004360316207858167}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.1939811006947518, "bleu_stderr": 0.05144840709887267, "rouge1_fmeasure": 0.15984310029967955, "rouge1_fmeasure_stderr": 0.0017964555443949755, "rouge1_precision": 0.14402662945431702, "rouge1_precision_stderr": 0.0019964636983908577, "rouge1_recall": 0.224835520834695, "rouge1_recall_stderr": 0.002534663076636594, "rouge2_fmeasure": 0.026484542226918303, "rouge2_fmeasure_stderr": 0.0007149276960787613, "rouge2_precision": 0.024253651892450524, "rouge2_precision_stderr": 0.0007830043238366567, "rouge2_recall": 0.03807915014193239, "rouge2_recall_stderr": 0.0011514900025684483, "rougeL_fmeasure": 0.12162866229386085, "rougeL_fmeasure_stderr": 0.0012526718135058654, "rougeL_precision": 0.1089736641128092, "rougeL_precision_stderr": 0.0014490175204519933, "rougeL_recall": 0.1749701135473759, "rougeL_recall_stderr": 0.0019676023959095718, "rougeLsum_fmeasure": 0.14948770603973066, "rougeLsum_fmeasure_stderr": 0.001676038166549879, "rougeLsum_precision": 0.1346760996841278, "rougeLsum_precision_stderr": 0.0018704153662604697, "rougeLsum_recall": 0.21077414533420868, "rougeLsum_recall_stderr": 0.0023879704709676495}}, "1": {"tldr_en": {"bleu": 1.329387173898731, "bleu_stderr": 0.050959742820502744, "rouge1_fmeasure": 0.16323809480458262, "rouge1_fmeasure_stderr": 0.0017781665840047188, "rouge1_precision": 0.1421779647141786, "rouge1_precision_stderr": 0.00183899601619, "rouge1_recall": 0.2328221252469704, "rouge1_recall_stderr": 0.002523117541816358, "rouge2_fmeasure": 0.026405299879801242, "rouge2_fmeasure_stderr": 0.0007513499596621286, "rouge2_precision": 0.023068734244902384, "rouge2_precision_stderr": 0.0006924316781711188, "rouge2_recall": 0.038394150548631906, "rouge2_recall_stderr": 0.0012070561863604995, "rougeL_fmeasure": 0.12039709037310402, "rougeL_fmeasure_stderr": 0.0012183558463707484, "rougeL_precision": 0.10407135199844923, "rougeL_precision_stderr": 0.0012739274090963085, "rougeL_recall": 0.1756629066815182, "rougeL_recall_stderr": 0.001920166706499377, "rougeLsum_fmeasure": 0.15379828585730648, "rougeLsum_fmeasure_stderr": 0.0016655511240505972, "rougeLsum_precision": 0.13389561548404935, "rougeLsum_precision_stderr": 0.0017270516951590925, "rougeLsum_recall": 0.21986093946231267, "rougeLsum_recall_stderr": 0.0023909852118580966}}, "2": {"tldr_en": {"bleu": 1.4424517914266726, "bleu_stderr": 0.07020911999324617, "rouge1_fmeasure": 0.16632065161606457, "rouge1_fmeasure_stderr": 0.0018066810527815245, "rouge1_precision": 0.1457832945742694, "rouge1_precision_stderr": 0.0019709762483393995, "rouge1_recall": 0.23637305672174788, "rouge1_recall_stderr": 0.00247992475878217, "rouge2_fmeasure": 0.028604389417761292, "rouge2_fmeasure_stderr": 0.0007628849403399381, "rouge2_precision": 0.025484778469339338, "rouge2_precision_stderr": 0.0007890273465689312, "rouge2_recall": 0.04140023492730905, "rouge2_recall_stderr": 0.0012184459681475824, "rougeL_fmeasure": 0.1241823537201758, "rougeL_fmeasure_stderr": 0.0012458496339153408, "rougeL_precision": 0.10769050653186357, "rougeL_precision_stderr": 0.0013738979743412702, "rougeL_recall": 0.1810403836066358, "rougeL_recall_stderr": 0.0019346598459529787, "rougeLsum_fmeasure": 0.15640138097648992, "rougeLsum_fmeasure_stderr": 0.0016797214333223785, "rougeLsum_precision": 0.1369600906660757, "rougeLsum_precision_stderr": 0.0018450558019738083, "rougeLsum_recall": 0.22309275958929797, "rougeLsum_recall_stderr": 0.00234205408760469}}, "3": {"tldr_en": {"bleu": 1.6327965384480934, "bleu_stderr": 0.04784558757070599, "rouge1_fmeasure": 0.143979322470228, "rouge1_fmeasure_stderr": 0.0020322874462190108, "rouge1_precision": 0.12998655695663616, "rouge1_precision_stderr": 0.002189516833129907, "rouge1_recall": 0.20496486516154497, "rouge1_recall_stderr": 0.0028659172041743037, "rouge2_fmeasure": 0.025681636061568994, "rouge2_fmeasure_stderr": 0.0007575885700225844, "rouge2_precision": 0.02321033760008198, "rouge2_precision_stderr": 0.0007517668617932092, "rouge2_recall": 0.03739800165990479, "rouge2_recall_stderr": 0.0012436961876240002, "rougeL_fmeasure": 0.10744111532164179, "rougeL_fmeasure_stderr": 0.0014289232021387648, "rougeL_precision": 0.09639136628498862, "rougeL_precision_stderr": 0.0015654498649897994, "rougeL_recall": 0.15704796853631195, "rougeL_recall_stderr": 0.002245229184123941, "rougeLsum_fmeasure": 0.13580783797212667, "rougeLsum_fmeasure_stderr": 0.0019069485763797925, "rougeLsum_precision": 0.12253923603559544, "rougeLsum_precision_stderr": 0.0020589899658542553, "rougeLsum_recall": 0.19382819078757685, "rougeLsum_recall_stderr": 0.0027171877182869237}}, "4": {"tldr_en": {"bleu": 0.3975830375151671, "bleu_stderr": 0.03517002110590927, "rouge1_fmeasure": 0.04993869263777386, "rouge1_fmeasure_stderr": 0.0017400482796179053, "rouge1_precision": 0.046448930876018805, "rouge1_precision_stderr": 0.0018035348802430623, "rouge1_recall": 0.07357293605788026, "rouge1_recall_stderr": 0.0025697179083559174, "rouge2_fmeasure": 0.009405169337583598, "rouge2_fmeasure_stderr": 0.0005257744049594801, "rouge2_precision": 0.009115547013068444, "rouge2_precision_stderr": 0.0007080420784229646, "rouge2_recall": 0.014379490534632422, "rouge2_recall_stderr": 0.0008642671865072303, "rougeL_fmeasure": 0.038166867834712785, "rougeL_fmeasure_stderr": 0.0012986554235132886, "rougeL_precision": 0.03565948935943829, "rougeL_precision_stderr": 0.0013979177010115587, "rougeL_recall": 0.057422168791446994, "rougeL_recall_stderr": 0.0020177366914654244, "rougeLsum_fmeasure": 0.0469149420555244, "rougeLsum_fmeasure_stderr": 0.0016335234715811244, "rougeLsum_precision": 0.04362589861222556, "rougeLsum_precision_stderr": 0.0016994783987957273, "rougeLsum_recall": 0.06935466728288939, "rougeLsum_recall_stderr": 0.002427541920224267}}, "5": {"tldr_en": {"bleu": 6.239939007315392e-07, "bleu_stderr": 9.71466408645211e-07, "rouge1_fmeasure": 0.007568370913695297, "rouge1_fmeasure_stderr": 0.0007491159620591852, "rouge1_precision": 0.007172892267383475, "rouge1_precision_stderr": 0.0007917929568286087, "rouge1_recall": 0.011671122997395015, "rouge1_recall_stderr": 0.0011606516791878689, "rouge2_fmeasure": 0.0013965027873250944, "rouge2_fmeasure_stderr": 0.00020572985862442358, "rouge2_precision": 0.001264178384729475, "rouge2_precision_stderr": 0.0001966473650926985, "rouge2_recall": 0.0024796221258986603, "rouge2_recall_stderr": 0.0004192114120392164, "rougeL_fmeasure": 0.005865552021821947, "rougeL_fmeasure_stderr": 0.0005564517319441601, "rougeL_precision": 0.005490417768289501, "rougeL_precision_stderr": 0.0005849801172714558, "rougeL_recall": 0.009413328535258628, "rougeL_recall_stderr": 0.0009398460308935353, "rougeLsum_fmeasure": 0.007131866271390784, "rougeLsum_fmeasure_stderr": 0.0007049678056309991, "rougeLsum_precision": 0.006712756393605032, "rougeLsum_precision_stderr": 0.0007345666949798625, "rougeLsum_recall": 0.010998399946717396, "rougeLsum_recall_stderr": 0.0010932046581100556}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.9812514400249457, "bleu_stderr": 0.05562262752722095, "rouge1_fmeasure": 0.10900954926081764, "rouge1_fmeasure_stderr": 0.0020004132814706306, "rouge1_precision": 0.15847815457785414, "rouge1_precision_stderr": 0.004328803941964481, "rouge1_recall": 0.12742700108402852, "rouge1_recall_stderr": 0.0023559640853185737, "rouge2_fmeasure": 0.027201662156796255, "rouge2_fmeasure_stderr": 0.000788791139157471, "rouge2_precision": 0.07159611132449242, "rouge2_precision_stderr": 0.0037286287454660018, "rouge2_recall": 0.028548308692682663, "rouge2_recall_stderr": 0.0008068071456433969, "rougeL_fmeasure": 0.08788250933957609, "rougeL_fmeasure_stderr": 0.0015590214437749034, "rougeL_precision": 0.1334388407711523, "rougeL_precision_stderr": 0.003974193073139387, "rougeL_recall": 0.10337061722487395, "rougeL_recall_stderr": 0.0018911584688758604, "rougeLsum_fmeasure": 0.09356044304033936, "rougeLsum_fmeasure_stderr": 0.001722161996359038, "rougeLsum_precision": 0.14150177498276953, "rougeLsum_precision_stderr": 0.004129595856079377, "rougeLsum_recall": 0.10875548432324901, "rougeLsum_recall_stderr": 0.002033382683937447}}, "1": {"generate_text_restaurant": {"bleu": 5.2253192440366405, "bleu_stderr": 0.07140167205007719, "rouge1_fmeasure": 0.26325944037191484, "rouge1_fmeasure_stderr": 0.0022748072352886776, "rouge1_precision": 0.3815177071515503, "rouge1_precision_stderr": 0.005218371241177884, "rouge1_recall": 0.3106318538579719, "rouge1_recall_stderr": 0.0031963478818675803, "rouge2_fmeasure": 0.10002146341400579, "rouge2_fmeasure_stderr": 0.0014693572942374512, "rouge2_precision": 0.20178279286083556, "rouge2_precision_stderr": 0.005294112759514037, "rouge2_recall": 0.11654879964306347, "rouge2_recall_stderr": 0.001824032288290023, "rougeL_fmeasure": 0.2113652311205057, "rougeL_fmeasure_stderr": 0.0017419352055642414, "rougeL_precision": 0.324114703856428, "rougeL_precision_stderr": 0.005097258650525136, "rougeL_recall": 0.2500017558247758, "rougeL_recall_stderr": 0.0025922878822160645, "rougeLsum_fmeasure": 0.22354194196380525, "rougeLsum_fmeasure_stderr": 0.0019987653462617975, "rougeLsum_precision": 0.3390145444380815, "rougeLsum_precision_stderr": 0.005172645689386997, "rougeLsum_recall": 0.26174975224096514, "rougeLsum_recall_stderr": 0.002787496997673839}}, "2": {"generate_text_restaurant": {"bleu": 5.968462167589187, "bleu_stderr": 0.0809413455934123, "rouge1_fmeasure": 0.29114774014783334, "rouge1_fmeasure_stderr": 0.0021518045305416187, "rouge1_precision": 0.37561041209802154, "rouge1_precision_stderr": 0.004803576540487985, "rouge1_recall": 0.34542246645230873, "rouge1_recall_stderr": 0.0028469806255225247, "rouge2_fmeasure": 0.11837013500076157, "rouge2_fmeasure_stderr": 0.0014809679532815788, "rouge2_precision": 0.18697031703703257, "rouge2_precision_stderr": 0.004365267354030048, "rouge2_recall": 0.13946536473639037, "rouge2_recall_stderr": 0.0018038930024966585, "rougeL_fmeasure": 0.2378485997813391, "rougeL_fmeasure_stderr": 0.0017083954437583943, "rougeL_precision": 0.315951536990457, "rougeL_precision_stderr": 0.004519636510616847, "rougeL_recall": 0.2848990694431196, "rougeL_recall_stderr": 0.0024528350275542597, "rougeLsum_fmeasure": 0.24624266553405377, "rougeLsum_fmeasure_stderr": 0.001958228700135443, "rougeLsum_precision": 0.32795875851147216, "rougeLsum_precision_stderr": 0.004687306327421472, "rougeLsum_recall": 0.29047623226002345, "rougeLsum_recall_stderr": 0.0025636131138164543}}, "3": {"generate_text_restaurant": {"bleu": 6.100413535925033, "bleu_stderr": 0.10204084214961892, "rouge1_fmeasure": 0.29715275676346536, "rouge1_fmeasure_stderr": 0.0020593689131373845, "rouge1_precision": 0.35577144203191385, "rouge1_precision_stderr": 0.004537003722998761, "rouge1_recall": 0.3647244934358361, "rouge1_recall_stderr": 0.002747523679173933, "rouge2_fmeasure": 0.12236707177425063, "rouge2_fmeasure_stderr": 0.001437321932186944, "rouge2_precision": 0.17035987156923135, "rouge2_precision_stderr": 0.0038407536951717567, "rouge2_recall": 0.15028814529506035, "rouge2_recall_stderr": 0.0018136946483250282, "rougeL_fmeasure": 0.24251170955511187, "rougeL_fmeasure_stderr": 0.0016065436945657693, "rougeL_precision": 0.29603811661828844, "rougeL_precision_stderr": 0.004152196775629119, "rougeL_recall": 0.3006722948935132, "rougeL_recall_stderr": 0.0023586910132368166, "rougeLsum_fmeasure": 0.2502378170722232, "rougeLsum_fmeasure_stderr": 0.0018582144974208202, "rougeLsum_precision": 0.30701171887463186, "rougeLsum_precision_stderr": 0.0043364855185944965, "rougeLsum_recall": 0.30602796565632656, "rougeLsum_recall_stderr": 0.0024654434831691296}}, "4": {"generate_text_restaurant": {"bleu": 6.302528506929725, "bleu_stderr": 0.09159160557761398, "rouge1_fmeasure": 0.3106389862425593, "rouge1_fmeasure_stderr": 0.0019745864967604964, "rouge1_precision": 0.3457184925802193, "rouge1_precision_stderr": 0.004160231582604038, "rouge1_recall": 0.38873799656965313, "rouge1_recall_stderr": 0.002639030454229803, "rouge2_fmeasure": 0.12862161964342028, "rouge2_fmeasure_stderr": 0.001433312603510002, "rouge2_precision": 0.1613981344463189, "rouge2_precision_stderr": 0.0034319902981585207, "rouge2_recall": 0.16100746129847524, "rouge2_recall_stderr": 0.0017978172805353685, "rougeL_fmeasure": 0.25227586868839813, "rougeL_fmeasure_stderr": 0.001559219635105758, "rougeL_precision": 0.28411370893647675, "rougeL_precision_stderr": 0.003743798776985757, "rougeL_recall": 0.319390176337282, "rougeL_recall_stderr": 0.0023152107019332466, "rougeLsum_fmeasure": 0.26125127654019925, "rougeLsum_fmeasure_stderr": 0.0018446022776906345, "rougeLsum_precision": 0.2963124375137464, "rougeLsum_precision_stderr": 0.003972319515480073, "rougeLsum_recall": 0.32622007058557423, "rougeLsum_recall_stderr": 0.002450252978851981}}, "5": {"generate_text_restaurant": {"bleu": 6.2420502388251435, "bleu_stderr": 0.11069521906199865, "rouge1_fmeasure": 0.31044786268293756, "rouge1_fmeasure_stderr": 0.001944681263041426, "rouge1_precision": 0.31078931466957455, "rouge1_precision_stderr": 0.0035821838565772805, "rouge1_recall": 0.40928814978401035, "rouge1_recall_stderr": 0.002513611780694676, "rouge2_fmeasure": 0.12861734204623188, "rouge2_fmeasure_stderr": 0.0014302618265553057, "rouge2_precision": 0.13661422794726982, "rouge2_precision_stderr": 0.00268531843352623, "rouge2_recall": 0.17131478141957424, "rouge2_recall_stderr": 0.001846264774766135, "rougeL_fmeasure": 0.2510224592044638, "rougeL_fmeasure_stderr": 0.0015295416689170924, "rougeL_precision": 0.2514460944139299, "rougeL_precision_stderr": 0.0030526186239578533, "rougeL_recall": 0.3356791583944898, "rougeL_recall_stderr": 0.002240539567274668, "rougeLsum_fmeasure": 0.2617560668553504, "rougeLsum_fmeasure_stderr": 0.001820974367559263, "rougeLsum_precision": 0.26462055977448007, "rougeLsum_precision_stderr": 0.003336730030918389, "rougeLsum_recall": 0.3452406840839544, "rougeLsum_recall_stderr": 0.0023886668900561766}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.4539286316534636, "bleu_stderr": 0.10413341300371938, "rouge1_fmeasure": 0.20008656508370964, "rouge1_fmeasure_stderr": 0.002602919942357546, "rouge1_precision": 0.16316619187689388, "rouge1_precision_stderr": 0.002890984890147137, "rouge1_recall": 0.31001559634568143, "rouge1_recall_stderr": 0.003978659821378838, "rouge2_fmeasure": 0.04090748442736069, "rouge2_fmeasure_stderr": 0.0014928104681925642, "rouge2_precision": 0.03352547525672803, "rouge2_precision_stderr": 0.0014320698536196147, "rouge2_recall": 0.06430340976455366, "rouge2_recall_stderr": 0.00231938341604023, "rougeL_fmeasure": 0.1502142949913443, "rougeL_fmeasure_stderr": 0.0020028080134693966, "rougeL_precision": 0.12270675062564947, "rougeL_precision_stderr": 0.0022927482127938856, "rougeL_recall": 0.2338821411106156, "rougeL_recall_stderr": 0.0031011088285461284, "rougeLsum_fmeasure": 0.15761386746032657, "rougeLsum_fmeasure_stderr": 0.0021239498717418375, "rougeLsum_precision": 0.12803202942224567, "rougeLsum_precision_stderr": 0.002326732638749928, "rougeLsum_recall": 0.24656433036848785, "rougeLsum_recall_stderr": 0.003410843786433149}}, "1": {"article_DOC_summary": {"bleu": 0.9965190176386574, "bleu_stderr": 0.05559453420420461, "rouge1_fmeasure": 0.16026754839725937, "rouge1_fmeasure_stderr": 0.002152722052964271, "rouge1_precision": 0.11453236899619632, "rouge1_precision_stderr": 0.0016195986503994135, "rouge1_recall": 0.2802645254992808, "rouge1_recall_stderr": 0.003696829148468212, "rouge2_fmeasure": 0.024638353865890434, "rouge2_fmeasure_stderr": 0.0011382421012757812, "rouge2_precision": 0.01742972704474614, "rouge2_precision_stderr": 0.0008103528180574307, "rouge2_recall": 0.04404260214452908, "rouge2_recall_stderr": 0.0020813739232181275, "rougeL_fmeasure": 0.12352821757343255, "rougeL_fmeasure_stderr": 0.0016133160942209523, "rougeL_precision": 0.08816027894258283, "rougeL_precision_stderr": 0.0012193640200953474, "rougeL_recall": 0.21709258927943814, "rougeL_recall_stderr": 0.002827223535684834, "rougeLsum_fmeasure": 0.12904870256695447, "rougeLsum_fmeasure_stderr": 0.0017598213737764256, "rougeLsum_precision": 0.0920451713692867, "rougeLsum_precision_stderr": 0.0013155981196239118, "rougeLsum_recall": 0.2271186416361983, "rougeLsum_recall_stderr": 0.003132670977496884}}, "2": {"article_DOC_summary": {"bleu": 0.8252686451228434, "bleu_stderr": 0.06219441463160752, "rouge1_fmeasure": 0.15769070631922, "rouge1_fmeasure_stderr": 0.001982981910213691, "rouge1_precision": 0.11145983804593526, "rouge1_precision_stderr": 0.0014743643800890196, "rouge1_recall": 0.28045679367482057, "rouge1_recall_stderr": 0.0034259770552426184, "rouge2_fmeasure": 0.021605793985650032, "rouge2_fmeasure_stderr": 0.0009634567881537258, "rouge2_precision": 0.015152005516530816, "rouge2_precision_stderr": 0.0006754060180232467, "rouge2_recall": 0.039166379272577614, "rouge2_recall_stderr": 0.0017988727907617531, "rougeL_fmeasure": 0.12029157280634108, "rougeL_fmeasure_stderr": 0.001476371836829727, "rougeL_precision": 0.08486603983815774, "rougeL_precision_stderr": 0.001085791049406374, "rougeL_recall": 0.21516366945216606, "rougeL_recall_stderr": 0.0026703236853124913, "rougeLsum_fmeasure": 0.12777306089054677, "rougeLsum_fmeasure_stderr": 0.0016653146013723525, "rougeLsum_precision": 0.0901270375976316, "rougeLsum_precision_stderr": 0.0012216984674086196, "rougeLsum_recall": 0.2285463112828528, "rougeLsum_recall_stderr": 0.002980474602095322}}, "3": {"article_DOC_summary": {"bleu": 0.8113683570520359, "bleu_stderr": 0.06619825265722147, "rouge1_fmeasure": 0.14870537856013735, "rouge1_fmeasure_stderr": 0.0022250373297856207, "rouge1_precision": 0.1078263305297306, "rouge1_precision_stderr": 0.0017542085299995392, "rouge1_recall": 0.2580253690370548, "rouge1_recall_stderr": 0.0037660524996162735, "rouge2_fmeasure": 0.020403012896272645, "rouge2_fmeasure_stderr": 0.0009871637156077466, "rouge2_precision": 0.014571709458807564, "rouge2_precision_stderr": 0.0007101227457714503, "rouge2_recall": 0.03574556425775684, "rouge2_recall_stderr": 0.0017388222484751564, "rougeL_fmeasure": 0.11315310318316395, "rougeL_fmeasure_stderr": 0.0016139643872092233, "rougeL_precision": 0.08197966655283043, "rougeL_precision_stderr": 0.001292307383970243, "rougeL_recall": 0.1974050382443013, "rougeL_recall_stderr": 0.0028035058486686956, "rougeLsum_fmeasure": 0.12007749682129613, "rougeLsum_fmeasure_stderr": 0.0017680042779764178, "rougeLsum_precision": 0.08677115079741256, "rougeLsum_precision_stderr": 0.001375140243984192, "rougeLsum_recall": 0.20995584785478394, "rougeLsum_recall_stderr": 0.0031091921366029267}}, "4": {"article_DOC_summary": {"bleu": 0.4568380418312376, "bleu_stderr": 0.11240957017003458, "rouge1_fmeasure": 0.04010781297274983, "rouge1_fmeasure_stderr": 0.0022481684730005936, "rouge1_precision": 0.03418472303276383, "rouge1_precision_stderr": 0.00210110351555125, "rouge1_recall": 0.06304793651249774, "rouge1_recall_stderr": 0.0035973524455498603, "rouge2_fmeasure": 0.00536187053653973, "rouge2_fmeasure_stderr": 0.0006114168649663856, "rouge2_precision": 0.004145873623694822, "rouge2_precision_stderr": 0.0005061006325311766, "rouge2_recall": 0.008843875903750643, "rouge2_recall_stderr": 0.0010081977796634128, "rougeL_fmeasure": 0.03144663530333203, "rougeL_fmeasure_stderr": 0.0017459513025573287, "rougeL_precision": 0.027278513716520444, "rougeL_precision_stderr": 0.0017455746766801424, "rougeL_recall": 0.04970336071033803, "rougeL_recall_stderr": 0.002848378744916723, "rougeLsum_fmeasure": 0.03239030933843817, "rougeLsum_fmeasure_stderr": 0.0018053015484972615, "rougeLsum_precision": 0.02813790632578131, "rougeLsum_precision_stderr": 0.0018032712966730017, "rougeLsum_recall": 0.05107839677205752, "rougeLsum_recall_stderr": 0.0029256027588574272}}, "5": {"article_DOC_summary": {"bleu": 1.2435863834999107e-38, "bleu_stderr": 1.6010999120135594e-32, "rouge1_fmeasure": 0.0017758047725847756, "rouge1_fmeasure_stderr": 0.0004794001433637215, "rouge1_precision": 0.0019374196381469005, "rouge1_precision_stderr": 0.0005297555109388115, "rouge1_recall": 0.0017199042758875617, "rouge1_recall_stderr": 0.0004699087955021987, "rouge2_fmeasure": 0.0, "rouge2_fmeasure_stderr": 0.0, "rouge2_precision": 0.0, "rouge2_precision_stderr": 0.0, "rouge2_recall": 0.0, "rouge2_recall_stderr": 0.0, "rougeL_fmeasure": 0.0014811178551071952, "rougeL_fmeasure_stderr": 0.00039295568397820974, "rougeL_precision": 0.0016417177155976152, "rougeL_precision_stderr": 0.0004455222998055521, "rougeL_recall": 0.0014066712761193988, "rougeL_recall_stderr": 0.0003697747285853825, "rougeLsum_fmeasure": 0.0016076723854203951, "rougeLsum_fmeasure_stderr": 0.0004229539683443897, "rougeLsum_precision": 0.0017493422405362358, "rougeLsum_precision_stderr": 0.0004643643474163307, "rougeLsum_recall": 0.0015674774510765171, "rougeLsum_recall_stderr": 0.00042362427764150467}}}}