Muennighoff's picture
Add
2aef930
raw
history blame
28.1 kB
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.35166211431807476, "bleu_stderr": 0.038071413053652275, "rouge1_fmeasure": 0.1064177979826906, "rouge1_fmeasure_stderr": 0.002014407824130901, "rouge1_precision": 0.06996455274792455, "rouge1_precision_stderr": 0.0015592090868851113, "rouge1_recall": 0.3142067124043025, "rouge1_recall_stderr": 0.0048759710824468425, "rouge2_fmeasure": 0.04977759217169638, "rouge2_fmeasure_stderr": 0.0012651503383739272, "rouge2_precision": 0.0322534557054686, "rouge2_precision_stderr": 0.0009291159250523059, "rouge2_recall": 0.15163416252366027, "rouge2_recall_stderr": 0.003394361327422675, "rougeL_fmeasure": 0.10151875678161845, "rougeL_fmeasure_stderr": 0.0018675156066895481, "rougeL_precision": 0.0665503328402659, "rougeL_precision_stderr": 0.0014349991462073507, "rougeL_recall": 0.30272317644546903, "rougeL_recall_stderr": 0.004757737332652531, "rougeLsum_fmeasure": 0.1005468816321386, "rougeLsum_fmeasure_stderr": 0.0018871768805081306, "rougeLsum_precision": 0.06618311419871553, "rougeLsum_precision_stderr": 0.0014716875527080561, "rougeLsum_recall": 0.29700852030137187, "rougeLsum_recall_stderr": 0.004591273606148228}}, "1": {"PALM_prompt": {"bleu": 0.5073000336177755, "bleu_stderr": 0.05344474459205947, "rouge1_fmeasure": 0.11830423680079659, "rouge1_fmeasure_stderr": 0.0020114505178455027, "rouge1_precision": 0.07583504538763819, "rouge1_precision_stderr": 0.001486787446354916, "rouge1_recall": 0.3716954850162639, "rouge1_recall_stderr": 0.005205035164096044, "rouge2_fmeasure": 0.0546051702795999, "rouge2_fmeasure_stderr": 0.0012569436349181779, "rouge2_precision": 0.03495776879514267, "rouge2_precision_stderr": 0.0009075777652215385, "rouge2_recall": 0.17885977259906066, "rouge2_recall_stderr": 0.003619881028179333, "rougeL_fmeasure": 0.1099788832885831, "rougeL_fmeasure_stderr": 0.0017852180819479216, "rougeL_precision": 0.07030089422838176, "rougeL_precision_stderr": 0.001298347917395423, "rougeL_recall": 0.34696500909784356, "rougeL_recall_stderr": 0.00482360237080409, "rougeLsum_fmeasure": 0.1110119989726, "rougeLsum_fmeasure_stderr": 0.0018755811955381053, "rougeLsum_precision": 0.07122182052038024, "rougeLsum_precision_stderr": 0.0013897967007664056, "rougeLsum_recall": 0.3477576171750515, "rougeLsum_recall_stderr": 0.0047659276141866136}}, "2": {"PALM_prompt": {"bleu": 0.5965263236594742, "bleu_stderr": 0.05163954961828016, "rouge1_fmeasure": 0.12233537069614382, "rouge1_fmeasure_stderr": 0.0019128091926460928, "rouge1_precision": 0.07783318626900966, "rouge1_precision_stderr": 0.0014071193338354882, "rouge1_recall": 0.39287654613370876, "rouge1_recall_stderr": 0.005155876341027884, "rouge2_fmeasure": 0.05592276690064219, "rouge2_fmeasure_stderr": 0.001197562574066807, "rouge2_precision": 0.0354373968856108, "rouge2_precision_stderr": 0.0008545023435501012, "rouge2_recall": 0.19039459495393043, "rouge2_recall_stderr": 0.003663120720639406, "rougeL_fmeasure": 0.11320053656812012, "rougeL_fmeasure_stderr": 0.001703639135984389, "rougeL_precision": 0.07188967304527148, "rougeL_precision_stderr": 0.0012421548275795777, "rougeL_recall": 0.3648630199526146, "rougeL_recall_stderr": 0.004750553642386819, "rougeLsum_fmeasure": 0.1142026340807714, "rougeLsum_fmeasure_stderr": 0.0017614419186672153, "rougeLsum_precision": 0.07268380811649039, "rougeLsum_precision_stderr": 0.0012997045865083563, "rougeLsum_recall": 0.36691870218506767, "rougeLsum_recall_stderr": 0.004689100059731652}}, "3": {"PALM_prompt": {"bleu": 0.6590445507565462, "bleu_stderr": 0.03731228606768085, "rouge1_fmeasure": 0.1255592398663772, "rouge1_fmeasure_stderr": 0.0018984572804718245, "rouge1_precision": 0.07980698513957299, "rouge1_precision_stderr": 0.00144116669925528, "rouge1_recall": 0.40871250304941253, "rouge1_recall_stderr": 0.005277184107543118, "rouge2_fmeasure": 0.05799188686736059, "rouge2_fmeasure_stderr": 0.0012470877418224642, "rouge2_precision": 0.036737647094888005, "rouge2_precision_stderr": 0.0009490563555727562, "rouge2_recall": 0.2012500620682874, "rouge2_recall_stderr": 0.003837876814963123, "rougeL_fmeasure": 0.11563164980687576, "rougeL_fmeasure_stderr": 0.0017088580020787614, "rougeL_precision": 0.07344239380035893, "rougeL_precision_stderr": 0.001301053396600686, "rougeL_recall": 0.37652823939289803, "rougeL_recall_stderr": 0.0048219321223642725, "rougeLsum_fmeasure": 0.1170142277810441, "rougeLsum_fmeasure_stderr": 0.0017619775635762797, "rougeLsum_precision": 0.07446732582003722, "rougeLsum_precision_stderr": 0.0013512166475080024, "rougeLsum_recall": 0.37983284753458935, "rougeLsum_recall_stderr": 0.004768406523815667}}, "4": {"PALM_prompt": {"bleu": 0.6612094103636722, "bleu_stderr": 0.05316688615822092, "rouge1_fmeasure": 0.12810389443498593, "rouge1_fmeasure_stderr": 0.0018474124639315977, "rouge1_precision": 0.0814867015244332, "rouge1_precision_stderr": 0.00140335178133597, "rouge1_recall": 0.41970698412830326, "rouge1_recall_stderr": 0.005264636607062869, "rouge2_fmeasure": 0.05856459210667893, "rouge2_fmeasure_stderr": 0.0011483796934916865, "rouge2_precision": 0.03716974645311799, "rouge2_precision_stderr": 0.000858671949423613, "rouge2_recall": 0.20569650614003127, "rouge2_recall_stderr": 0.0037797450322498744, "rougeL_fmeasure": 0.11674639728784307, "rougeL_fmeasure_stderr": 0.0016142840608080332, "rougeL_precision": 0.07414197510231946, "rougeL_precision_stderr": 0.0012218676591713073, "rougeL_recall": 0.3844479540779492, "rougeL_recall_stderr": 0.004824012642018971, "rougeLsum_fmeasure": 0.11883717065008154, "rougeLsum_fmeasure_stderr": 0.0017051707976467297, "rougeLsum_precision": 0.07570666678042967, "rougeLsum_precision_stderr": 0.0013105985925116346, "rougeLsum_recall": 0.3891045320252018, "rougeLsum_recall_stderr": 0.004752985303492144}}, "5": {"PALM_prompt": {"bleu": 0.7275241684724033, "bleu_stderr": 0.025139024300557544, "rouge1_fmeasure": 0.12807283189555618, "rouge1_fmeasure_stderr": 0.0018561044445990925, "rouge1_precision": 0.08119001231813255, "rouge1_precision_stderr": 0.0014308370032712263, "rouge1_recall": 0.42544190008314364, "rouge1_recall_stderr": 0.005347650585072293, "rouge2_fmeasure": 0.05924660282751187, "rouge2_fmeasure_stderr": 0.0011837956870791252, "rouge2_precision": 0.037488476057553025, "rouge2_precision_stderr": 0.0009127843819336664, "rouge2_recall": 0.21034382299607524, "rouge2_recall_stderr": 0.0038365434728806005, "rougeL_fmeasure": 0.1163811957107418, "rougeL_fmeasure_stderr": 0.0016389675453477302, "rougeL_precision": 0.07373330048793485, "rougeL_precision_stderr": 0.0012777957450006742, "rougeL_recall": 0.38841487576118355, "rougeL_recall_stderr": 0.004859412970167045, "rougeLsum_fmeasure": 0.11860693664537329, "rougeLsum_fmeasure_stderr": 0.0017120447774363115, "rougeLsum_precision": 0.07528625093598806, "rougeLsum_precision_stderr": 0.0013390502316204457, "rougeLsum_recall": 0.39421410712837596, "rougeLsum_recall_stderr": 0.004856189403160334}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.8709275972877584, "bleu_stderr": 0.058898637546592025, "rouge1_fmeasure": 0.18622713587913528, "rouge1_fmeasure_stderr": 0.0018412460647723918, "rouge1_precision": 0.15946778937891515, "rouge1_precision_stderr": 0.0019263515035577486, "rouge1_recall": 0.2715850357160657, "rouge1_recall_stderr": 0.0027017071210897574, "rouge2_fmeasure": 0.040460580826441606, "rouge2_fmeasure_stderr": 0.0008960319674160765, "rouge2_precision": 0.03438888805377945, "rouge2_precision_stderr": 0.0008080459258315309, "rouge2_recall": 0.06117675765752758, "rouge2_recall_stderr": 0.0015061198705940962, "rougeL_fmeasure": 0.14446880136361956, "rougeL_fmeasure_stderr": 0.0013159464984661877, "rougeL_precision": 0.12211228339854341, "rougeL_precision_stderr": 0.0013298965520439228, "rougeL_recall": 0.21625650856608786, "rougeL_recall_stderr": 0.0022314529211675395, "rougeLsum_fmeasure": 0.1716160186471173, "rougeLsum_fmeasure_stderr": 0.0016894452306051078, "rougeLsum_precision": 0.1467045219684518, "rougeLsum_precision_stderr": 0.0017626659229341545, "rougeLsum_recall": 0.25116019588094557, "rougeLsum_recall_stderr": 0.0025177140997767675}}, "1": {"tldr_en": {"bleu": 2.3540646113466352, "bleu_stderr": 0.058383312223254456, "rouge1_fmeasure": 0.2051454909041775, "rouge1_fmeasure_stderr": 0.001963503184239578, "rouge1_precision": 0.17861978084436733, "rouge1_precision_stderr": 0.0021557050699636633, "rouge1_recall": 0.29492911982793857, "rouge1_recall_stderr": 0.0027364010294303274, "rouge2_fmeasure": 0.047516585339244355, "rouge2_fmeasure_stderr": 0.0009726713638331002, "rouge2_precision": 0.04216196051123768, "rouge2_precision_stderr": 0.0009857371725593724, "rouge2_recall": 0.06892249984818269, "rouge2_recall_stderr": 0.0015097658330848196, "rougeL_fmeasure": 0.14783021982665137, "rougeL_fmeasure_stderr": 0.00133667062385201, "rougeL_precision": 0.1277101470262864, "rougeL_precision_stderr": 0.0014839765534000772, "rougeL_recall": 0.21755280146548553, "rougeL_recall_stderr": 0.0021188768570991334, "rougeLsum_fmeasure": 0.1927127973435724, "rougeLsum_fmeasure_stderr": 0.0018361383918979964, "rougeLsum_precision": 0.16764488030279848, "rougeLsum_precision_stderr": 0.002021292385911164, "rougeLsum_recall": 0.2778932641853077, "rougeLsum_recall_stderr": 0.0025900462140142086}}, "2": {"tldr_en": {"bleu": 2.5117616289013234, "bleu_stderr": 0.07882958513520422, "rouge1_fmeasure": 0.2125892107712728, "rouge1_fmeasure_stderr": 0.0018726113691632032, "rouge1_precision": 0.1867354040241945, "rouge1_precision_stderr": 0.0021764534616989487, "rouge1_recall": 0.307356685382873, "rouge1_recall_stderr": 0.002668787558898061, "rouge2_fmeasure": 0.05037415373416898, "rouge2_fmeasure_stderr": 0.0009688792622757568, "rouge2_precision": 0.044945343265659306, "rouge2_precision_stderr": 0.0010090897572849198, "rouge2_recall": 0.07429122479757874, "rouge2_recall_stderr": 0.0015703671923323263, "rougeL_fmeasure": 0.15111305737547598, "rougeL_fmeasure_stderr": 0.0012722677867412764, "rougeL_precision": 0.13207273293373048, "rougeL_precision_stderr": 0.0015334022424691339, "rougeL_recall": 0.22372031586233712, "rougeL_recall_stderr": 0.002107048468174928, "rougeLsum_fmeasure": 0.20022450371280426, "rougeLsum_fmeasure_stderr": 0.0017542714847597852, "rougeLsum_precision": 0.1758960740764525, "rougeLsum_precision_stderr": 0.0020592289423643946, "rougeLsum_recall": 0.2900411283233057, "rougeLsum_recall_stderr": 0.0025253027294766133}}, "3": {"tldr_en": {"bleu": 2.403113898933743, "bleu_stderr": 0.053251073858493, "rouge1_fmeasure": 0.17575906704306832, "rouge1_fmeasure_stderr": 0.002174949765654744, "rouge1_precision": 0.1591881844404937, "rouge1_precision_stderr": 0.0024467695339504187, "rouge1_recall": 0.25516734786727036, "rouge1_recall_stderr": 0.0032356801874518703, "rouge2_fmeasure": 0.0413255105224322, "rouge2_fmeasure_stderr": 0.000931832765467482, "rouge2_precision": 0.03784379091188361, "rouge2_precision_stderr": 0.001089131871792838, "rouge2_recall": 0.0624780077769272, "rouge2_recall_stderr": 0.0016087440931355302, "rougeL_fmeasure": 0.12628359782789042, "rougeL_fmeasure_stderr": 0.0015182895930992903, "rougeL_precision": 0.11442394801240031, "rougeL_precision_stderr": 0.0018112801572746817, "rougeL_recall": 0.18773348293405673, "rougeL_recall_stderr": 0.002527108820020842, "rougeLsum_fmeasure": 0.16530823720647217, "rougeLsum_fmeasure_stderr": 0.0020367190470455404, "rougeLsum_precision": 0.14968568215454517, "rougeLsum_precision_stderr": 0.002308127867301231, "rougeLsum_recall": 0.2408810647537952, "rougeLsum_recall_stderr": 0.0030822171919214422}}, "4": {"tldr_en": {"bleu": 0.472345677088637, "bleu_stderr": 0.040900356286228075, "rouge1_fmeasure": 0.0557867591334426, "rouge1_fmeasure_stderr": 0.00186394804170534, "rouge1_precision": 0.052405749034228705, "rouge1_precision_stderr": 0.0020110499867171685, "rouge1_recall": 0.08478707705138788, "rouge1_recall_stderr": 0.0028865777805548412, "rouge2_fmeasure": 0.012185104729686681, "rouge2_fmeasure_stderr": 0.0005907016058266906, "rouge2_precision": 0.011080419003140766, "rouge2_precision_stderr": 0.0006523070791806857, "rouge2_recall": 0.01999068467854864, "rouge2_recall_stderr": 0.001096551185475821, "rougeL_fmeasure": 0.04111644120483341, "rougeL_fmeasure_stderr": 0.0013476527639030426, "rougeL_precision": 0.03862877667774788, "rougeL_precision_stderr": 0.0014997122070137985, "rougeL_recall": 0.0641687643508006, "rougeL_recall_stderr": 0.002226474813987104, "rougeLsum_fmeasure": 0.052305917688342855, "rougeLsum_fmeasure_stderr": 0.0017400550554708667, "rougeLsum_precision": 0.04914657123032221, "rougeLsum_precision_stderr": 0.0018810939013272998, "rougeLsum_recall": 0.07980206255787106, "rougeLsum_recall_stderr": 0.0027207315430338498}}, "5": {"tldr_en": {"bleu": 6.538062149513937e-07, "bleu_stderr": 1.1494813563989744e-06, "rouge1_fmeasure": 0.00898341548277317, "rouge1_fmeasure_stderr": 0.0008281424457556686, "rouge1_precision": 0.008509363653553257, "rouge1_precision_stderr": 0.000845658818252671, "rouge1_recall": 0.01382276652509776, "rouge1_recall_stderr": 0.0013334225811003739, "rouge2_fmeasure": 0.002227060333018499, "rouge2_fmeasure_stderr": 0.0002638537239607946, "rouge2_precision": 0.0019932915366697277, "rouge2_precision_stderr": 0.0002440516250439588, "rouge2_recall": 0.004040972091745142, "rouge2_recall_stderr": 0.0006014270152725088, "rougeL_fmeasure": 0.006634289847575651, "rougeL_fmeasure_stderr": 0.0006061194677904214, "rougeL_precision": 0.006364757119046798, "rougeL_precision_stderr": 0.0006395473026447492, "rougeL_recall": 0.010409531448448452, "rougeL_recall_stderr": 0.0010327387282074423, "rougeLsum_fmeasure": 0.008483394098012836, "rougeLsum_fmeasure_stderr": 0.0007802466552819305, "rougeLsum_precision": 0.00802904222397337, "rougeLsum_precision_stderr": 0.0007953313177037809, "rougeLsum_recall": 0.013136908473646484, "rougeLsum_recall_stderr": 0.0012773726821035238}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.20344702036475212, "bleu_stderr": 0.020813423545746928, "rouge1_fmeasure": 0.02272612382623739, "rouge1_fmeasure_stderr": 0.000657613160095564, "rouge1_precision": 0.02031066874429793, "rouge1_precision_stderr": 0.0010982973394100017, "rouge1_recall": 0.0339006777431073, "rouge1_recall_stderr": 0.0010754582360608094, "rouge2_fmeasure": 0.0021610970637232323, "rouge2_fmeasure_stderr": 0.00025014704785064965, "rouge2_precision": 0.004428962002328776, "rouge2_precision_stderr": 0.0010133668145643837, "rouge2_recall": 0.0032484488515415436, "rouge2_recall_stderr": 0.00039041664574734875, "rougeL_fmeasure": 0.022638837838076648, "rougeL_fmeasure_stderr": 0.000646894021986954, "rougeL_precision": 0.02023890948503867, "rougeL_precision_stderr": 0.001094108815755343, "rougeL_recall": 0.03378068019453833, "rougeL_recall_stderr": 0.0010619663136781138, "rougeLsum_fmeasure": 0.021275240072120734, "rougeLsum_fmeasure_stderr": 0.0005850724780177193, "rougeLsum_precision": 0.019207655955704183, "rougeLsum_precision_stderr": 0.0010760427436256133, "rougeLsum_recall": 0.03164987205173273, "rougeLsum_recall_stderr": 0.0009558614487519096}}, "1": {"generate_text_restaurant": {"bleu": 6.931679880377635, "bleu_stderr": 0.12521222343600627, "rouge1_fmeasure": 0.35867604679541404, "rouge1_fmeasure_stderr": 0.0022242609651642654, "rouge1_precision": 0.34003150968688334, "rouge1_precision_stderr": 0.003075339331446666, "rouge1_recall": 0.45015732570626815, "rouge1_recall_stderr": 0.0028997064403193467, "rouge2_fmeasure": 0.15367455208735437, "rouge2_fmeasure_stderr": 0.0016232409779530833, "rouge2_precision": 0.14759589015368266, "rouge2_precision_stderr": 0.002060777542304401, "rouge2_recall": 0.1951244486489613, "rouge2_recall_stderr": 0.0020953412261598093, "rougeL_fmeasure": 0.2708573841017006, "rougeL_fmeasure_stderr": 0.0016414160973280045, "rougeL_precision": 0.25480483800873843, "rougeL_precision_stderr": 0.0023341935238719103, "rougeL_recall": 0.3465181251792575, "rougeL_recall_stderr": 0.0024646626498150185, "rougeLsum_fmeasure": 0.29387614768355197, "rougeLsum_fmeasure_stderr": 0.0021153745644870423, "rougeLsum_precision": 0.2799685955963331, "rougeLsum_precision_stderr": 0.0028032299642029843, "rougeLsum_recall": 0.3677356916274029, "rougeLsum_recall_stderr": 0.0026996771832412166}}, "2": {"generate_text_restaurant": {"bleu": 7.657232828965316, "bleu_stderr": 0.12471874140369284, "rouge1_fmeasure": 0.3776586411733957, "rouge1_fmeasure_stderr": 0.002198646985806164, "rouge1_precision": 0.35675768591806567, "rouge1_precision_stderr": 0.0031022663155459203, "rouge1_recall": 0.46937017367708633, "rouge1_recall_stderr": 0.00275346130622528, "rouge2_fmeasure": 0.17010386224275442, "rouge2_fmeasure_stderr": 0.0016283471019571314, "rouge2_precision": 0.16231756057946886, "rouge2_precision_stderr": 0.0020395417907086693, "rouge2_recall": 0.2138289664047894, "rouge2_recall_stderr": 0.0020707270316515283, "rougeL_fmeasure": 0.28123834488197835, "rougeL_fmeasure_stderr": 0.0016069446891559397, "rougeL_precision": 0.26310458036396617, "rougeL_precision_stderr": 0.002272981131996748, "rougeL_recall": 0.35664473399252883, "rougeL_recall_stderr": 0.0023940807408273975, "rougeLsum_fmeasure": 0.30825756247609604, "rougeLsum_fmeasure_stderr": 0.0020874241622850045, "rougeLsum_precision": 0.2914642398591572, "rougeLsum_precision_stderr": 0.002757164859310039, "rougeLsum_recall": 0.3829945698750505, "rougeLsum_recall_stderr": 0.0026117917187163785}}, "3": {"generate_text_restaurant": {"bleu": 8.442104172996691, "bleu_stderr": 0.1225265310989897, "rouge1_fmeasure": 0.39418201029816835, "rouge1_fmeasure_stderr": 0.0021766899975066423, "rouge1_precision": 0.37510123434341147, "rouge1_precision_stderr": 0.0029659588823890727, "rouge1_recall": 0.47688116434271155, "rouge1_recall_stderr": 0.002779761613350503, "rouge2_fmeasure": 0.18042060590724515, "rouge2_fmeasure_stderr": 0.0016663829828460059, "rouge2_precision": 0.1722185872161438, "rouge2_precision_stderr": 0.001982108880961337, "rouge2_recall": 0.22148343783169197, "rouge2_recall_stderr": 0.0021298024706319753, "rougeL_fmeasure": 0.2871477669735601, "rougeL_fmeasure_stderr": 0.0016369768534961649, "rougeL_precision": 0.2708367244008351, "rougeL_precision_stderr": 0.002174690044000994, "rougeL_recall": 0.35404771153503223, "rougeL_recall_stderr": 0.002443187318628693, "rougeLsum_fmeasure": 0.3246521112217336, "rougeLsum_fmeasure_stderr": 0.002119544362824107, "rougeLsum_precision": 0.30935204172558023, "rougeLsum_precision_stderr": 0.0026977427492078823, "rougeLsum_recall": 0.3923447311858455, "rougeLsum_recall_stderr": 0.0026716176658316635}}, "4": {"generate_text_restaurant": {"bleu": 9.264771481612339, "bleu_stderr": 0.14797632752636197, "rouge1_fmeasure": 0.4086207638030011, "rouge1_fmeasure_stderr": 0.0020852610120524026, "rouge1_precision": 0.3925987211674676, "rouge1_precision_stderr": 0.0026976461573692354, "rouge1_recall": 0.47354063069748037, "rouge1_recall_stderr": 0.0026338292343196816, "rouge2_fmeasure": 0.18703841259223752, "rouge2_fmeasure_stderr": 0.0016890429531878357, "rouge2_precision": 0.1796234616071412, "rouge2_precision_stderr": 0.0018685574967649593, "rouge2_recall": 0.2190905542412407, "rouge2_recall_stderr": 0.002068607728271308, "rougeL_fmeasure": 0.29515772076134655, "rougeL_fmeasure_stderr": 0.0016661150684501613, "rougeL_precision": 0.2819581010790434, "rougeL_precision_stderr": 0.002044363363394681, "rougeL_recall": 0.346344258787895, "rougeL_recall_stderr": 0.0023161012828013965, "rougeLsum_fmeasure": 0.3382507422373424, "rougeLsum_fmeasure_stderr": 0.0021124639320887124, "rougeLsum_precision": 0.32521791939739797, "rougeLsum_precision_stderr": 0.0025320061675288244, "rougeLsum_recall": 0.3913010253854303, "rougeLsum_recall_stderr": 0.0025849947764755407}}, "5": {"generate_text_restaurant": {"bleu": 9.958980611111656, "bleu_stderr": 0.14855771135694565, "rouge1_fmeasure": 0.4197228631027415, "rouge1_fmeasure_stderr": 0.0020063425382385058, "rouge1_precision": 0.4061514037393209, "rouge1_precision_stderr": 0.0025821552864821757, "rouge1_recall": 0.47804480569363766, "rouge1_recall_stderr": 0.002643357770719704, "rouge2_fmeasure": 0.1931509426359425, "rouge2_fmeasure_stderr": 0.001665707134510459, "rouge2_precision": 0.18757831560333757, "rouge2_precision_stderr": 0.001891660346486441, "rouge2_recall": 0.2218502429628836, "rouge2_recall_stderr": 0.0020386307840161645, "rougeL_fmeasure": 0.3017298916186032, "rougeL_fmeasure_stderr": 0.0017018141577312348, "rougeL_precision": 0.2911232268001962, "rougeL_precision_stderr": 0.0020806579815607195, "rougeL_recall": 0.3466822060660839, "rougeL_recall_stderr": 0.002329148139407088, "rougeLsum_fmeasure": 0.35086688827184365, "rougeLsum_fmeasure_stderr": 0.0020482132353444454, "rougeLsum_precision": 0.33992936633595283, "rougeLsum_precision_stderr": 0.0024759755832926282, "rougeLsum_recall": 0.39927240691403565, "rougeLsum_recall_stderr": 0.0025795756365194628}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.7847107445080181, "bleu_stderr": 0.12259281036544756, "rouge1_fmeasure": 0.20091233893732696, "rouge1_fmeasure_stderr": 0.002460990636185999, "rouge1_precision": 0.14717191874948962, "rouge1_precision_stderr": 0.002039989999763031, "rouge1_recall": 0.34130886710278735, "rouge1_recall_stderr": 0.004109809014072106, "rouge2_fmeasure": 0.04296352475719179, "rouge2_fmeasure_stderr": 0.0015091336137169667, "rouge2_precision": 0.031180276988501406, "rouge2_precision_stderr": 0.0011515924925153934, "rouge2_recall": 0.0750546362679565, "rouge2_recall_stderr": 0.002664189585730598, "rougeL_fmeasure": 0.15294193870162445, "rougeL_fmeasure_stderr": 0.0018589320642533784, "rougeL_precision": 0.11187209453346593, "rougeL_precision_stderr": 0.0015620035707168242, "rougeL_recall": 0.26167076851220833, "rougeL_recall_stderr": 0.0032523973074039445, "rougeLsum_fmeasure": 0.15798602735496012, "rougeLsum_fmeasure_stderr": 0.0020505297987330412, "rougeLsum_precision": 0.11548986592851035, "rougeLsum_precision_stderr": 0.0016848405957502828, "rougeLsum_recall": 0.27023318122685785, "rougeLsum_recall_stderr": 0.0035574900499767722}}, "1": {"article_DOC_summary": {"bleu": 1.1845032944798495, "bleu_stderr": 0.08458246586222135, "rouge1_fmeasure": 0.17024912689462188, "rouge1_fmeasure_stderr": 0.0023476916428796877, "rouge1_precision": 0.12097137240313847, "rouge1_precision_stderr": 0.0017496349490089252, "rouge1_recall": 0.2990148200796171, "rouge1_recall_stderr": 0.003991241961142267, "rouge2_fmeasure": 0.030605161726994057, "rouge2_fmeasure_stderr": 0.0012798169612394362, "rouge2_precision": 0.021525610014554653, "rouge2_precision_stderr": 0.0009005470188155159, "rouge2_recall": 0.055447375013310755, "rouge2_recall_stderr": 0.002401181861336932, "rougeL_fmeasure": 0.13223813090153158, "rougeL_fmeasure_stderr": 0.0017763582648502928, "rougeL_precision": 0.09373642633960022, "rougeL_precision_stderr": 0.001306812474264428, "rougeL_recall": 0.23409871583030145, "rougeL_recall_stderr": 0.0031894294773192737, "rougeLsum_fmeasure": 0.13796987863879345, "rougeLsum_fmeasure_stderr": 0.0019475228863655185, "rougeLsum_precision": 0.0978239478590475, "rougeLsum_precision_stderr": 0.0014318544124746646, "rougeLsum_recall": 0.24391576493442443, "rougeLsum_recall_stderr": 0.0034515787165719444}}, "2": {"article_DOC_summary": {"bleu": 1.235087762721956, "bleu_stderr": 0.12954971142620794, "rouge1_fmeasure": 0.17123176762196604, "rouge1_fmeasure_stderr": 0.002367991688636443, "rouge1_precision": 0.12153696089604207, "rouge1_precision_stderr": 0.0017564783405385376, "rouge1_recall": 0.30200065000392107, "rouge1_recall_stderr": 0.004133147911453231, "rouge2_fmeasure": 0.03194665621464301, "rouge2_fmeasure_stderr": 0.0012704379487708378, "rouge2_precision": 0.022397648703525567, "rouge2_precision_stderr": 0.0008966352941752963, "rouge2_recall": 0.05865756100278522, "rouge2_recall_stderr": 0.002417706489905652, "rougeL_fmeasure": 0.13347870518175978, "rougeL_fmeasure_stderr": 0.001810460575841908, "rougeL_precision": 0.09454924200528139, "rougeL_precision_stderr": 0.0013285980685601034, "rougeL_recall": 0.23705948565261042, "rougeL_recall_stderr": 0.0033238074174046623, "rougeLsum_fmeasure": 0.13805797792168725, "rougeLsum_fmeasure_stderr": 0.0019538992181766693, "rougeLsum_precision": 0.09773827044948902, "rougeLsum_precision_stderr": 0.0014279562335011396, "rougeLsum_recall": 0.24540888083433957, "rougeLsum_recall_stderr": 0.0035863376843111756}}, "3": {"article_DOC_summary": {"bleu": 1.2530842896627028, "bleu_stderr": 0.1292510298416823, "rouge1_fmeasure": 0.1640154958358604, "rouge1_fmeasure_stderr": 0.0025105121490214565, "rouge1_precision": 0.11912123096636808, "rouge1_precision_stderr": 0.001985064027306789, "rouge1_recall": 0.284241972897382, "rouge1_recall_stderr": 0.00429277701738966, "rouge2_fmeasure": 0.03073680556060331, "rouge2_fmeasure_stderr": 0.0012970855907124138, "rouge2_precision": 0.022035764094734074, "rouge2_precision_stderr": 0.0009495941851705396, "rouge2_recall": 0.05485929060591564, "rouge2_recall_stderr": 0.0023528230640158182, "rougeL_fmeasure": 0.12797306233278474, "rougeL_fmeasure_stderr": 0.0018874238035470816, "rougeL_precision": 0.09256740744943301, "rougeL_precision_stderr": 0.0014688063331016958, "rougeL_recall": 0.2237667779749703, "rougeL_recall_stderr": 0.0033868663103060566, "rougeLsum_fmeasure": 0.13064448950535576, "rougeLsum_fmeasure_stderr": 0.0020334528859491884, "rougeLsum_precision": 0.09452589861322153, "rougeLsum_precision_stderr": 0.0015705338179864972, "rougeLsum_recall": 0.22814994714493522, "rougeLsum_recall_stderr": 0.0036128044767980974}}, "4": {"article_DOC_summary": {"bleu": 0.5139598974348316, "bleu_stderr": 0.11912575032590099, "rouge1_fmeasure": 0.04359155004979404, "rouge1_fmeasure_stderr": 0.002446009970548131, "rouge1_precision": 0.036060956179475516, "rouge1_precision_stderr": 0.002146455590056505, "rouge1_recall": 0.0690610482890532, "rouge1_recall_stderr": 0.003982103092460407, "rouge2_fmeasure": 0.007201974037466402, "rouge2_fmeasure_stderr": 0.0006993387361055898, "rouge2_precision": 0.00544898809262409, "rouge2_precision_stderr": 0.0005451635252349526, "rouge2_recall": 0.012376718286934233, "rouge2_recall_stderr": 0.001258378038487793, "rougeL_fmeasure": 0.034064899256478404, "rougeL_fmeasure_stderr": 0.0019036508052699063, "rougeL_precision": 0.02859153283892875, "rougeL_precision_stderr": 0.0017520581139644556, "rougeL_recall": 0.0540675976921445, "rougeL_recall_stderr": 0.0031205628190529014, "rougeLsum_fmeasure": 0.035141373500357204, "rougeLsum_fmeasure_stderr": 0.0019687859509567853, "rougeLsum_precision": 0.02946392180089849, "rougeLsum_precision_stderr": 0.0018091240355730465, "rougeLsum_recall": 0.05579499878286511, "rougeLsum_recall_stderr": 0.0032320807958250114}}, "5": {"article_DOC_summary": {"bleu": 1.5223289746048162e-36, "bleu_stderr": 3.669632606867041e-32, "rouge1_fmeasure": 0.0026631982951325368, "rouge1_fmeasure_stderr": 0.0007488447516617362, "rouge1_precision": 0.00304016734736987, "rouge1_precision_stderr": 0.000888339058787168, "rouge1_recall": 0.002441151228222129, "rouge1_recall_stderr": 0.0006688844091459975, "rouge2_fmeasure": 0.00041201488569909625, "rouge2_fmeasure_stderr": 0.0002949309636685756, "rouge2_precision": 0.0005433276798426632, "rouge2_precision_stderr": 0.00039244433049951473, "rouge2_recall": 0.00033315740862910673, "rouge2_recall_stderr": 0.0002365408622759455, "rougeL_fmeasure": 0.0019214818091001502, "rougeL_fmeasure_stderr": 0.0005698678616813168, "rougeL_precision": 0.002191679085043183, "rougeL_precision_stderr": 0.000685805567529641, "rougeL_recall": 0.0017718084851004953, "rougeL_recall_stderr": 0.0005070173978560121, "rougeLsum_fmeasure": 0.001998668773079567, "rougeLsum_fmeasure_stderr": 0.000600651825136055, "rougeLsum_precision": 0.002284463934531111, "rougeLsum_precision_stderr": 0.0007267983753519665, "rougeLsum_recall": 0.0018384573787288605, "rougeLsum_recall_stderr": 0.000531115868075893}}}}