Muennighoff commited on
Commit
75fd4ec
1 Parent(s): c462de0
4b284b1b9c4/evaluation/generation/merged.csv CHANGED
@@ -1 +1,53 @@
1
  dataset,fewshots,prompt,metric,value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  dataset,fewshots,prompt,metric,value
2
+ e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.022375489011305233
3
+ e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.022375489011305233
4
+ e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.13963411320567692
5
+ e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.13963411320567692
6
+ e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.1566733423938655
7
+ e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.1566733423938655
8
+ e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.1625675970078791
9
+ e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.1625675970078791
10
+ e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.16792240286212373
11
+ e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.16792240286212373
12
+ e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.1671450372115207
13
+ e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.1671450372115207
14
+ e2e_nlg_cleaned,5,average,multiple,0.13605299694872852
15
+ gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.036126066636598314
16
+ gem_xsum,0,median,rouge2_fmeasure,0.036126066636598314
17
+ gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.022429611615385675
18
+ gem_xsum,1,median,rouge2_fmeasure,0.022429611615385675
19
+ gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.021292811682357263
20
+ gem_xsum,2,median,rouge2_fmeasure,0.021292811682357263
21
+ gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.021167446810664674
22
+ gem_xsum,3,median,rouge2_fmeasure,0.021167446810664674
23
+ gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.006191903982694321
24
+ gem_xsum,4,median,rouge2_fmeasure,0.006191903982694321
25
+ gem_xsum,5,article_DOC_summary,rouge2_fmeasure,3.8117019249094724e-05
26
+ gem_xsum,5,median,rouge2_fmeasure,3.8117019249094724e-05
27
+ gem_xsum,5,average,multiple,0.017874326291158222
28
+ web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.041931586488861895
29
+ web_nlg_en,0,median,rouge2_fmeasure,0.041931586488861895
30
+ web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.036418630396022084
31
+ web_nlg_en,1,median,rouge2_fmeasure,0.036418630396022084
32
+ web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.04002742617237918
33
+ web_nlg_en,2,median,rouge2_fmeasure,0.04002742617237918
34
+ web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.037603505078738005
35
+ web_nlg_en,3,median,rouge2_fmeasure,0.037603505078738005
36
+ web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.03886088367864174
37
+ web_nlg_en,4,median,rouge2_fmeasure,0.03886088367864174
38
+ web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.03935591402102145
39
+ web_nlg_en,5,median,rouge2_fmeasure,0.03935591402102145
40
+ web_nlg_en,5,average,multiple,0.039032990972610725
41
+ wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.031784532049007205
42
+ wiki_lingua_en,0,median,rouge2_fmeasure,0.031784532049007205
43
+ wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.03024160289297904
44
+ wiki_lingua_en,1,median,rouge2_fmeasure,0.03024160289297904
45
+ wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.030130202799052323
46
+ wiki_lingua_en,2,median,rouge2_fmeasure,0.030130202799052323
47
+ wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.02527715313239358
48
+ wiki_lingua_en,3,median,rouge2_fmeasure,0.02527715313239358
49
+ wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.008199708088564734
50
+ wiki_lingua_en,4,median,rouge2_fmeasure,0.008199708088564734
51
+ wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0012873383840521695
52
+ wiki_lingua_en,5,median,rouge2_fmeasure,0.0012873383840521695
53
+ wiki_lingua_en,5,average,multiple,0.021153422891008175
4b284b1b9c4/evaluation/generation/merged.json CHANGED
@@ -1 +1 @@
1
- {}
 
1
+ {"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3190651465856718, "bleu_stderr": 0.015243909630507646, "rouge1_fmeasure": 0.0997318816573953, "rouge1_fmeasure_stderr": 0.0021156518889089582, "rouge1_precision": 0.08643170272763852, "rouge1_precision_stderr": 0.0029752364548641585, "rouge1_recall": 0.2606405590645512, "rouge1_recall_stderr": 0.004731118962384157, "rouge2_fmeasure": 0.041931586488861895, "rouge2_fmeasure_stderr": 0.0013179930778612692, "rouge2_precision": 0.036668398413519, "rouge2_precision_stderr": 0.0019573720295730864, "rouge2_recall": 0.11400452934466292, "rouge2_recall_stderr": 0.0030793812158255393, "rougeL_fmeasure": 0.09052629534128674, "rougeL_fmeasure_stderr": 0.0018679799075585592, "rougeL_precision": 0.07834795250202393, "rougeL_precision_stderr": 0.002700745161136578, "rougeL_recall": 0.24056751030373802, "rougeL_recall_stderr": 0.004433582003257431, "rougeLsum_fmeasure": 0.09227026328046098, "rougeLsum_fmeasure_stderr": 0.0019370761621205978, "rougeLsum_precision": 0.08031657714527592, "rougeLsum_precision_stderr": 0.002804477170720135, "rougeLsum_recall": 0.24107310348334796, "rougeLsum_recall_stderr": 0.004335653599630527}}, "1": {"PALM_prompt": {"bleu": 0.3371393379385822, "bleu_stderr": 0.029206770271577982, "rouge1_fmeasure": 0.09029486734492195, "rouge1_fmeasure_stderr": 0.0018433756085981456, "rouge1_precision": 0.06839664555566648, "rouge1_precision_stderr": 0.0023780792066031052, "rouge1_recall": 0.26849287923639203, "rouge1_recall_stderr": 0.004806355291903039, "rouge2_fmeasure": 0.036418630396022084, "rouge2_fmeasure_stderr": 0.001105848667936206, "rouge2_precision": 0.02769731249554232, "rouge2_precision_stderr": 0.0014447703452549708, "rouge2_recall": 0.11230775617683765, "rouge2_recall_stderr": 0.003047916691043346, "rougeL_fmeasure": 0.0810471539767474, "rougeL_fmeasure_stderr": 0.0016161372448830458, "rougeL_precision": 0.06125236621102583, "rougeL_precision_stderr": 0.002147395533725833, "rougeL_recall": 0.2446038633130873, "rougeL_recall_stderr": 0.004415235603175971, "rougeLsum_fmeasure": 0.08293177770121675, "rougeLsum_fmeasure_stderr": 0.0016768544086881434, "rougeLsum_precision": 0.06307769515529386, "rougeLsum_precision_stderr": 0.002246682832101251, "rougeLsum_recall": 0.24783709282133357, "rougeLsum_recall_stderr": 0.004397710432472069}}, "2": {"PALM_prompt": {"bleu": 0.35948236234248565, "bleu_stderr": 0.017377358413139317, "rouge1_fmeasure": 0.09642958487413873, "rouge1_fmeasure_stderr": 0.0018025829878258186, "rouge1_precision": 0.06657566788938414, "rouge1_precision_stderr": 0.001749992548119195, "rouge1_recall": 0.2937554955883932, "rouge1_recall_stderr": 0.004965816647115664, "rouge2_fmeasure": 0.04002742617237918, "rouge2_fmeasure_stderr": 0.0010795392466190256, "rouge2_precision": 0.02735639402169493, "rouge2_precision_stderr": 0.0009474391818383607, "rouge2_recall": 0.12911080763433516, "rouge2_recall_stderr": 0.0033052734287089074, "rougeL_fmeasure": 0.08617381649946318, "rougeL_fmeasure_stderr": 0.0015882243198153, "rougeL_precision": 0.05938008985261857, "rougeL_precision_stderr": 0.0015616531635880319, "rougeL_recall": 0.26499125697603243, "rougeL_recall_stderr": 0.0045013518989927715, "rougeLsum_fmeasure": 0.08883976098941902, "rougeLsum_fmeasure_stderr": 0.0016500461412155038, "rougeLsum_precision": 0.06141798726194324, "rougeLsum_precision_stderr": 0.0016333446707655825, "rougeLsum_recall": 0.27069807324549544, "rougeLsum_recall_stderr": 0.004497552128191681}}, "3": {"PALM_prompt": {"bleu": 0.3606694391727648, "bleu_stderr": 0.008310853233127335, "rouge1_fmeasure": 0.09331924425577678, "rouge1_fmeasure_stderr": 0.0017173808477082145, "rouge1_precision": 0.06263742199688632, "rouge1_precision_stderr": 0.001562084157360286, "rouge1_recall": 0.2882752944793032, "rouge1_recall_stderr": 0.004756361702522272, "rouge2_fmeasure": 0.037603505078738005, "rouge2_fmeasure_stderr": 0.0010092525151178737, "rouge2_precision": 0.025137141382002674, "rouge2_precision_stderr": 0.0008536966977355885, "rouge2_recall": 0.1235074310378113, "rouge2_recall_stderr": 0.003079905467653997, "rougeL_fmeasure": 0.08199088296141296, "rougeL_fmeasure_stderr": 0.0014646302142578439, "rougeL_precision": 0.05483700608671878, "rougeL_precision_stderr": 0.0013294793385053626, "rougeL_recall": 0.25566717649514564, "rougeL_recall_stderr": 0.004201784527188464, "rougeLsum_fmeasure": 0.08582511025772276, "rougeLsum_fmeasure_stderr": 0.0015742173586373988, "rougeLsum_precision": 0.057656593391401105, "rougeLsum_precision_stderr": 0.001448279369339703, "rougeLsum_recall": 0.26530644858530983, "rougeLsum_recall_stderr": 0.004344765335471821}}, "4": {"PALM_prompt": {"bleu": 0.3675361736763453, "bleu_stderr": 0.020326931756428485, "rouge1_fmeasure": 0.09491569610433652, "rouge1_fmeasure_stderr": 0.0017327884200105889, "rouge1_precision": 0.06309958433539423, "rouge1_precision_stderr": 0.0015745556868410056, "rouge1_recall": 0.29594813333717956, "rouge1_recall_stderr": 0.004771334901839537, "rouge2_fmeasure": 0.03886088367864174, "rouge2_fmeasure_stderr": 0.001091169845130052, "rouge2_precision": 0.026012865234820656, "rouge2_precision_stderr": 0.0010773188564636973, "rouge2_recall": 0.1278071694216065, "rouge2_recall_stderr": 0.0031167450419853516, "rougeL_fmeasure": 0.08308799835736733, "rougeL_fmeasure_stderr": 0.0015013595623938796, "rougeL_precision": 0.05515515588436286, "rougeL_precision_stderr": 0.0013836477250754422, "rougeL_recall": 0.2622982265838491, "rougeL_recall_stderr": 0.00427139168291985, "rougeLsum_fmeasure": 0.08712710597598072, "rougeLsum_fmeasure_stderr": 0.001586405218976028, "rougeLsum_precision": 0.05805468643167903, "rougeLsum_precision_stderr": 0.0014829094286729005, "rougeLsum_recall": 0.2720751223127869, "rougeLsum_recall_stderr": 0.004344066687775499}}, "5": {"PALM_prompt": {"bleu": 0.4031967239919602, "bleu_stderr": 0.01955569062229382, "rouge1_fmeasure": 0.0972626867838164, "rouge1_fmeasure_stderr": 0.0017725244848752428, "rouge1_precision": 0.06594936434431786, "rouge1_precision_stderr": 0.0016995820332288193, "rouge1_recall": 0.2974883530256426, "rouge1_recall_stderr": 0.004771172456555242, "rouge2_fmeasure": 0.03935591402102145, "rouge2_fmeasure_stderr": 0.0010700530850907975, "rouge2_precision": 0.026786521346973263, "rouge2_precision_stderr": 0.0010176136831779137, "rouge2_recall": 0.1288552508178483, "rouge2_recall_stderr": 0.0031933269259351795, "rougeL_fmeasure": 0.08472785424842505, "rougeL_fmeasure_stderr": 0.0015087882971143603, "rougeL_precision": 0.057306189995987036, "rougeL_precision_stderr": 0.001479195174246019, "rougeL_recall": 0.2644105292140493, "rougeL_recall_stderr": 0.004297208694424085, "rougeLsum_fmeasure": 0.08880660587294974, "rougeLsum_fmeasure_stderr": 0.0016008461579977826, "rougeLsum_precision": 0.0602718294500663, "rougeLsum_precision_stderr": 0.0015666595879118396, "rougeLsum_recall": 0.27341706006086924, "rougeLsum_recall_stderr": 0.004362973739226787}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.3117914364402146, "bleu_stderr": 0.038869790576114435, "rouge1_fmeasure": 0.17807050875857605, "rouge1_fmeasure_stderr": 0.0017262847277674262, "rouge1_precision": 0.15761536552810515, "rouge1_precision_stderr": 0.0018814472277496766, "rouge1_recall": 0.2507562802368397, "rouge1_recall_stderr": 0.0024139529147923916, "rouge2_fmeasure": 0.031784532049007205, "rouge2_fmeasure_stderr": 0.0007525512665802046, "rouge2_precision": 0.028135476713686543, "rouge2_precision_stderr": 0.0007093919394456972, "rouge2_recall": 0.045496948279537944, "rouge2_recall_stderr": 0.001187263127037588, "rougeL_fmeasure": 0.13329487015930067, "rougeL_fmeasure_stderr": 0.0011787943028081437, "rougeL_precision": 0.11693729790963828, "rougeL_precision_stderr": 0.0013004009830310601, "rougeL_recall": 0.19236030131954102, "rougeL_recall_stderr": 0.0018866755826042925, "rougeLsum_fmeasure": 0.1665727782234612, "rougeLsum_fmeasure_stderr": 0.0016051270891830533, "rougeLsum_precision": 0.1472752886707766, "rougeLsum_precision_stderr": 0.0017436582481876687, "rougeLsum_recall": 0.23501712267651972, "rougeLsum_recall_stderr": 0.002263064167528001}}, "1": {"tldr_en": {"bleu": 1.3893987808146016, "bleu_stderr": 0.046619139332931256, "rouge1_fmeasure": 0.17500708992775654, "rouge1_fmeasure_stderr": 0.0017745369150350312, "rouge1_precision": 0.15210581075864876, "rouge1_precision_stderr": 0.0018845369080012803, "rouge1_recall": 0.25067061371907867, "rouge1_recall_stderr": 0.0024637572297500332, "rouge2_fmeasure": 0.03024160289297904, "rouge2_fmeasure_stderr": 0.0007419991144241662, "rouge2_precision": 0.026734929684742795, "rouge2_precision_stderr": 0.0007423712339718682, "rouge2_recall": 0.043230755660788475, "rouge2_recall_stderr": 0.0011424532519919088, "rougeL_fmeasure": 0.1276978402771411, "rougeL_fmeasure_stderr": 0.0011737206412969644, "rougeL_precision": 0.10978966696756372, "rougeL_precision_stderr": 0.0012381033104423614, "rougeL_recall": 0.1878092392441192, "rougeL_recall_stderr": 0.0018886300525257722, "rougeLsum_fmeasure": 0.1642819979143946, "rougeLsum_fmeasure_stderr": 0.0016434232281316297, "rougeLsum_precision": 0.1426618162172562, "rougeLsum_precision_stderr": 0.0017500092638863146, "rougeLsum_recall": 0.23597158080838168, "rougeLsum_recall_stderr": 0.0023194696116163103}}, "2": {"tldr_en": {"bleu": 1.378473752158773, "bleu_stderr": 0.057852495901222094, "rouge1_fmeasure": 0.17636450230155626, "rouge1_fmeasure_stderr": 0.0017370013118141686, "rouge1_precision": 0.1531850945664608, "rouge1_precision_stderr": 0.0018597227456311947, "rouge1_recall": 0.25348348439962515, "rouge1_recall_stderr": 0.002462178569354662, "rouge2_fmeasure": 0.030130202799052323, "rouge2_fmeasure_stderr": 0.0007428576699961777, "rouge2_precision": 0.026242079658968805, "rouge2_precision_stderr": 0.0006882353494172086, "rouge2_recall": 0.04446587069132275, "rouge2_recall_stderr": 0.0012466905881978324, "rougeL_fmeasure": 0.12800719951389405, "rougeL_fmeasure_stderr": 0.0011698694600184394, "rougeL_precision": 0.11002572624270117, "rougeL_precision_stderr": 0.0012353961986536334, "rougeL_recall": 0.18870312855276158, "rougeL_recall_stderr": 0.0019042695681764173, "rougeLsum_fmeasure": 0.1661426728939587, "rougeLsum_fmeasure_stderr": 0.0016215271254943404, "rougeLsum_precision": 0.14429538631897362, "rougeLsum_precision_stderr": 0.0017452887830828809, "rougeLsum_recall": 0.23917835157226727, "rougeLsum_recall_stderr": 0.0023155547875129663}}, "3": {"tldr_en": {"bleu": 1.3665712885168313, "bleu_stderr": 0.05054625869004719, "rouge1_fmeasure": 0.14729757170773533, "rouge1_fmeasure_stderr": 0.001921372999251686, "rouge1_precision": 0.13208777853898093, "rouge1_precision_stderr": 0.002049533510538744, "rouge1_recall": 0.20982606982551938, "rouge1_recall_stderr": 0.002730280087040573, "rouge2_fmeasure": 0.02527715313239358, "rouge2_fmeasure_stderr": 0.0006840178036153184, "rouge2_precision": 0.022604523324713303, "rouge2_precision_stderr": 0.0006628695261598809, "rouge2_recall": 0.03683682005188189, "rouge2_recall_stderr": 0.0011310993219663713, "rougeL_fmeasure": 0.10852347778322055, "rougeL_fmeasure_stderr": 0.0013348050749369552, "rougeL_precision": 0.0967400800245236, "rougeL_precision_stderr": 0.0014580996289179358, "rougeL_recall": 0.15868318982503593, "rougeL_recall_stderr": 0.0021142856696907573, "rougeLsum_fmeasure": 0.1387267878852416, "rougeLsum_fmeasure_stderr": 0.001800410333906174, "rougeLsum_precision": 0.12435591014111426, "rougeLsum_precision_stderr": 0.0019259730066761248, "rougeLsum_recall": 0.19828906447162636, "rougeLsum_recall_stderr": 0.002593664148453481}}, "4": {"tldr_en": {"bleu": 0.28340415693385945, "bleu_stderr": 0.0209818630472089, "rouge1_fmeasure": 0.0480276943015109, "rouge1_fmeasure_stderr": 0.0016569473528718157, "rouge1_precision": 0.04463142890618361, "rouge1_precision_stderr": 0.001741701284188118, "rouge1_recall": 0.07144472650863519, "rouge1_recall_stderr": 0.00248572950199299, "rouge2_fmeasure": 0.008199708088564734, "rouge2_fmeasure_stderr": 0.0004484338719447807, "rouge2_precision": 0.008300974083624839, "rouge2_precision_stderr": 0.000734745003789633, "rouge2_recall": 0.01235281568058786, "rouge2_recall_stderr": 0.0007053407339613288, "rougeL_fmeasure": 0.035895809145146826, "rougeL_fmeasure_stderr": 0.0012083416287947339, "rougeL_precision": 0.03359100833320618, "rougeL_precision_stderr": 0.0013600937281266902, "rougeL_recall": 0.05476416358262262, "rougeL_recall_stderr": 0.00191385641317285, "rougeLsum_fmeasure": 0.045025784039735284, "rougeLsum_fmeasure_stderr": 0.0015522261133805648, "rougeLsum_precision": 0.04196635480525258, "rougeLsum_precision_stderr": 0.0016532961929028808, "rougeLsum_recall": 0.06713938944403022, "rougeLsum_recall_stderr": 0.00234026265815578}}, "5": {"tldr_en": {"bleu": 3.2595087887270587e-07, "bleu_stderr": 6.967113872587107e-07, "rouge1_fmeasure": 0.007874647645570177, "rouge1_fmeasure_stderr": 0.0007466280111597254, "rouge1_precision": 0.007542326247673535, "rouge1_precision_stderr": 0.0007723862076772796, "rouge1_recall": 0.011863074763917325, "rouge1_recall_stderr": 0.001132539384128502, "rouge2_fmeasure": 0.0012873383840521695, "rouge2_fmeasure_stderr": 0.00018108006559001093, "rouge2_precision": 0.0011764733594931802, "rouge2_precision_stderr": 0.00017931709496621105, "rouge2_recall": 0.0020717296094753803, "rouge2_recall_stderr": 0.0003549770069263661, "rougeL_fmeasure": 0.006013119766291975, "rougeL_fmeasure_stderr": 0.0005624055930450526, "rougeL_precision": 0.0057799238944086645, "rougeL_precision_stderr": 0.0005873935207958883, "rougeL_recall": 0.009182632940170325, "rougeL_recall_stderr": 0.0008882862728684434, "rougeLsum_fmeasure": 0.007421104556068207, "rougeLsum_fmeasure_stderr": 0.0007026171557778442, "rougeLsum_precision": 0.007126040645987352, "rougeLsum_precision_stderr": 0.0007301525918400855, "rougeLsum_recall": 0.011200742711953659, "rougeLsum_recall_stderr": 0.0010710809217114395}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.6349648152534446, "bleu_stderr": 0.03287841687909767, "rouge1_fmeasure": 0.127882949671846, "rouge1_fmeasure_stderr": 0.0016859147840992575, "rouge1_precision": 0.10936839240500393, "rouge1_precision_stderr": 0.001853248576660244, "rouge1_recall": 0.18609021982817653, "rouge1_recall_stderr": 0.0023119217691850223, "rouge2_fmeasure": 0.022375489011305233, "rouge2_fmeasure_stderr": 0.0005502378027146171, "rouge2_precision": 0.020158420153221522, "rouge2_precision_stderr": 0.000720482269270522, "rouge2_recall": 0.03367601166872375, "rouge2_recall_stderr": 0.0007843692042313422, "rougeL_fmeasure": 0.11889714073509729, "rougeL_fmeasure_stderr": 0.0015222587375517366, "rougeL_precision": 0.10121889590836736, "rougeL_precision_stderr": 0.0016667158118401093, "rougeL_recall": 0.1741639569635279, "rougeL_recall_stderr": 0.002146360636975801, "rougeLsum_fmeasure": 0.10970370438956814, "rougeLsum_fmeasure_stderr": 0.0014505319784011157, "rougeLsum_precision": 0.09427470423743937, "rougeLsum_precision_stderr": 0.0016340462445801774, "rougeLsum_recall": 0.1598170804332907, "rougeLsum_recall_stderr": 0.002008660791953932}}, "1": {"generate_text_restaurant": {"bleu": 7.775347596431306, "bleu_stderr": 0.14201764307377907, "rouge1_fmeasure": 0.34152125138620365, "rouge1_fmeasure_stderr": 0.0024578199246003667, "rouge1_precision": 0.40304491654710317, "rouge1_precision_stderr": 0.0029517497136412074, "rouge1_recall": 0.3439369425504163, "rouge1_recall_stderr": 0.002974280414062951, "rouge2_fmeasure": 0.13963411320567692, "rouge2_fmeasure_stderr": 0.0017257490421024213, "rouge2_precision": 0.1633721552550905, "rouge2_precision_stderr": 0.002241077608657423, "rouge2_recall": 0.1413817168022227, "rouge2_recall_stderr": 0.0018937783587177903, "rougeL_fmeasure": 0.25602284812965415, "rougeL_fmeasure_stderr": 0.0018872538891016328, "rougeL_precision": 0.30822312950006464, "rougeL_precision_stderr": 0.0024895369778903027, "rougeL_recall": 0.2575364034274509, "rougeL_recall_stderr": 0.0023188793139068695, "rougeLsum_fmeasure": 0.2875087311008179, "rougeLsum_fmeasure_stderr": 0.002180307359705676, "rougeLsum_precision": 0.34317247241797727, "rougeLsum_precision_stderr": 0.0027245390906056152, "rougeLsum_recall": 0.2890085409202441, "rougeLsum_recall_stderr": 0.0026211992163651422}}, "2": {"generate_text_restaurant": {"bleu": 8.702094051815235, "bleu_stderr": 0.11975726331361138, "rouge1_fmeasure": 0.35987913305354885, "rouge1_fmeasure_stderr": 0.0021061794635379052, "rouge1_precision": 0.4140327124495678, "rouge1_precision_stderr": 0.0028269003135836935, "rouge1_recall": 0.3574637310224284, "rouge1_recall_stderr": 0.0025778367619595055, "rouge2_fmeasure": 0.1566733423938655, "rouge2_fmeasure_stderr": 0.001627534629157482, "rouge2_precision": 0.1816443953116252, "rouge2_precision_stderr": 0.002127567008732353, "rouge2_recall": 0.1559138428565674, "rouge2_recall_stderr": 0.001788009969204406, "rougeL_fmeasure": 0.27267976272237104, "rougeL_fmeasure_stderr": 0.0017384350861842286, "rougeL_precision": 0.3164886128761678, "rougeL_precision_stderr": 0.0024168094444719833, "rougeL_recall": 0.27061716547750936, "rougeL_recall_stderr": 0.0020933632426403622, "rougeLsum_fmeasure": 0.3057513219565803, "rougeLsum_fmeasure_stderr": 0.0019480380614703004, "rougeLsum_precision": 0.3537045003109022, "rougeLsum_precision_stderr": 0.0026390939207046886, "rougeLsum_recall": 0.30330839840294527, "rougeLsum_recall_stderr": 0.0023300891867916954}}, "3": {"generate_text_restaurant": {"bleu": 8.935679704217065, "bleu_stderr": 0.12728512869296515, "rouge1_fmeasure": 0.3719952380037812, "rouge1_fmeasure_stderr": 0.001895641845233123, "rouge1_precision": 0.4248345122651244, "rouge1_precision_stderr": 0.002783517005751884, "rouge1_recall": 0.36560624271482706, "rouge1_recall_stderr": 0.0023273132754295356, "rouge2_fmeasure": 0.1625675970078791, "rouge2_fmeasure_stderr": 0.001540398050298542, "rouge2_precision": 0.18750571844872832, "rouge2_precision_stderr": 0.0020077556794866613, "rouge2_recall": 0.16033553932137123, "rouge2_recall_stderr": 0.001700539367107847, "rougeL_fmeasure": 0.27789708984208544, "rougeL_fmeasure_stderr": 0.001621043962846411, "rougeL_precision": 0.31863976423141305, "rougeL_precision_stderr": 0.002360130419869322, "rougeL_recall": 0.2731119319746551, "rougeL_recall_stderr": 0.0019331744010340963, "rougeLsum_fmeasure": 0.3134514438217236, "rougeLsum_fmeasure_stderr": 0.0017823914479985058, "rougeLsum_precision": 0.3587375972776091, "rougeLsum_precision_stderr": 0.002569555412372417, "rougeLsum_recall": 0.3079146926202692, "rougeLsum_recall_stderr": 0.00211481965225935}}, "4": {"generate_text_restaurant": {"bleu": 9.368981020287176, "bleu_stderr": 0.13397596203255155, "rouge1_fmeasure": 0.37894296212656386, "rouge1_fmeasure_stderr": 0.0019017120529326533, "rouge1_precision": 0.427231911938943, "rouge1_precision_stderr": 0.002806869876595068, "rouge1_recall": 0.3768853849161269, "rouge1_recall_stderr": 0.0023564283615197484, "rouge2_fmeasure": 0.16792240286212373, "rouge2_fmeasure_stderr": 0.001585522888624055, "rouge2_precision": 0.19151930055270516, "rouge2_precision_stderr": 0.0020762727111680148, "rouge2_recall": 0.1671448204016007, "rouge2_recall_stderr": 0.0017246392970299226, "rougeL_fmeasure": 0.27888587197458914, "rougeL_fmeasure_stderr": 0.0016561587306319508, "rougeL_precision": 0.31542100518136446, "rougeL_precision_stderr": 0.002374751033012953, "rougeL_recall": 0.2771968856765289, "rougeL_recall_stderr": 0.001959616373471557, "rougeLsum_fmeasure": 0.3190950210985673, "rougeLsum_fmeasure_stderr": 0.0018272379465288419, "rougeLsum_precision": 0.3607651533125225, "rougeLsum_precision_stderr": 0.002628321593864779, "rougeLsum_recall": 0.31688723659419427, "rougeLsum_recall_stderr": 0.0021617595289955575}}, "5": {"generate_text_restaurant": {"bleu": 9.251296218272744, "bleu_stderr": 0.12286805279115028, "rouge1_fmeasure": 0.3785615485407228, "rouge1_fmeasure_stderr": 0.001885716725079544, "rouge1_precision": 0.4214709488155353, "rouge1_precision_stderr": 0.002799583337729935, "rouge1_recall": 0.38092809398003713, "rouge1_recall_stderr": 0.002302274987523537, "rouge2_fmeasure": 0.1671450372115207, "rouge2_fmeasure_stderr": 0.0015360784681658307, "rouge2_precision": 0.18813178947437978, "rouge2_precision_stderr": 0.001991843712195447, "rouge2_recall": 0.1685061967640079, "rouge2_recall_stderr": 0.0016972094041887982, "rougeL_fmeasure": 0.27251086495483146, "rougeL_fmeasure_stderr": 0.0016098355652595167, "rougeL_precision": 0.30355254226041617, "rougeL_precision_stderr": 0.0022800526691148237, "rougeL_recall": 0.27507290296902576, "rougeL_recall_stderr": 0.0019329466500581356, "rougeLsum_fmeasure": 0.316120433201745, "rougeLsum_fmeasure_stderr": 0.0018097964539523662, "rougeLsum_precision": 0.3529888848313482, "rougeLsum_precision_stderr": 0.0026053569917332637, "rougeLsum_recall": 0.3176801179253521, "rougeLsum_recall_stderr": 0.0021230731446771904}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.4498172161237914, "bleu_stderr": 0.08276334421676217, "rouge1_fmeasure": 0.1936785688948649, "rouge1_fmeasure_stderr": 0.0022448343696748335, "rouge1_precision": 0.1487106697446941, "rouge1_precision_stderr": 0.0021326115614756563, "rouge1_recall": 0.31515346654923965, "rouge1_recall_stderr": 0.003785942979337541, "rouge2_fmeasure": 0.036126066636598314, "rouge2_fmeasure_stderr": 0.0012555321275527444, "rouge2_precision": 0.027115888202821407, "rouge2_precision_stderr": 0.0010006343526686044, "rouge2_recall": 0.060564416076887555, "rouge2_recall_stderr": 0.0021141446634826053, "rougeL_fmeasure": 0.14451515271983895, "rougeL_fmeasure_stderr": 0.0016988930847425478, "rougeL_precision": 0.11068368046384064, "rougeL_precision_stderr": 0.0016013858263506605, "rougeL_recall": 0.23651818221742915, "rougeL_recall_stderr": 0.0029805154063543286, "rougeLsum_fmeasure": 0.1531837688337574, "rougeLsum_fmeasure_stderr": 0.00184135501071216, "rougeLsum_precision": 0.11693091578253577, "rougeLsum_precision_stderr": 0.0016744341950862681, "rougeLsum_recall": 0.25179832618018516, "rougeLsum_recall_stderr": 0.003306716543010649}}, "1": {"article_DOC_summary": {"bleu": 0.7730664035812526, "bleu_stderr": 0.09660177188089157, "rouge1_fmeasure": 0.15946390731049626, "rouge1_fmeasure_stderr": 0.002038313796166458, "rouge1_precision": 0.11392419556266271, "rouge1_precision_stderr": 0.0015555764519450581, "rouge1_recall": 0.2791607867661533, "rouge1_recall_stderr": 0.003444397914889341, "rouge2_fmeasure": 0.022429611615385675, "rouge2_fmeasure_stderr": 0.0009777840414160158, "rouge2_precision": 0.01588730539100541, "rouge2_precision_stderr": 0.0006981797018267417, "rouge2_recall": 0.0403648601213491, "rouge2_recall_stderr": 0.0017967749594685392, "rougeL_fmeasure": 0.12093891190492209, "rougeL_fmeasure_stderr": 0.0014832931572729734, "rougeL_precision": 0.08618486219524299, "rougeL_precision_stderr": 0.001120284336362133, "rougeL_recall": 0.21335087057436972, "rougeL_recall_stderr": 0.002626420813544325, "rougeLsum_fmeasure": 0.12882815359675798, "rougeLsum_fmeasure_stderr": 0.0016259049899977492, "rougeLsum_precision": 0.09176906663674463, "rougeLsum_precision_stderr": 0.0012216189004166276, "rougeLsum_recall": 0.22751366864906272, "rougeLsum_recall_stderr": 0.0029051984150897736}}, "2": {"article_DOC_summary": {"bleu": 0.7734568438683845, "bleu_stderr": 0.09465675139677089, "rouge1_fmeasure": 0.15646603684759688, "rouge1_fmeasure_stderr": 0.0019853411517890885, "rouge1_precision": 0.11107224063434518, "rouge1_precision_stderr": 0.0014917114236635345, "rouge1_recall": 0.2756739422379781, "rouge1_recall_stderr": 0.0033839183791268216, "rouge2_fmeasure": 0.021292811682357263, "rouge2_fmeasure_stderr": 0.0009852504582853296, "rouge2_precision": 0.01498382266066605, "rouge2_precision_stderr": 0.0006988231406148056, "rouge2_recall": 0.03839637372757491, "rouge2_recall_stderr": 0.0017968910329354752, "rougeL_fmeasure": 0.11913617795958936, "rougeL_fmeasure_stderr": 0.0014333144569125661, "rougeL_precision": 0.08440689353420024, "rougeL_precision_stderr": 0.001067645115096159, "rougeL_recall": 0.21106819156162235, "rougeL_recall_stderr": 0.0025206200209730257, "rougeLsum_fmeasure": 0.12696359950188804, "rougeLsum_fmeasure_stderr": 0.0016209530503488705, "rougeLsum_precision": 0.08998664439094586, "rougeLsum_precision_stderr": 0.0012071422128337866, "rougeLsum_recall": 0.22473096313724464, "rougeLsum_recall_stderr": 0.0028451219210215055}}, "3": {"article_DOC_summary": {"bleu": 0.8049127932899806, "bleu_stderr": 0.08999572400127122, "rouge1_fmeasure": 0.15050480696620397, "rouge1_fmeasure_stderr": 0.002245923779222128, "rouge1_precision": 0.10897279730905816, "rouge1_precision_stderr": 0.0017759004065014904, "rouge1_recall": 0.26038402313267656, "rouge1_recall_stderr": 0.003723420026493814, "rouge2_fmeasure": 0.021167446810664674, "rouge2_fmeasure_stderr": 0.0010290684365440164, "rouge2_precision": 0.015388816423807121, "rouge2_precision_stderr": 0.0007871454452462043, "rouge2_recall": 0.03672260583200518, "rouge2_recall_stderr": 0.0017743758343940175, "rougeL_fmeasure": 0.11505722067055536, "rougeL_fmeasure_stderr": 0.001666166199846471, "rougeL_precision": 0.0831498662555045, "rougeL_precision_stderr": 0.0013064720905479465, "rougeL_recall": 0.20027577798925122, "rougeL_recall_stderr": 0.0028563581467989313, "rougeLsum_fmeasure": 0.12206463232775676, "rougeLsum_fmeasure_stderr": 0.0018437180716233557, "rougeLsum_precision": 0.08815555951530589, "rougeLsum_precision_stderr": 0.0014283832242556695, "rougeLsum_recall": 0.2123687582228774, "rougeLsum_recall_stderr": 0.003146851802231491}}, "4": {"article_DOC_summary": {"bleu": 0.5060996776720558, "bleu_stderr": 0.14524459305217285, "rouge1_fmeasure": 0.0399627335027508, "rouge1_fmeasure_stderr": 0.0022407508420631025, "rouge1_precision": 0.033001039009821036, "rouge1_precision_stderr": 0.001984911598317947, "rouge1_recall": 0.06395810828466957, "rouge1_recall_stderr": 0.0036838268788019733, "rouge2_fmeasure": 0.006191903982694321, "rouge2_fmeasure_stderr": 0.0006692065594837194, "rouge2_precision": 0.004795603976232359, "rouge2_precision_stderr": 0.0005766690213353866, "rouge2_recall": 0.010428793445853507, "rouge2_recall_stderr": 0.0011448631084785205, "rougeL_fmeasure": 0.030706668830645573, "rougeL_fmeasure_stderr": 0.0016939631656992667, "rougeL_precision": 0.02578163296182598, "rougeL_precision_stderr": 0.0016046951439842626, "rougeL_recall": 0.04925282879879635, "rougeL_recall_stderr": 0.0028136569337347895, "rougeLsum_fmeasure": 0.03297771564483896, "rougeLsum_fmeasure_stderr": 0.0018356691307092102, "rougeLsum_precision": 0.027548269863277748, "rougeLsum_precision_stderr": 0.0017007061396866498, "rougeLsum_recall": 0.05292709977442769, "rougeLsum_recall_stderr": 0.0030508513315767424}}, "5": {"article_DOC_summary": {"bleu": 9.848915747734342e-39, "bleu_stderr": 1.2608627356057488e-33, "rouge1_fmeasure": 0.0020872492172107414, "rouge1_fmeasure_stderr": 0.0005596803323136037, "rouge1_precision": 0.0023321966082933427, "rouge1_precision_stderr": 0.0006339965279154461, "rouge1_recall": 0.001941759515769856, "rouge1_recall_stderr": 0.0005151769106853416, "rouge2_fmeasure": 3.8117019249094724e-05, "rouge2_fmeasure_stderr": 3.811701924909608e-05, "rouge2_precision": 4.2881646655231564e-05, "rouge2_precision_stderr": 4.288164665523092e-05, "rouge2_recall": 3.430531732418525e-05, "rouge2_recall_stderr": 3.430531732418596e-05, "rougeL_fmeasure": 0.0015005349761403517, "rougeL_fmeasure_stderr": 0.0004109172416804055, "rougeL_precision": 0.0016537139817559235, "rougeL_precision_stderr": 0.000456767026325873, "rougeL_recall": 0.00141925711028453, "rougeL_recall_stderr": 0.0003872429459323918, "rougeLsum_fmeasure": 0.001699768896392363, "rougeLsum_fmeasure_stderr": 0.0004586377575998677, "rougeLsum_precision": 0.0018847172795271397, "rougeLsum_precision_stderr": 0.0005162850895856037, "rougeLsum_recall": 0.001596113771235291, "rougeLsum_recall_stderr": 0.0004265754094508304}}}}
4b284b6bc4/evaluation/generation/merged.csv CHANGED
@@ -1 +1,53 @@
1
  dataset,fewshots,prompt,metric,value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  dataset,fewshots,prompt,metric,value
2
+ e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.019204879284699994
3
+ e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.019204879284699994
4
+ e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.16338292931453433
5
+ e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.16338292931453433
6
+ e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.1746241953539603
7
+ e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.1746241953539603
8
+ e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.17866109909559874
9
+ e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.17866109909559874
10
+ e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.17875573947192416
11
+ e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.17875573947192416
12
+ e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.1796078353066716
13
+ e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.1796078353066716
14
+ e2e_nlg_cleaned,5,average,multiple,0.14903944630456484
15
+ gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.04546276335000291
16
+ gem_xsum,0,median,rouge2_fmeasure,0.04546276335000291
17
+ gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.032922044625399265
18
+ gem_xsum,1,median,rouge2_fmeasure,0.032922044625399265
19
+ gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.03002721161354905
20
+ gem_xsum,2,median,rouge2_fmeasure,0.03002721161354905
21
+ gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.026648660253339233
22
+ gem_xsum,3,median,rouge2_fmeasure,0.026648660253339233
23
+ gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.005235866586219103
24
+ gem_xsum,4,median,rouge2_fmeasure,0.005235866586219103
25
+ gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.00038400865766981235
26
+ gem_xsum,5,median,rouge2_fmeasure,0.00038400865766981235
27
+ gem_xsum,5,average,multiple,0.023446759181029897
28
+ web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.05256140915596622
29
+ web_nlg_en,0,median,rouge2_fmeasure,0.05256140915596622
30
+ web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.05300336339554896
31
+ web_nlg_en,1,median,rouge2_fmeasure,0.05300336339554896
32
+ web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.054398212153947176
33
+ web_nlg_en,2,median,rouge2_fmeasure,0.054398212153947176
34
+ web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.0540512381152691
35
+ web_nlg_en,3,median,rouge2_fmeasure,0.0540512381152691
36
+ web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.05558232740932497
37
+ web_nlg_en,4,median,rouge2_fmeasure,0.05558232740932497
38
+ web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.055498817735751244
39
+ web_nlg_en,5,median,rouge2_fmeasure,0.055498817735751244
40
+ web_nlg_en,5,average,multiple,0.05418256132763461
41
+ wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.03363388229490879
42
+ wiki_lingua_en,0,median,rouge2_fmeasure,0.03363388229490879
43
+ wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.040651231388572934
44
+ wiki_lingua_en,1,median,rouge2_fmeasure,0.040651231388572934
45
+ wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.04771532156348015
46
+ wiki_lingua_en,2,median,rouge2_fmeasure,0.04771532156348015
47
+ wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.03742000865261107
48
+ wiki_lingua_en,3,median,rouge2_fmeasure,0.03742000865261107
49
+ wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.011577372832860292
50
+ wiki_lingua_en,4,median,rouge2_fmeasure,0.011577372832860292
51
+ wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0018806245405111135
52
+ wiki_lingua_en,5,median,rouge2_fmeasure,0.0018806245405111135
53
+ wiki_lingua_en,5,average,multiple,0.028813073545490724
4b284b6bc4/evaluation/generation/merged.json CHANGED
@@ -1 +1 @@
1
- {}
 
1
+ {"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3579887269792864, "bleu_stderr": 0.026290518420849302, "rouge1_fmeasure": 0.11160017527885356, "rouge1_fmeasure_stderr": 0.002086339624165716, "rouge1_precision": 0.07644779924173684, "rouge1_precision_stderr": 0.0019932242938081966, "rouge1_recall": 0.3140122825865788, "rouge1_recall_stderr": 0.00473174688731337, "rouge2_fmeasure": 0.05256140915596622, "rouge2_fmeasure_stderr": 0.0013357763866491004, "rouge2_precision": 0.036750727321617986, "rouge2_precision_stderr": 0.0014370097396917577, "rouge2_recall": 0.15168790833091741, "rouge2_recall_stderr": 0.00327314149676359, "rougeL_fmeasure": 0.10687286315069298, "rougeL_fmeasure_stderr": 0.0019210923887298273, "rougeL_precision": 0.07269010424631774, "rougeL_precision_stderr": 0.001804168947117905, "rougeL_recall": 0.3041843708869263, "rougeL_recall_stderr": 0.004620263011253791, "rougeLsum_fmeasure": 0.10569938398495801, "rougeLsum_fmeasure_stderr": 0.001946898177292707, "rougeLsum_precision": 0.07227657951942407, "rougeLsum_precision_stderr": 0.00185889534489746, "rougeLsum_recall": 0.2984357765608249, "rougeLsum_recall_stderr": 0.004438297298642072}}, "1": {"PALM_prompt": {"bleu": 0.4328473667055116, "bleu_stderr": 0.031563762685550605, "rouge1_fmeasure": 0.11463482618723982, "rouge1_fmeasure_stderr": 0.00197087410254774, "rouge1_precision": 0.0754889284841584, "rouge1_precision_stderr": 0.0016723482751950882, "rouge1_recall": 0.3568638973024857, "rouge1_recall_stderr": 0.005165847905772782, "rouge2_fmeasure": 0.05300336339554896, "rouge2_fmeasure_stderr": 0.0012527897061983034, "rouge2_precision": 0.034839983704867496, "rouge2_precision_stderr": 0.0010807913331387338, "rouge2_recall": 0.17043864684015358, "rouge2_recall_stderr": 0.0035541748574541812, "rougeL_fmeasure": 0.10863611949978016, "rougeL_fmeasure_stderr": 0.0018167063875338652, "rougeL_precision": 0.07141065068254121, "rougeL_precision_stderr": 0.0015343434909276881, "rougeL_recall": 0.3372654888293409, "rougeL_recall_stderr": 0.00480063635478594, "rougeLsum_fmeasure": 0.10860271374971142, "rougeLsum_fmeasure_stderr": 0.0018589526903866466, "rougeLsum_precision": 0.07156229719066953, "rougeLsum_precision_stderr": 0.0015834391719402176, "rougeLsum_recall": 0.3365254803044947, "rougeLsum_recall_stderr": 0.004741753523327779}}, "2": {"PALM_prompt": {"bleu": 0.5073706859748068, "bleu_stderr": 0.027251670973455218, "rouge1_fmeasure": 0.11731724091581561, "rouge1_fmeasure_stderr": 0.0018279205620823124, "rouge1_precision": 0.07477018632600506, "rouge1_precision_stderr": 0.0013374652593092398, "rouge1_recall": 0.3853613042032073, "rouge1_recall_stderr": 0.005084199730294221, "rouge2_fmeasure": 0.054398212153947176, "rouge2_fmeasure_stderr": 0.0011494317288632408, "rouge2_precision": 0.03450347196801284, "rouge2_precision_stderr": 0.0008069658633736817, "rouge2_recall": 0.18812965530793743, "rouge2_recall_stderr": 0.0036778797391502018, "rougeL_fmeasure": 0.1099832807060729, "rougeL_fmeasure_stderr": 0.0016704019125235296, "rougeL_precision": 0.07011638477370233, "rougeL_precision_stderr": 0.0012204936295153403, "rougeL_recall": 0.35849809185801473, "rougeL_recall_stderr": 0.0046303938730760524, "rougeLsum_fmeasure": 0.11132965753457637, "rougeLsum_fmeasure_stderr": 0.0017274518924526628, "rougeLsum_precision": 0.07103828558262268, "rougeLsum_precision_stderr": 0.0012708649576852631, "rougeLsum_recall": 0.36403900050794064, "rougeLsum_recall_stderr": 0.004706458824337533}}, "3": {"PALM_prompt": {"bleu": 0.5538460949994922, "bleu_stderr": 0.030619449581908684, "rouge1_fmeasure": 0.11752080822643646, "rouge1_fmeasure_stderr": 0.0017050984817057536, "rouge1_precision": 0.07426375527493968, "rouge1_precision_stderr": 0.0012328595556948653, "rouge1_recall": 0.3983512111896526, "rouge1_recall_stderr": 0.005149038831273021, "rouge2_fmeasure": 0.0540512381152691, "rouge2_fmeasure_stderr": 0.0010792706844501199, "rouge2_precision": 0.033961307188594206, "rouge2_precision_stderr": 0.0007463446959629993, "rouge2_recall": 0.1952126067967436, "rouge2_recall_stderr": 0.003765073237022532, "rougeL_fmeasure": 0.10991735862485745, "rougeL_fmeasure_stderr": 0.0015699707120844398, "rougeL_precision": 0.06949936881635287, "rougeL_precision_stderr": 0.0011319691770803394, "rougeL_recall": 0.37006814411969174, "rougeL_recall_stderr": 0.004674296339620147, "rougeLsum_fmeasure": 0.11131671255602878, "rougeLsum_fmeasure_stderr": 0.0016176588983060458, "rougeLsum_precision": 0.07044905911860123, "rougeLsum_precision_stderr": 0.001176713991457994, "rougeLsum_recall": 0.37587119622632176, "rougeLsum_recall_stderr": 0.004781764656765029}}, "4": {"PALM_prompt": {"bleu": 0.6169374982628473, "bleu_stderr": 0.028180215419142937, "rouge1_fmeasure": 0.12047953054602768, "rouge1_fmeasure_stderr": 0.001780264034076078, "rouge1_precision": 0.0767326292344124, "rouge1_precision_stderr": 0.0014332822108225588, "rouge1_recall": 0.4103530916408206, "rouge1_recall_stderr": 0.00509047707981648, "rouge2_fmeasure": 0.05558232740932497, "rouge2_fmeasure_stderr": 0.001135018703614793, "rouge2_precision": 0.03499884029225629, "rouge2_precision_stderr": 0.0008094705242409413, "rouge2_recall": 0.2026200591285694, "rouge2_recall_stderr": 0.003815007228485857, "rougeL_fmeasure": 0.11157234028587232, "rougeL_fmeasure_stderr": 0.0016088633608836555, "rougeL_precision": 0.07108155586037897, "rougeL_precision_stderr": 0.001313667101340276, "rougeL_recall": 0.3782120432625062, "rougeL_recall_stderr": 0.004617236907586467, "rougeLsum_fmeasure": 0.11358754564009298, "rougeLsum_fmeasure_stderr": 0.0016693342491592073, "rougeLsum_precision": 0.07245164237036843, "rougeLsum_precision_stderr": 0.0013644934676994799, "rougeLsum_recall": 0.38546769487210003, "rougeLsum_recall_stderr": 0.004699371603496402}}, "5": {"PALM_prompt": {"bleu": 0.632423656885609, "bleu_stderr": 0.028226340827669782, "rouge1_fmeasure": 0.11988252526283717, "rouge1_fmeasure_stderr": 0.0016490877395048458, "rouge1_precision": 0.07526495412948489, "rouge1_precision_stderr": 0.001204246999888188, "rouge1_recall": 0.42062114998353867, "rouge1_recall_stderr": 0.005033683779474927, "rouge2_fmeasure": 0.055498817735751244, "rouge2_fmeasure_stderr": 0.0010589061036010714, "rouge2_precision": 0.034675995706119275, "rouge2_precision_stderr": 0.0007442574236283384, "rouge2_recall": 0.20926298715883546, "rouge2_recall_stderr": 0.003683114100456131, "rougeL_fmeasure": 0.11135810184452252, "rougeL_fmeasure_stderr": 0.0015369060589554874, "rougeL_precision": 0.07006024451876049, "rougeL_precision_stderr": 0.0011240361260165595, "rougeL_recall": 0.38589026277969496, "rougeL_recall_stderr": 0.004490738431562092, "rougeLsum_fmeasure": 0.11309222791137312, "rougeLsum_fmeasure_stderr": 0.0015640021065237441, "rougeLsum_precision": 0.07114809662806877, "rougeLsum_precision_stderr": 0.0011495349005282837, "rougeLsum_recall": 0.3936098602902287, "rougeLsum_recall_stderr": 0.0045617868695433765}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.479840115154661, "bleu_stderr": 0.06192138875344975, "rouge1_fmeasure": 0.1739448080653134, "rouge1_fmeasure_stderr": 0.0017812637598698018, "rouge1_precision": 0.14856509653732575, "rouge1_precision_stderr": 0.0018254841646619102, "rouge1_recall": 0.2530843717572759, "rouge1_recall_stderr": 0.002561813616922569, "rouge2_fmeasure": 0.03363388229490879, "rouge2_fmeasure_stderr": 0.0008046892555277539, "rouge2_precision": 0.028680910397583907, "rouge2_precision_stderr": 0.0007309262656611866, "rouge2_recall": 0.04964133355945333, "rouge2_recall_stderr": 0.0012722258808951824, "rougeL_fmeasure": 0.1356825751645128, "rougeL_fmeasure_stderr": 0.001248347814560266, "rougeL_precision": 0.11450942282690393, "rougeL_precision_stderr": 0.00125441708167449, "rougeL_recall": 0.20194947355319803, "rougeL_recall_stderr": 0.002036043567273002, "rougeLsum_fmeasure": 0.15980668292164993, "rougeLsum_fmeasure_stderr": 0.0016147589069422257, "rougeLsum_precision": 0.13625270653555904, "rougeLsum_precision_stderr": 0.001652617510622267, "rougeLsum_recall": 0.23341366110800826, "rougeLsum_recall_stderr": 0.002362328963636213}}, "1": {"tldr_en": {"bleu": 1.981909383109318, "bleu_stderr": 0.05332516266779298, "rouge1_fmeasure": 0.19417109314553332, "rouge1_fmeasure_stderr": 0.001883210730156311, "rouge1_precision": 0.16835595579581658, "rouge1_precision_stderr": 0.002031666300326454, "rouge1_recall": 0.2806656784585871, "rouge1_recall_stderr": 0.0027672810294863573, "rouge2_fmeasure": 0.040651231388572934, "rouge2_fmeasure_stderr": 0.000902016269913738, "rouge2_precision": 0.035233958419524904, "rouge2_precision_stderr": 0.0008649748434638386, "rouge2_recall": 0.06143446486171677, "rouge2_recall_stderr": 0.001577849822119161, "rougeL_fmeasure": 0.1387439887170405, "rougeL_fmeasure_stderr": 0.0012504777440338955, "rougeL_precision": 0.1193490581771219, "rougeL_precision_stderr": 0.0013674052231292708, "rougeL_recall": 0.20532600196964043, "rougeL_recall_stderr": 0.0021274206192724754, "rougeLsum_fmeasure": 0.18182771582932875, "rougeLsum_fmeasure_stderr": 0.0017585897563365122, "rougeLsum_precision": 0.15746346469094924, "rougeLsum_precision_stderr": 0.0019014604717123935, "rougeLsum_recall": 0.26376920789314967, "rougeLsum_recall_stderr": 0.0026265866392503625}}, "2": {"tldr_en": {"bleu": 2.3053974418740095, "bleu_stderr": 0.062242295007834764, "rouge1_fmeasure": 0.20692906953227413, "rouge1_fmeasure_stderr": 0.0018626056904197973, "rouge1_precision": 0.18001599549049535, "rouge1_precision_stderr": 0.002067162897845645, "rouge1_recall": 0.2980834661386306, "rouge1_recall_stderr": 0.002718524425472361, "rouge2_fmeasure": 0.04771532156348015, "rouge2_fmeasure_stderr": 0.0009515856581990172, "rouge2_precision": 0.04168050641100919, "rouge2_precision_stderr": 0.0009329665950699056, "rouge2_recall": 0.07039082545521896, "rouge2_recall_stderr": 0.0015628124045716759, "rougeL_fmeasure": 0.1467421325562737, "rougeL_fmeasure_stderr": 0.0012320807479736066, "rougeL_precision": 0.12674020027802077, "rougeL_precision_stderr": 0.0013962287618468083, "rougeL_recall": 0.21605276651528466, "rougeL_recall_stderr": 0.002081973016979306, "rougeLsum_fmeasure": 0.1942357263873664, "rougeLsum_fmeasure_stderr": 0.001742315830991206, "rougeLsum_precision": 0.16885381994157256, "rougeLsum_precision_stderr": 0.0019411704039521405, "rougeLsum_recall": 0.28047729608851496, "rougeLsum_recall_stderr": 0.002572231479292717}}, "3": {"tldr_en": {"bleu": 2.215156589671629, "bleu_stderr": 0.052535459933850885, "rouge1_fmeasure": 0.1696666381478316, "rouge1_fmeasure_stderr": 0.0021244410583731014, "rouge1_precision": 0.15364291614196499, "rouge1_precision_stderr": 0.002344969887146126, "rouge1_recall": 0.24376613072160627, "rouge1_recall_stderr": 0.003153183899770761, "rouge2_fmeasure": 0.03742000865261107, "rouge2_fmeasure_stderr": 0.0008889339032554892, "rouge2_precision": 0.03332167612450043, "rouge2_precision_stderr": 0.0008876153127825206, "rouge2_recall": 0.056658256520863745, "rouge2_recall_stderr": 0.001523337513038456, "rougeL_fmeasure": 0.12178749841451295, "rougeL_fmeasure_stderr": 0.0014680968157523093, "rougeL_precision": 0.11017417512546807, "rougeL_precision_stderr": 0.0017051424179066485, "rougeL_recall": 0.1791576899841882, "rougeL_recall_stderr": 0.0024356487242317496, "rougeLsum_fmeasure": 0.15898747044638634, "rougeLsum_fmeasure_stderr": 0.001985653665401712, "rougeLsum_precision": 0.14386673374882758, "rougeLsum_precision_stderr": 0.0022027329843523624, "rougeLsum_recall": 0.22930536403174942, "rougeLsum_recall_stderr": 0.00299463589962154}}, "4": {"tldr_en": {"bleu": 0.5131502979968497, "bleu_stderr": 0.03608605201687368, "rouge1_fmeasure": 0.053737140223689486, "rouge1_fmeasure_stderr": 0.0018027194165493082, "rouge1_precision": 0.049838643415747924, "rouge1_precision_stderr": 0.0018648700546221163, "rouge1_recall": 0.08127263942152925, "rouge1_recall_stderr": 0.002809816576781384, "rouge2_fmeasure": 0.011577372832860292, "rouge2_fmeasure_stderr": 0.0005741552055829621, "rouge2_precision": 0.010454163862870928, "rouge2_precision_stderr": 0.0006314369138935627, "rouge2_recall": 0.018556447747239968, "rouge2_recall_stderr": 0.0010227312175774936, "rougeL_fmeasure": 0.03964909379605471, "rougeL_fmeasure_stderr": 0.0013037832006282204, "rougeL_precision": 0.03679280580552151, "rougeL_precision_stderr": 0.0013808305653870323, "rougeL_recall": 0.06130836046537618, "rougeL_recall_stderr": 0.0021484750687546955, "rougeLsum_fmeasure": 0.05008741643259905, "rougeLsum_fmeasure_stderr": 0.0016808712625286486, "rougeLsum_precision": 0.04645621651758715, "rougeLsum_precision_stderr": 0.0017446445354945056, "rougeLsum_recall": 0.07583414283710693, "rougeLsum_recall_stderr": 0.0026217037949781957}}, "5": {"tldr_en": {"bleu": 1.248461374492507e-06, "bleu_stderr": 1.819428886922986e-06, "rouge1_fmeasure": 0.008601438929896699, "rouge1_fmeasure_stderr": 0.0008253043334091997, "rouge1_precision": 0.008297906112439548, "rouge1_precision_stderr": 0.000868651228038671, "rouge1_recall": 0.012867955004852301, "rouge1_recall_stderr": 0.0012711068547619942, "rouge2_fmeasure": 0.0018806245405111135, "rouge2_fmeasure_stderr": 0.0002580913633728736, "rouge2_precision": 0.0017481031836950151, "rouge2_precision_stderr": 0.0002752325580060782, "rouge2_recall": 0.002944671420000278, "rouge2_recall_stderr": 0.0004173458971296761, "rougeL_fmeasure": 0.006365417216740395, "rougeL_fmeasure_stderr": 0.0006037507741744518, "rougeL_precision": 0.006245166550957495, "rougeL_precision_stderr": 0.0006614482390967565, "rougeL_recall": 0.009611486285848429, "rougeL_recall_stderr": 0.0009555297578227913, "rougeLsum_fmeasure": 0.007901285151488387, "rougeLsum_fmeasure_stderr": 0.0007528984377933357, "rougeLsum_precision": 0.0076110422658591855, "rougeLsum_precision_stderr": 0.0007924852197189941, "rougeLsum_recall": 0.01188342258046937, "rougeLsum_recall_stderr": 0.001169956076721688}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 1.3188894749612563, "bleu_stderr": 0.044830637788985425, "rouge1_fmeasure": 0.09522733995608121, "rouge1_fmeasure_stderr": 0.0015050928361733534, "rouge1_precision": 0.12753556537606628, "rouge1_precision_stderr": 0.0029272372203605237, "rouge1_recall": 0.11864860513231083, "rouge1_recall_stderr": 0.0020905699726606005, "rouge2_fmeasure": 0.019204879284699994, "rouge2_fmeasure_stderr": 0.0006727499673039849, "rouge2_precision": 0.025675743778735428, "rouge2_precision_stderr": 0.001823036204109364, "rouge2_recall": 0.027052675155158974, "rouge2_recall_stderr": 0.0009673837604817755, "rougeL_fmeasure": 0.09185452447122286, "rougeL_fmeasure_stderr": 0.0014535063646756094, "rougeL_precision": 0.12067831736125931, "rougeL_precision_stderr": 0.0027766523050170443, "rougeL_recall": 0.11582003123688629, "rougeL_recall_stderr": 0.0020540885948462668, "rougeLsum_fmeasure": 0.08317942954808118, "rougeLsum_fmeasure_stderr": 0.0013322108791823542, "rougeLsum_precision": 0.1149608836927157, "rougeLsum_precision_stderr": 0.0028163032532190157, "rougeLsum_recall": 0.10237960786694723, "rougeLsum_recall_stderr": 0.0017967159967308072}}, "1": {"generate_text_restaurant": {"bleu": 8.268856250299471, "bleu_stderr": 0.10325577622421016, "rouge1_fmeasure": 0.38556499935543515, "rouge1_fmeasure_stderr": 0.002123280312714465, "rouge1_precision": 0.39554488581088626, "rouge1_precision_stderr": 0.0029681190545171763, "rouge1_recall": 0.43199771563925204, "rouge1_recall_stderr": 0.0028203929947275682, "rouge2_fmeasure": 0.16338292931453433, "rouge2_fmeasure_stderr": 0.0016456210441298405, "rouge2_precision": 0.16992099167287691, "rouge2_precision_stderr": 0.002094780486214531, "rouge2_recall": 0.1840776136278433, "rouge2_recall_stderr": 0.0019786136819072175, "rougeL_fmeasure": 0.27474972705805406, "rougeL_fmeasure_stderr": 0.0016803548099252608, "rougeL_precision": 0.28133109075015317, "rougeL_precision_stderr": 0.0023505923542842607, "rougeL_recall": 0.31196940533650747, "rougeL_recall_stderr": 0.0023918677481109705, "rougeLsum_fmeasure": 0.3161202268836757, "rougeLsum_fmeasure_stderr": 0.0019874232963612653, "rougeLsum_precision": 0.32483786204383086, "rougeLsum_precision_stderr": 0.002682647469519991, "rougeLsum_recall": 0.3546352816567669, "rougeLsum_recall_stderr": 0.0026072410079706235}}, "2": {"generate_text_restaurant": {"bleu": 8.752044150279037, "bleu_stderr": 0.17075525038552697, "rouge1_fmeasure": 0.38728648242815544, "rouge1_fmeasure_stderr": 0.0020736700072837727, "rouge1_precision": 0.3917573801974604, "rouge1_precision_stderr": 0.0031721059033143527, "rouge1_recall": 0.4480941133150411, "rouge1_recall_stderr": 0.002696304554783389, "rouge2_fmeasure": 0.1746241953539603, "rouge2_fmeasure_stderr": 0.0016245037242248635, "rouge2_precision": 0.1794594258413998, "rouge2_precision_stderr": 0.0022089039588187673, "rouge2_recall": 0.20362379083864202, "rouge2_recall_stderr": 0.0020059762697191225, "rougeL_fmeasure": 0.2888107866114557, "rougeL_fmeasure_stderr": 0.0016557523363002642, "rougeL_precision": 0.28989350350875787, "rougeL_precision_stderr": 0.002475041036978829, "rougeL_recall": 0.34036245653680275, "rougeL_recall_stderr": 0.0024309386217453996, "rougeLsum_fmeasure": 0.3205192518960678, "rougeLsum_fmeasure_stderr": 0.001992166841602722, "rougeLsum_precision": 0.3243897832360314, "rougeLsum_precision_stderr": 0.0028658165108453233, "rougeLsum_recall": 0.3715817073712579, "rougeLsum_recall_stderr": 0.002575962312302599}}, "3": {"generate_text_restaurant": {"bleu": 9.215917819944979, "bleu_stderr": 0.16750978186871135, "rouge1_fmeasure": 0.39498047709327316, "rouge1_fmeasure_stderr": 0.001995573628273643, "rouge1_precision": 0.3934162440731124, "rouge1_precision_stderr": 0.002870244512079275, "rouge1_recall": 0.45219902355874647, "rouge1_recall_stderr": 0.0026418133009258812, "rouge2_fmeasure": 0.17866109909559874, "rouge2_fmeasure_stderr": 0.0016358926141113962, "rouge2_precision": 0.17874070190714092, "rouge2_precision_stderr": 0.001975746196223423, "rouge2_recall": 0.20680510069987115, "rouge2_recall_stderr": 0.0020223490797835085, "rougeL_fmeasure": 0.29363065430769997, "rougeL_fmeasure_stderr": 0.0016676977783533096, "rougeL_precision": 0.290448701839473, "rougeL_precision_stderr": 0.002253601724914918, "rougeL_recall": 0.3411730757008735, "rougeL_recall_stderr": 0.0024103631845953434, "rougeLsum_fmeasure": 0.32908024305027606, "rougeLsum_fmeasure_stderr": 0.0019846013693823068, "rougeLsum_precision": 0.3277880264847081, "rougeLsum_precision_stderr": 0.00263580422255914, "rougeLsum_recall": 0.37671227249787725, "rougeLsum_recall_stderr": 0.0025378175774952855}}, "4": {"generate_text_restaurant": {"bleu": 9.29752714617784, "bleu_stderr": 0.1812520347088303, "rouge1_fmeasure": 0.3976120017477611, "rouge1_fmeasure_stderr": 0.001957196228653569, "rouge1_precision": 0.3936925579222371, "rouge1_precision_stderr": 0.0027027511186824126, "rouge1_recall": 0.4504460639681705, "rouge1_recall_stderr": 0.002571460027706377, "rouge2_fmeasure": 0.17875573947192416, "rouge2_fmeasure_stderr": 0.0016236641548062869, "rouge2_precision": 0.17770870518037865, "rouge2_precision_stderr": 0.0019011602774487565, "rouge2_recall": 0.20457826240041674, "rouge2_recall_stderr": 0.0020025597786927523, "rougeL_fmeasure": 0.2928958152017805, "rougeL_fmeasure_stderr": 0.0016441524187754983, "rougeL_precision": 0.2884163348982613, "rougeL_precision_stderr": 0.0021258462775742035, "rougeL_recall": 0.33596502253160176, "rougeL_recall_stderr": 0.0023345162307640745, "rougeLsum_fmeasure": 0.3318533780355719, "rougeLsum_fmeasure_stderr": 0.0019701005892539393, "rougeLsum_precision": 0.3286132019532869, "rougeLsum_precision_stderr": 0.0025120908786034103, "rougeLsum_recall": 0.375851104366494, "rougeLsum_recall_stderr": 0.0024995842860145783}}, "5": {"generate_text_restaurant": {"bleu": 9.362612934017736, "bleu_stderr": 0.10855778575738645, "rouge1_fmeasure": 0.3990120255227092, "rouge1_fmeasure_stderr": 0.0019178676126783694, "rouge1_precision": 0.3932319285778918, "rouge1_precision_stderr": 0.002548301621920845, "rouge1_recall": 0.4510310118426822, "rouge1_recall_stderr": 0.002590862288359499, "rouge2_fmeasure": 0.1796078353066716, "rouge2_fmeasure_stderr": 0.0016133475250028491, "rouge2_precision": 0.17695121418361603, "rouge2_precision_stderr": 0.001812178520561456, "rouge2_recall": 0.20535927412310515, "rouge2_recall_stderr": 0.002007657859036802, "rougeL_fmeasure": 0.29339815308032224, "rougeL_fmeasure_stderr": 0.0016446577983633287, "rougeL_precision": 0.2876654999305798, "rougeL_precision_stderr": 0.0020218264371153357, "rougeL_recall": 0.3353144854132937, "rougeL_recall_stderr": 0.0023517253283072198, "rougeLsum_fmeasure": 0.33456104383687457, "rougeLsum_fmeasure_stderr": 0.0019432720388950727, "rougeLsum_precision": 0.32975716195855664, "rougeLsum_precision_stderr": 0.0024048861669270427, "rougeLsum_recall": 0.3783760396312408, "rougeLsum_recall_stderr": 0.0025325615406918676}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.768869507594394, "bleu_stderr": 0.08294604134047384, "rouge1_fmeasure": 0.2094281184112181, "rouge1_fmeasure_stderr": 0.0025790884426142523, "rouge1_precision": 0.1617749224002556, "rouge1_precision_stderr": 0.00237970226939903, "rouge1_recall": 0.335879396267748, "rouge1_recall_stderr": 0.0042531362256949, "rouge2_fmeasure": 0.04546276335000291, "rouge2_fmeasure_stderr": 0.0016156509146664276, "rouge2_precision": 0.03504593519505288, "rouge2_precision_stderr": 0.0013848794166905064, "rouge2_recall": 0.07425434107051126, "rouge2_recall_stderr": 0.002587335093539296, "rougeL_fmeasure": 0.15494261699819858, "rougeL_fmeasure_stderr": 0.002017114732994929, "rougeL_precision": 0.12002490384443826, "rougeL_precision_stderr": 0.0019143143405997343, "rougeL_recall": 0.24893620991791676, "rougeL_recall_stderr": 0.0033039443386605792, "rougeLsum_fmeasure": 0.16626632596350763, "rougeLsum_fmeasure_stderr": 0.0022125352263972875, "rougeLsum_precision": 0.12816677371397414, "rougeLsum_precision_stderr": 0.0019982013307588676, "rougeLsum_recall": 0.2682403202658057, "rougeLsum_recall_stderr": 0.003730829120509885}}, "1": {"article_DOC_summary": {"bleu": 1.264412591105362, "bleu_stderr": 0.058305941586244456, "rouge1_fmeasure": 0.17646870602303902, "rouge1_fmeasure_stderr": 0.002449669051301199, "rouge1_precision": 0.12542707936195607, "rouge1_precision_stderr": 0.0018304227801297888, "rouge1_recall": 0.30968982238658754, "rouge1_recall_stderr": 0.004101305886296608, "rouge2_fmeasure": 0.032922044625399265, "rouge2_fmeasure_stderr": 0.001308615681086968, "rouge2_precision": 0.02318241288852908, "rouge2_precision_stderr": 0.0009280565789217983, "rouge2_recall": 0.05908257096692382, "rouge2_recall_stderr": 0.002371758279207681, "rougeL_fmeasure": 0.13369172824813308, "rougeL_fmeasure_stderr": 0.0017713795391967972, "rougeL_precision": 0.0948327244688056, "rougeL_precision_stderr": 0.0013161667351585, "rougeL_recall": 0.23622628224705486, "rougeL_recall_stderr": 0.003080369698520929, "rougeLsum_fmeasure": 0.14119956709209672, "rougeLsum_fmeasure_stderr": 0.0019694996230184697, "rougeLsum_precision": 0.10008486714316106, "rougeLsum_precision_stderr": 0.0014521255501553532, "rougeLsum_recall": 0.24979473984092346, "rougeLsum_recall_stderr": 0.003444251234794886}}, "2": {"article_DOC_summary": {"bleu": 1.0987519725426753, "bleu_stderr": 0.03522818842665886, "rouge1_fmeasure": 0.16824394153138908, "rouge1_fmeasure_stderr": 0.0023490737769116094, "rouge1_precision": 0.11927780806435477, "rouge1_precision_stderr": 0.0017463880188515342, "rouge1_recall": 0.2969707610281187, "rouge1_recall_stderr": 0.004016473347933986, "rouge2_fmeasure": 0.03002721161354905, "rouge2_fmeasure_stderr": 0.0012316383364728099, "rouge2_precision": 0.021171125258080348, "rouge2_precision_stderr": 0.0008758008694717519, "rouge2_recall": 0.05384967895719866, "rouge2_recall_stderr": 0.002250067986717671, "rougeL_fmeasure": 0.13066203951989017, "rougeL_fmeasure_stderr": 0.001754156774657369, "rougeL_precision": 0.09253346970549631, "rougeL_precision_stderr": 0.001298228053049304, "rougeL_recall": 0.2315438648892566, "rougeL_recall_stderr": 0.003076756651824853, "rougeLsum_fmeasure": 0.13477311818536933, "rougeLsum_fmeasure_stderr": 0.001925520917624478, "rougeLsum_precision": 0.09538915399787434, "rougeLsum_precision_stderr": 0.001420693211469294, "rougeLsum_recall": 0.23912980735259662, "rougeLsum_recall_stderr": 0.00337672312378502}}, "3": {"article_DOC_summary": {"bleu": 0.997414867115136, "bleu_stderr": 0.05869103709177821, "rouge1_fmeasure": 0.15664009606941512, "rouge1_fmeasure_stderr": 0.0023702211117633955, "rouge1_precision": 0.11322746770756971, "rouge1_precision_stderr": 0.0018344350541308469, "rouge1_recall": 0.2726532450488621, "rouge1_recall_stderr": 0.004047751477113823, "rouge2_fmeasure": 0.026648660253339233, "rouge2_fmeasure_stderr": 0.0011756202673366485, "rouge2_precision": 0.01909537949600744, "rouge2_precision_stderr": 0.0008844078702649576, "rouge2_recall": 0.04741007757753075, "rouge2_recall_stderr": 0.0021059105310903435, "rougeL_fmeasure": 0.12256309804036948, "rougeL_fmeasure_stderr": 0.0017906867596916383, "rougeL_precision": 0.08861904770272214, "rougeL_precision_stderr": 0.0014108872664219377, "rougeL_recall": 0.21417348286977264, "rougeL_recall_stderr": 0.003130791990726019, "rougeLsum_fmeasure": 0.12555539609750047, "rougeLsum_fmeasure_stderr": 0.0019248864380343744, "rougeLsum_precision": 0.09067669669398595, "rougeLsum_precision_stderr": 0.0014934281129849842, "rougeLsum_recall": 0.21981690830637074, "rougeLsum_recall_stderr": 0.0033769616222655936}}, "4": {"article_DOC_summary": {"bleu": 0.3595681922766225, "bleu_stderr": 0.08302724390926437, "rouge1_fmeasure": 0.03912775891417527, "rouge1_fmeasure_stderr": 0.0022480249298505086, "rouge1_precision": 0.03336165212470275, "rouge1_precision_stderr": 0.0021721323035229496, "rouge1_recall": 0.06236797991424603, "rouge1_recall_stderr": 0.0036619285809241045, "rouge2_fmeasure": 0.005235866586219103, "rouge2_fmeasure_stderr": 0.0006173680600711812, "rouge2_precision": 0.003971072059610982, "rouge2_precision_stderr": 0.00047320081741051575, "rouge2_recall": 0.008875812344588924, "rouge2_recall_stderr": 0.0010797839682602972, "rougeL_fmeasure": 0.03069726623941213, "rougeL_fmeasure_stderr": 0.001712938549346239, "rougeL_precision": 0.026450065274545246, "rougeL_precision_stderr": 0.0017610900025980948, "rougeL_recall": 0.04941272986819276, "rougeL_recall_stderr": 0.0028669831282231704, "rougeLsum_fmeasure": 0.03131245006422282, "rougeLsum_fmeasure_stderr": 0.0017856839944735646, "rougeLsum_precision": 0.02698730446907224, "rougeLsum_precision_stderr": 0.001808112917392319, "rougeLsum_recall": 0.05033676703226922, "rougeLsum_recall_stderr": 0.0029803792624128944}}, "5": {"article_DOC_summary": {"bleu": 2.509258762242429e-38, "bleu_stderr": 9.767050327558104e-33, "rouge1_fmeasure": 0.0024515154297121964, "rouge1_fmeasure_stderr": 0.0006528237081721469, "rouge1_precision": 0.002809717218415156, "rouge1_precision_stderr": 0.0007799122566045872, "rouge1_recall": 0.002297493928770826, "rouge1_recall_stderr": 0.0006137569684554244, "rouge2_fmeasure": 0.00038400865766981235, "rouge2_fmeasure_stderr": 0.0002420252471501388, "rouge2_precision": 0.00044803756323120775, "rouge2_precision_stderr": 0.0002593397015980247, "rouge2_recall": 0.000357347055460263, "rouge2_recall_stderr": 0.00024108107952276452, "rougeL_fmeasure": 0.0019564763828564848, "rougeL_fmeasure_stderr": 0.0005370357747414661, "rougeL_precision": 0.002223032693562514, "rougeL_precision_stderr": 0.0006311357223918826, "rougeL_recall": 0.0018590186982094326, "rougeL_recall_stderr": 0.0005204880276058125, "rougeLsum_fmeasure": 0.0019564763828564848, "rougeLsum_fmeasure_stderr": 0.0005370357747414661, "rougeLsum_precision": 0.002223032693562514, "rougeLsum_precision_stderr": 0.0006311357223918826, "rougeLsum_recall": 0.0018590186982094326, "rougeLsum_recall_stderr": 0.0005204880276058125}}}}