boyiwei committed on
Commit
c94c38d
1 Parent(s): 35e9319
app.py CHANGED
@@ -27,9 +27,13 @@ def baseline_load_data(model, dataset, setting, criteria):
27
  df = pd.read_csv(file_path)
28
 
29
  # we only want specific columns and in a specific order
30
- column_names = ["model_name","method","rouge1","rougeL","semantic_sim","LCS(character)","LCS(word)","ACS(word)","Levenshtein Distance","Minhash Similarity"
 
 
 
 
 
31
  ]
32
-
33
  df = df[column_names]
34
 
35
  return df
 
27
  df = pd.read_csv(file_path)
28
 
29
  # we only want specific columns and in a specific order
30
+ if dataset == 'news':
31
+ column_names = ["model_name","method","rouge1","rougeL","semantic_sim","LCS(character)","LCS(word)","ACS(word)","Levenshtein Distance","Minhash Similarity",
32
+ "MMLU","MT-Bench","Blocklisted F1","In-Domain F1","Efficiency"]
33
+ elif dataset == 'books':
34
+ column_names = ["model_name","method","bleu","rouge1","rougeL","semantic_sim","LCS(character)","LCS(word)","ACS(word)","Levenshtein Distance","Minhash Similarity",
35
+ "MMLU","MT-Bench","Blocklisted rougeL","In-Domain rougeL","Efficiency"
36
  ]
 
37
  df = df[column_names]
38
 
39
  return df
versions/dbrx_books_rag_max.csv DELETED
@@ -1,4 +0,0 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- dbrx_books_rag,vanilla,0.9745454545454546,0.9745454545454546,0.9914453029632568,823.0,177.0,177.0,1516.0,0.96875
3
- dbrx_books_rag,sys_prompt_bing,0.9867549668874172,0.9867549668874172,0.995707392692566,823.0,174.0,174.0,1335.0,0.96875
4
- dbrx_books_rag,sys_prompt_dbrx,0.9745454545454546,0.9745454545454546,0.9914453029632568,823.0,174.0,174.0,1515.0,0.96875
 
 
 
 
 
versions/dbrx_books_rag_mean.csv DELETED
@@ -1,4 +0,0 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- dbrx_books_rag,vanilla,0.5689588316916804,0.5020024485293773,0.8000551287829876,278.262,69.392,88.592,802.502,0.4655
3
- dbrx_books_rag,sys_prompt_bing,0.5803775846370155,0.5160356258645495,0.8155042352676392,289.99,72.13,91.784,795.454,0.4735625
4
- dbrx_books_rag,sys_prompt_dbrx,0.5515556273336947,0.4784302702378168,0.7829765513241291,263.028,66.356,83.424,821.688,0.44559375
 
 
 
 
 
versions/dbrx_news_rag_max.csv CHANGED
@@ -1,4 +1,4 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- dbrx_news_rag,vanilla,0.9972144846796658,0.9972144846796658,0.998486876487732,967.0,178.0,178.0,12800.0,0.953125
3
- dbrx_news_rag,sys_prompt_bing,0.9972144846796658,0.9972144846796658,0.9991275668144226,967.0,178.0,178.0,12800.0,0.9453125
4
- dbrx_news_rag,sys_prompt_dbrx,0.9972144846796658,0.9972144846796658,0.998486876487732,967.0,178.0,178.0,12799.0,0.96875
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
2
+ dbrx_news_rag,vanilla,0.9972144846796658,0.9972144846796658,0.998486876487732,967.0,178.0,178.0,12800.0,0.953125,0.745,7.9,0.632,0.656,1.00
3
+ dbrx_news_rag,sys_prompt_bing,0.9972144846796658,0.9972144846796658,0.9991275668144226,967.0,178.0,178.0,12800.0,0.9453125,0.746,7.8,0.617,0.653,1.00
4
+ dbrx_news_rag,sys_prompt_dbrx,0.9972144846796658,0.9972144846796658,0.998486876487732,967.0,178.0,178.0,12799.0,0.96875,0.741,7.9,0.625,0.657,1.00
versions/dbrx_news_rag_mean.csv CHANGED
@@ -1,4 +1,4 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- dbrx_news_rag,vanilla,0.7815191764156546,0.7299267357678937,0.8947334303073585,385.436,60.325,111.931,394.9,0.6435390625
3
- dbrx_news_rag,sys_prompt_bing,0.7812955037322756,0.7315094046904135,0.8909121553320438,392.926,62.206,111.914,417.025,0.6446484375
4
- dbrx_news_rag,sys_prompt_dbrx,0.7358662775642347,0.6729126845545748,0.8672493962747976,347.785,56.134,101.369,455.369,0.5995546875
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
2
+ dbrx_news_rag,vanilla,0.7815191764156546,0.7299267357678937,0.8947334303073585,385.436,60.325,111.931,394.9,0.6435390625,0.745,7.9,0.632,0.656,1.00
3
+ dbrx_news_rag,sys_prompt_bing,0.7812955037322756,0.7315094046904135,0.8909121553320438,392.926,62.206,111.914,417.025,0.6446484375,0.746,7.8,0.617,0.653,1.00
4
+ dbrx_news_rag,sys_prompt_dbrx,0.7358662775642347,0.6729126845545748,0.8672493962747976,347.785,56.134,101.369,455.369,0.5995546875,0.741,7.9,0.625,0.657,1.00
versions/llama2-70b-chat-hf_books_rag_max.csv CHANGED
@@ -1,5 +1,5 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- llama2-70b-chat-hf_books_rag,vanilla,0.99009900990099,0.99009900990099,0.9815567135810852,741.0,160.0,160.0,1451.0,0.984375
3
- llama2-70b-chat-hf_books_rag,sys_prompt_bing,0.99009900990099,0.99009900990099,0.9815567135810852,741.0,160.0,160.0,1513.0,0.984375
4
- llama2-70b-chat-hf_books_rag,top_k_3,0.84251968503937,0.8346456692913385,0.9569202065467834,695.0,158.0,158.0,1577.0,0.796875
5
- llama2-70b-chat-hf_books_rag,memfree_6,0.8925081433224755,0.8794788273615635,0.9494256973266602,349.0,71.0,140.0,1506.0,0.78125
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted rougeL,In-Domain rougeL,Efficiency
2
+ llama2-70b-chat-hf_books_rag,vanilla,0.99009900990099,0.99009900990099,0.9815567135810852,741.0,160.0,160.0,1451.0,0.984375,0.619,7.1,0.156,0.161,1.00
3
+ llama2-70b-chat-hf_books_rag,sys_prompt_bing,0.99009900990099,0.99009900990099,0.9815567135810852,741.0,160.0,160.0,1513.0,0.984375,0.614,7.2,0.136,0.144,1.00
4
+ llama2-70b-chat-hf_books_rag,top_k_3,0.84251968503937,0.8346456692913385,0.9569202065467834,695.0,158.0,158.0,1577.0,0.796875,0.361,4.8,0.145,0.146,0.99
5
+ llama2-70b-chat-hf_books_rag,memfree_6,0.8925081433224755,0.8794788273615635,0.9494256973266602,349.0,71.0,140.0,1506.0,0.78125,0.619,6.6,0.152,0.160,0.99
versions/llama2-70b-chat-hf_books_rag_mean.csv CHANGED
@@ -1,5 +1,5 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- llama2-70b-chat-hf_books_rag,vanilla,0.5732462408113856,0.5222700156352323,0.7994520317018032,296.512,82.92,91.548,834.408,0.4805625
3
- llama2-70b-chat-hf_books_rag,sys_prompt_bing,0.5407283889959525,0.4833723437140989,0.7517191929742694,259.41,72.89,82.328,864.392,0.4486875
4
- llama2-70b-chat-hf_books_rag,top_k_3,0.3850462334392186,0.29023563295375826,0.6614945132676512,89.078,21.934,37.81,990.548,0.294453125
5
- llama2-70b-chat-hf_books_rag,memfree_6,0.4954678289158522,0.41822885849951075,0.7470789320915937,61.16,13.412,65.04,908.816,0.383328125
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted rougeL,In-Domain rougeL,Efficiency
2
+ llama2-70b-chat-hf_books_rag,vanilla,0.5732462408113856,0.5222700156352323,0.7994520317018032,296.512,82.92,91.548,834.408,0.4805625,0.619,7.1,0.156,0.161,1.00
3
+ llama2-70b-chat-hf_books_rag,sys_prompt_bing,0.5407283889959525,0.4833723437140989,0.7517191929742694,259.41,72.89,82.328,864.392,0.4486875,0.614,7.2,0.136,0.144,1.00
4
+ llama2-70b-chat-hf_books_rag,top_k_3,0.3850462334392186,0.29023563295375826,0.6614945132676512,89.078,21.934,37.81,990.548,0.294453125,0.361,4.8,0.145,0.146,0.99
5
+ llama2-70b-chat-hf_books_rag,memfree_6,0.4954678289158522,0.41822885849951075,0.7470789320915937,61.16,13.412,65.04,908.816,0.383328125,0.619,6.6,0.152,0.160,0.99
versions/llama2-70b-chat-hf_news_rag_max.csv CHANGED
@@ -1,5 +1,5 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- llama2-70b-chat-hf_news_rag,vanilla,0.9583333333333334,0.9583333333333334,0.9949228167533876,771.0,159.0,159.0,1155.0,0.953125
3
- llama2-70b-chat-hf_news_rag,sys_prompt_bing,0.9583333333333334,0.9583333333333334,0.9949228167533876,774.0,157.0,157.0,981.0,0.9609375
4
- llama2-70b-chat-hf_news_rag,top_k_3,0.9207317073170732,0.9207317073170732,0.9887909293174744,734.0,151.0,151.0,1048.0,0.953125
5
- llama2-70b-chat-hf_news_rag,memfree_6,0.9042904290429044,0.8844884488448846,0.97174334526062,659.0,120.0,134.0,1155.0,0.8203125
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
2
+ llama2-70b-chat-hf_news_rag,vanilla,0.9583333333333334,0.9583333333333334,0.9949228167533876,771.0,159.0,159.0,1155.0,0.953125,0.619,7.1,0.595,0.624,1.00
3
+ llama2-70b-chat-hf_news_rag,sys_prompt_bing,0.9583333333333334,0.9583333333333334,0.9949228167533876,774.0,157.0,157.0,981.0,0.9609375,0.614,7.2,0.594,0.616,1.00
4
+ llama2-70b-chat-hf_news_rag,top_k_3,0.9207317073170732,0.9207317073170732,0.9887909293174744,734.0,151.0,151.0,1048.0,0.953125,0.361,4.8,0.120,0.077,0.99
5
+ llama2-70b-chat-hf_news_rag,memfree_6,0.9042904290429044,0.8844884488448846,0.97174334526062,659.0,120.0,134.0,1155.0,0.8203125,0.619,6.6,0.514,0.601,0.99
versions/llama2-70b-chat-hf_news_rag_mean.csv CHANGED
@@ -1,5 +1,5 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- llama2-70b-chat-hf_news_rag,vanilla,0.6822865714014262,0.6199068209453332,0.840910555485636,383.537,76.68,89.165,461.589,0.5946875
3
- llama2-70b-chat-hf_news_rag,sys_prompt_bing,0.630698567215431,0.5610633998461577,0.7809992222869768,329.287,63.72,78.661,508.58,0.53940625
4
- llama2-70b-chat-hf_news_rag,top_k_3,0.45021398015786224,0.33722548268443875,0.7241460259128362,112.794,21.733,36.267,678.657,0.35540625
5
- llama2-70b-chat-hf_news_rag,memfree_6,0.5686107281633525,0.49311296818266537,0.7844266164638102,83.406,11.907,61.525,553.886,0.46215625
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
2
+ llama2-70b-chat-hf_news_rag,vanilla,0.6822865714014262,0.6199068209453332,0.840910555485636,383.537,76.68,89.165,461.589,0.5946875,0.619,7.1,0.595,0.624,1.00
3
+ llama2-70b-chat-hf_news_rag,sys_prompt_bing,0.630698567215431,0.5610633998461577,0.7809992222869768,329.287,63.72,78.661,508.58,0.53940625,0.614,7.2,0.594,0.616,1.00
4
+ llama2-70b-chat-hf_news_rag,top_k_3,0.45021398015786224,0.33722548268443875,0.7241460259128362,112.794,21.733,36.267,678.657,0.35540625,0.361,4.8,0.120,0.077,0.99
5
+ llama2-70b-chat-hf_news_rag,memfree_6,0.5686107281633525,0.49311296818266537,0.7844266164638102,83.406,11.907,61.525,553.886,0.46215625,0.619,6.6,0.514,0.601,0.99
versions/llama2-7b-chat-hf-newsqa_news_memorization_max.csv CHANGED
@@ -1,4 +1,4 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
  llama2-7b-chat-hf-newsqa_news_mem,vanilla,0.8493975903614458,0.8493975903614458,0.9233227968215942,641.0,138.0,138.0,984.0,0.75
3
  llama2-7b-chat-hf-newsqa_news_mem,sys_prompt_bing,0.8385093167701863,0.8012422360248447,0.9298239946365356,561.0,119.0,124.0,1107.0,0.8125
4
  llama2-7b-chat-hf-newsqa_news_mem,top_k_3,0.4540229885057471,0.2695035460992908,0.8801454901695251,75.0,16.0,25.0,1155.0,0.3671875
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
2
  llama2-7b-chat-hf-newsqa_news_mem,vanilla,0.8493975903614458,0.8493975903614458,0.9233227968215942,641.0,138.0,138.0,984.0,0.75
3
  llama2-7b-chat-hf-newsqa_news_mem,sys_prompt_bing,0.8385093167701863,0.8012422360248447,0.9298239946365356,561.0,119.0,124.0,1107.0,0.8125
4
  llama2-7b-chat-hf-newsqa_news_mem,top_k_3,0.4540229885057471,0.2695035460992908,0.8801454901695251,75.0,16.0,25.0,1155.0,0.3671875
versions/llama2-7b-chat-hf-newsqa_news_memorization_mean.csv CHANGED
@@ -1,4 +1,4 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
  llama2-7b-chat-hf-newsqa_news_mem,vanilla,0.3081531364519125,0.1536234348340788,0.5817865846387756,18.516028955532576,3.4312306101344365,6.510858324715615,789.7135470527404,0.22225633402275077
3
  llama2-7b-chat-hf-newsqa_news_mem,sys_prompt_bing,0.17829751751006317,0.1004113984002821,0.21774254646167307,9.66804550155119,0.8179937952430196,1.3681489141675285,829.5211995863496,0.1533980739400207
4
  llama2-7b-chat-hf-newsqa_news_mem,top_k_3,0.2746915315959413,0.13568918469713084,0.5451532928829275,13.820062047569804,2.0206825232678387,3.542916235780765,805.828335056877,0.2031653955532575
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
2
  llama2-7b-chat-hf-newsqa_news_mem,vanilla,0.3081531364519125,0.1536234348340788,0.5817865846387756,18.516028955532576,3.4312306101344365,6.510858324715615,789.7135470527404,0.22225633402275077
3
  llama2-7b-chat-hf-newsqa_news_mem,sys_prompt_bing,0.17829751751006317,0.1004113984002821,0.21774254646167307,9.66804550155119,0.8179937952430196,1.3681489141675285,829.5211995863496,0.1533980739400207
4
  llama2-7b-chat-hf-newsqa_news_mem,top_k_3,0.2746915315959413,0.13568918469713084,0.5451532928829275,13.820062047569804,2.0206825232678387,3.542916235780765,805.828335056877,0.2031653955532575
versions/llama2-7b-chat-hf_books_rag_max.csv CHANGED
@@ -1,5 +1,5 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- llama2-7b-chat-hf_books_rag,vanilla,0.9142857142857144,0.9142857142857144,0.95524001121521,702.0,159.0,159.0,1752.0,0.859375
3
- llama2-7b-chat-hf_books_rag,sys_prompt_bing,0.868804664723032,0.8571428571428572,0.9534283876419068,696.0,159.0,159.0,1760.0,0.7421875
4
- llama2-7b-chat-hf_books_rag,top_k_3,0.7251732101616628,0.7113163972286374,0.95726478099823,637.0,141.0,151.0,1640.0,0.6484375
5
- llama2-7b-chat-hf_books_rag,mem_free_6,0.7452229299363057,0.7388535031847132,0.9250864386558532,228.0,67.0,133.0,1770.0,0.640625
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted rougeL,In-Domain rougeL,Efficiency
2
+ llama2-7b-chat-hf_books_rag,vanilla,0.9142857142857144,0.9142857142857144,0.95524001121521,702.0,159.0,159.0,1752.0,0.859375,0.482,6.3,0.153,0.162,1.00
3
+ llama2-7b-chat-hf_books_rag,sys_prompt_bing,0.868804664723032,0.8571428571428572,0.9534283876419068,696.0,159.0,159.0,1760.0,0.7421875,0.476,5.6,0.146,0.153,1.00
4
+ llama2-7b-chat-hf_books_rag,top_k_3,0.7251732101616628,0.7113163972286374,0.95726478099823,637.0,141.0,151.0,1640.0,0.6484375,0.354,3.8,0.133,0.138,0.98
5
+ llama2-7b-chat-hf_books_rag,mem_free_6,0.7452229299363057,0.7388535031847132,0.9250864386558532,228.0,67.0,133.0,1770.0,0.640625,0.482,6.4,0.147,0.164,0.92
versions/llama2-7b-chat-hf_books_rag_mean.csv CHANGED
@@ -1,5 +1,5 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- llama2-7b-chat-hf_books_rag,vanilla,0.3793100004123799,0.2940219580994816,0.6501748622655869,125.372,35.716,41.978,1030.08,0.292515625
3
- llama2-7b-chat-hf_books_rag,sys_prompt_bing,0.32059864243453196,0.2259060061500794,0.5116935865692794,85.55,24.6,28.042,1054.776,0.239609375
4
- llama2-7b-chat-hf_books_rag,top_k_3,0.2612840195100785,0.1652209651123422,0.5553371990695596,38.028,8.786,13.962,1114.024,0.197859375
5
- llama2-7b-chat-hf_books_rag,mem_free_6,0.284700458755956,0.19193100960868134,0.5834064832031727,31.064,6.172,20.204,1111.524,0.205546875
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted rougeL,In-Domain rougeL,Efficiency
2
+ llama2-7b-chat-hf_books_rag,vanilla,0.3793100004123799,0.2940219580994816,0.6501748622655869,125.372,35.716,41.978,1030.08,0.292515625,0.482,6.3,0.153,0.162,1.00
3
+ llama2-7b-chat-hf_books_rag,sys_prompt_bing,0.32059864243453196,0.2259060061500794,0.5116935865692794,85.55,24.6,28.042,1054.776,0.239609375,0.476,5.6,0.146,0.153,1.00
4
+ llama2-7b-chat-hf_books_rag,top_k_3,0.2612840195100785,0.1652209651123422,0.5553371990695596,38.028,8.786,13.962,1114.024,0.197859375,0.354,3.8,0.133,0.138,0.98
5
+ llama2-7b-chat-hf_books_rag,mem_free_6,0.284700458755956,0.19193100960868134,0.5834064832031727,31.064,6.172,20.204,1111.524,0.205546875,0.482,6.4,0.147,0.164,0.92
versions/llama2-7b-chat-hf_news_rag_max.csv CHANGED
@@ -1,5 +1,5 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- llama2-7b-chat-hf_news_rag,vanilla,0.940119760479042,0.940119760479042,0.9892346858978271,771.0,157.0,157.0,1224.0,0.9453125
3
- llama2-7b-chat-hf_news_rag,sys_prompt_bing,0.9221556886227544,0.9221556886227544,0.9781678915023804,771.0,154.0,154.0,1224.0,0.921875
4
- llama2-7b-chat-hf_news_rag,top_k_3,0.9380530973451328,0.9380530973451328,0.968783974647522,678.0,132.0,154.0,1127.0,0.8828125
5
- llama2-7b-chat-hf_news_rag,memfree_6,0.8444444444444446,0.8380952380952382,0.9544140696525574,589.0,77.0,124.0,1224.0,0.7734375
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
2
+ llama2-7b-chat-hf_news_rag,vanilla,0.940119760479042,0.940119760479042,0.9892346858978271,771.0,157.0,157.0,1224.0,0.9453125,0.482,6.3,0.539,0.558,1.00
3
+ llama2-7b-chat-hf_news_rag,sys_prompt_bing,0.9221556886227544,0.9221556886227544,0.9781678915023804,771.0,154.0,154.0,1224.0,0.921875,0.476,5.6,0.543,0.564,1.00
4
+ llama2-7b-chat-hf_news_rag,top_k_3,0.9380530973451328,0.9380530973451328,0.968783974647522,678.0,132.0,154.0,1127.0,0.8828125,0.354,3.8,0.191,0.102,0.98
5
+ llama2-7b-chat-hf_news_rag,memfree_6,0.8444444444444446,0.8380952380952382,0.9544140696525574,589.0,77.0,124.0,1224.0,0.7734375,0.482,6.3,0.473,0.539,0.92
versions/llama2-7b-chat-hf_news_rag_mean.csv CHANGED
@@ -1,5 +1,5 @@
1
- model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity
2
- llama2-7b-chat-hf_news_rag,vanilla,0.4046811922623051,0.25730737423437094,0.6802140055363998,84.022,17.578,25.358,742.236,0.296875
3
- llama2-7b-chat-hf_news_rag,sys_prompt_bing,0.28949306306744516,0.1647010831039419,0.4801265420624986,33.656,6.301,9.852,790.927,0.2166875
4
- llama2-7b-chat-hf_news_rag,top_k_3,0.3298699211147509,0.18475706540907344,0.6467153981402516,32.269,6.191,11.677,792.143,0.2393828125
5
- llama2-7b-chat-hf_news_rag,memfree_6,0.34970841103663103,0.20968100081298785,0.6512703670430928,31.282,4.219,15.744,773.669,0.252
 
1
+ model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
2
+ llama2-7b-chat-hf_news_rag,vanilla,0.4046811922623051,0.25730737423437094,0.6802140055363998,84.022,17.578,25.358,742.236,0.296875,0.482,6.3,0.539,0.558,1.00
3
+ llama2-7b-chat-hf_news_rag,sys_prompt_bing,0.28949306306744516,0.1647010831039419,0.4801265420624986,33.656,6.301,9.852,790.927,0.2166875,0.476,5.6,0.543,0.564,1.00
4
+ llama2-7b-chat-hf_news_rag,top_k_3,0.3298699211147509,0.18475706540907344,0.6467153981402516,32.269,6.191,11.677,792.143,0.2393828125,0.354,3.8,0.191,0.102,0.98
5
+ llama2-7b-chat-hf_news_rag,memfree_6,0.34970841103663103,0.20968100081298785,0.6512703670430928,31.282,4.219,15.744,773.669,0.252,0.482,6.3,0.473,0.539,0.92