CoTaEval_leaderboard / versions /llama2-7b-chat-hf-newsqa_news_memorization_mean.csv
boyiwei's picture
fix
4114ce0
raw
history blame
2.17 kB
model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
llama2-7b-chat-hf-newsqa_news_mem,vanilla,0.3081531364519125,0.1536234348340788,0.5817865846387756,18.516028955532576,3.4312306101344365,6.510858324715615,789.7135470527404,0.22225633402275077,0.353,4.7,0.405,0.406,1.00
llama2-7b-chat-hf-newsqa_news_mem,sys_prompt_bing,0.17829751751006317,0.1004113984002821,0.21774254646167307,9.66804550155119,0.8179937952430196,1.3681489141675285,829.5211995863496,0.1533980739400207,0.340,3.3,0.334,0.330,1.00
llama2-7b-chat-hf-newsqa_news_mem,top_k_3,0.2746915315959413,0.13568918469713084,0.5451532928829275,13.820062047569804,2.0206825232678387,3.542916235780765,805.828335056877,0.2031653955532575,0.147,3.0,0.033,0.018,0.99
llama2-7b-chat-hf-newsqa_news_mem,memfree_6,0.29185832451701593,0.1442135600038948,0.5625074913719478,14.24095139607032,2.0692864529472597,4.100310237849017,794.6432264736297,0.2094509436401241,0.353,4.7,0.362,0.379,0.94
llama2-7b-chat-hf-newsqa_news_mem,r_cad_3,0.15491637189908622,0.09812783310896342,0.43077290016240916,15.243019648397105,2.140641158221303,2.468459152016546,798.1437435367114,0.17288488883143743,0.353,4.7,0.405,0.406,0.53
llama2-7b-chat-hf-newsqa_news_mem,grad_ascent,0.26476190082689827,0.13486983772036568,0.4920891164460113,13.102378490175802,1.9803516028955532,3.658738366080662,796.9327817993795,0.19352701654601862,0.279,3.3,0.269,0.258,1.00
llama2-7b-chat-hf-newsqa_news_mem,grad_diff,0.10566005416804924,0.06883750780260099,0.30027836965857607,8.897621509824198,0.6732161323681489,0.8169596690796277,851.0992761116856,0.08836123319544985,0.158,1.5,0.169,0.162,1.00
llama2-7b-chat-hf-newsqa_news_mem,KL,0.12852527461647723,0.08092121519474287,0.2828496025687465,8.501551189245088,0.6845915201654602,0.8872802481902792,843.0103412616339,0.09795921664943123,0.176,1.5,0.169,0.159,1.00
llama2-7b-chat-hf-newsqa_news_mem,idk,0.11152056332044802,0.06593938963617683,0.195940825350572,8.377456049638056,0.9669079627714581,1.9886246122026887,970.3960703205792,0.07705047828335057,0.331,2.4,0.283,0.247,1.00