leaderboard / plots /clustermap_instr.json
pminervini's picture
update
bd18620
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf"],"index":["MemoTrap, Accuracy","IFEval, Prompt-Level Accuracy"],"data":[[0.641025641,0.7179487179,0.5886752137,0.6826923077,0.860042735,0.6314102564,0.8344017094,0.7756410256,0.7574786325,0.7980769231,0.6463675214,0.6346153846,0.594017094,0.7532051282,0.5897435897,0.561965812,0.6645299145,0.5758547009,0.5352564103,0.5854700855,0.858974359,0.7126068376,0.8643162393,0.5737179487,0.733974359,0.6143162393,0.6079059829,0.5758547009],[0.2735674677,0.2606284658,0.1626617375,0.0868761553,0.0850277264,0.2310536044,0.1293900185,0.1885397412,0.179297597,0.1423290203,0.2402957486,0.033271719,0.0628465804,null,0.2735674677,0.1497227357,0.146025878,0.1700554529,0.0609981516,0.0924214418,0.1534195933,0.1164510166,0.1940850277,0.1423290203,0.2865064695,0.1866913124,0.314232902,0.1829944547]]}