leaderboard / plots /clustermap_detect.json
pminervini's picture
update
e137e83
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf"],"index":["HaluEval Dialog, EM","HaluEval Dialog, Accuracy","HaluEval Summarization, EM","HaluEval Summarization, Accuracy","HaluEval QA, EM","HaluEval QA, Accuracy","SelfCheckGPT, AVG","SelfCheckGPT, MAX"],"data":[[0.5928,0.6085,0.6471647165,0.463891675,0.5023620464,0.6674,0.4723306314,0.4795979899,0.4987491244,0.4870581126,0.7173,0.7699,0.4695408623,0.4019913507,0.7963,0.5478,0.497949795,0.6043,0.7917,0.7634,0.3878,0.4204261278,1.0,0.5007032349,0.6425,0.4997,0.6548,0.7393],[0.5928,0.6085,0.6471,0.4625,0.4998,0.6674,0.472,0.4772,0.4984,0.4836,0.7173,0.7699,0.4694,0.3997,0.7963,0.5478,0.4979,0.6043,0.7917,0.7634,0.3878,0.4203,0.0001,0.0712,0.6425,0.4997,0.6548,0.7393],[0.4645464546,0.4197617379,0.5014695908,0.4981794817,0.4977535302,0.5459,0.4976993044,0.4992513369,0.4983417139,0.476684492,0.476,0.5147,0.5153475115,0.4855491329,0.448,0.4904,0.5590155163,0.4696,0.5268,0.5238,0.4877562327,0.4893500819,null,0.5198119543,0.4906,0.4279,0.4772477248,null],[0.4645,0.4193,0.4436,0.4652,0.4653,0.5459,0.4651,0.4668,0.4658,0.4457,0.476,0.5147,0.4701,0.4536,0.448,0.4904,0.5224,0.4696,0.5268,0.5238,0.4402,0.448,0.0,0.0774,0.4906,0.4279,0.4772,null],[0.5454545455,0.4521713028,0.5207562269,0.5864054136,0.5019596021,0.5969,0.4655327664,0.4445799045,0.5139,0.4800705834,0.3992,0.4502,0.4446,0.3532004407,0.4208,0.4555455546,0.5093,0.3233,0.6235,0.5197994987,0.2968,0.4672,0.5253588517,0.4947589099,0.5305273834,0.4566,0.5729718916,0.6879],[0.5454,0.4519,0.5206,0.5806,0.4995,0.5969,0.4653,0.4376,0.5139,0.4625,0.3992,0.4502,0.4446,0.3526,0.4208,0.4555,0.5093,0.3233,0.6235,0.5185,0.2968,0.4672,0.0549,0.0708,0.5231,0.4566,0.5728,0.6879],[0.0900088111,0.0378151261,0.1105999164,0.012605042,0.063491636,0.0504201681,0.0865653082,0.0168067227,0.0042016807,0.0885709807,0.0782453898,0.987394958,0.0336183781,0.0797089047,0.0358374659,0.3529411765,0.012605042,0.1020439002,0.0210084034,0.0758733581,0.0291536913,0.0210084034,0.0084033613,0.6692579906,0.0413645344,0.1043527865,0.0,null],[0.2447584892,0.281512605,0.2558918005,0.3277310924,0.2040760759,0.2941176471,0.2339250687,0.2394957983,0.2058823529,0.2829557667,0.2295219446,0.987394958,0.1199278912,0.3158274114,0.1444317716,0.4453781513,0.2352941176,0.2793721332,0.1848739496,0.2734539158,0.076986247,0.1974789916,0.1554621849,0.8670519793,0.151334934,0.2598217688,0.0588235294,null]]}