<link rel="stylesheet" href="/front/build/kube-9ffb4fa/style.css" />
<link rel="preconnect" href="https://fonts.gstatic.com" />
<link
href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:ital,wght@0,200;0,300;0,400;0,600;0,700;0,900;1,200;1,300;1,400;1,600;1,700;1,900&display=swap"
rel="stylesheet"
/>
<link
href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600;700&display=swap"
rel="stylesheet"
/>
<link
rel="preload"
href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.12.0/katex.min.css"
as="style"
onload="this.onload=null;this.rel='stylesheet'"
/>
<noscript>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.12.0/katex.min.css" />
</noscript>
<link rel="canonical" href="https://huggingface.co/prometheus-eval/prometheus-8x7b-v2.0/blob/main/README.md">
<title>README.md · prometheus-eval/prometheus-8x7b-v2.0 at main</title>
<script
defer
data-domain="huggingface.co"
event-loggedIn="false"
src="/js/script.pageview-props.js"
></script>
<script>
window.plausible =
window.plausible ||
function () {
(window.plausible.q = window.plausible.q || []).push(arguments);
};
</script>
<script type="text/javascript" src="https://de5282c3ca0c.edge.sdk.awswaf.com/de5282c3ca0c/526cf06acb0d/challenge.js" defer></script>
</head>
<body class="flex flex-col min-h-screen bg-white dark:bg-gray-950 text-black ViewerBlobPage">
<div class="flex min-h-screen flex-col">
<div class="SVELTE_HYDRATER contents" data-target="MainHeader" data-props="{"classNames":"","isWide":false,"isZh":false}"><header class="border-b border-gray-100 "><div class="w-full px-4 container flex h-16 items-center"><div class="flex flex-1 items-center"><a class="mr-5 flex flex-none items-center lg:mr-6" href="/"><img alt="Hugging Face's logo" class="w-7 md:mr-2" src="/front/assets/huggingface_logo-noborder.svg">
<span class="hidden whitespace-nowrap text-lg font-bold md:block">Hugging Face</span></a>
<div class="relative flex-1 lg:max-w-sm mr-2 sm:mr-4 md:mr-3 xl:mr-6"><input autocomplete="off" class="w-full dark:bg-gray-950 pl-8 form-input-alt h-9 pr-3 focus:shadow-xl " name="" placeholder="Search models, datasets, users..." spellcheck="false" type="text" value="">
<svg class="absolute left-2.5 text-gray-400 top-1/2 transform -translate-y-1/2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M30 28.59L22.45 21A11 11 0 1 0 21 22.45L28.59 30zM5 14a9 9 0 1 1 9 9a9 9 0 0 1-9-9z" fill="currentColor"></path></svg>
</div>
<div class="flex flex-none items-center justify-center p-0.5 place-self-stretch lg:hidden"><button class="relative z-40 flex h-6 w-8 items-center justify-center" type="button"><svg width="1em" height="1em" viewBox="0 0 10 10" class="text-xl" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" preserveAspectRatio="xMidYMid meet" fill="currentColor"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.65039 2.9999C1.65039 2.8066 1.80709 2.6499 2.00039 2.6499H8.00039C8.19369 2.6499 8.35039 2.8066 8.35039 2.9999C8.35039 3.1932 8.19369 3.3499 8.00039 3.3499H2.00039C1.80709 3.3499 1.65039 3.1932 1.65039 2.9999ZM1.65039 4.9999C1.65039 4.8066 1.80709 4.6499 2.00039 4.6499H8.00039C8.19369 4.6499 8.35039 4.8066 8.35039 4.9999C8.35039 5.1932 8.19369 5.3499 8.00039 5.3499H2.00039C1.80709 5.3499 1.65039 5.1932 1.65039 4.9999ZM2.00039 6.6499C1.80709 6.6499 1.65039 6.8066 1.65039 6.9999C1.65039 7.1932 1.80709 7.3499 2.00039 7.3499H8.00039C8.19369 7.3499 8.35039 7.1932 8.35039 6.9999C8.35039 6.8066 8.19369 6.6499 8.00039 6.6499H2.00039Z"></path></svg>
</button>
</div></div>
<nav aria-label="Main" class="ml-auto hidden lg:block"><ul class="flex items-center space-x-1.5 xl:space-x-2"><li><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400 hover:text-indigo-700" href="/models"><svg class="mr-1.5 text-gray-400 group-hover:text-indigo-500" style="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg>
Models</a>
</li><li><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400 hover:text-red-700" href="/datasets"><svg class="mr-1.5 text-gray-400 group-hover:text-red-500" style="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 25 25"><ellipse cx="12.5" cy="5" fill="currentColor" fill-opacity="0.25" rx="7.5" ry="2"></ellipse><path d="M12.5 15C16.6421 15 20 14.1046 20 13V20C20 21.1046 16.6421 22 12.5 22C8.35786 22 5 21.1046 5 20V13C5 14.1046 8.35786 15 12.5 15Z" fill="currentColor" opacity="0.5"></path><path d="M12.5 7C16.6421 7 20 6.10457 20 5V11.5C20 12.6046 16.6421 13.5 12.5 13.5C8.35786 13.5 5 12.6046 5 11.5V5C5 6.10457 8.35786 7 12.5 7Z" fill="currentColor" opacity="0.5"></path><path d="M5.23628 12C5.08204 12.1598 5 12.8273 5 13C5 14.1046 8.35786 15 12.5 15C16.6421 15 20 14.1046 20 13C20 12.8273 19.918 12.1598 19.7637 12C18.9311 12.8626 15.9947 13.5 12.5 13.5C9.0053 13.5 6.06886 12.8626 5.23628 12Z" fill="currentColor"></path></svg>
Datasets</a>
</li><li><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400 hover:text-blue-700" href="/spaces"><svg class="mr-1.5 text-gray-400 group-hover:text-blue-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 25 25"><path opacity=".5" d="M6.016 14.674v4.31h4.31v-4.31h-4.31ZM14.674 14.674v4.31h4.31v-4.31h-4.31ZM6.016 6.016v4.31h4.31v-4.31h-4.31Z" fill="currentColor"></path><path opacity=".75" fill-rule="evenodd" clip-rule="evenodd" d="M3 4.914C3 3.857 3.857 3 4.914 3h6.514c.884 0 1.628.6 1.848 1.414a5.171 5.171 0 0 1 7.31 7.31c.815.22 1.414.964 1.414 1.848v6.514A1.914 1.914 0 0 1 20.086 22H4.914A1.914 1.914 0 0 1 3 20.086V4.914Zm3.016 1.102v4.31h4.31v-4.31h-4.31Zm0 12.968v-4.31h4.31v4.31h-4.31Zm8.658 0v-4.31h4.31v4.31h-4.31Zm0-10.813a2.155 2.155 0 1 1 4.31 0 2.155 2.155 0 0 1-4.31 0Z" fill="currentColor"></path><path opacity=".25" d="M16.829 6.016a2.155 2.155 0 1 0 0 4.31 2.155 2.155 0 0 0 0-4.31Z" fill="currentColor"></path></svg>
Spaces</a>
</li><li><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400 hover:text-yellow-700" href="/posts"><svg class="mr-1.5 text-gray-400 group-hover:text-yellow-500 !text-yellow-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 12 12" preserveAspectRatio="xMidYMid meet"><path fill="currentColor" fill-rule="evenodd" d="M3.73 2.4A4.25 4.25 0 1 1 6 10.26H2.17l-.13-.02a.43.43 0 0 1-.3-.43l.01-.06a.43.43 0 0 1 .12-.22l.84-.84A4.26 4.26 0 0 1 3.73 2.4Z" clip-rule="evenodd"></path></svg>
Posts</a>
</li><li><a class="group flex items-center px-2 py-0.5 dark:hover:text-gray-400 hover:text-yellow-700" href="/docs"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="mr-1.5 text-gray-400 group-hover:text-yellow-500" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path opacity="0.5" d="M20.9022 5.10334L10.8012 10.8791L7.76318 9.11193C8.07741 8.56791 8.5256 8.11332 9.06512 7.7914L15.9336 3.73907C17.0868 3.08811 18.5002 3.26422 19.6534 3.91519L19.3859 3.73911C19.9253 4.06087 20.5879 4.56025 20.9022 5.10334Z" fill="currentColor"></path><path d="M10.7999 10.8792V28.5483C10.2136 28.5475 9.63494 28.4139 9.10745 28.1578C8.5429 27.8312 8.074 27.3621 7.74761 26.7975C7.42122 26.2327 7.24878 25.5923 7.24756 24.9402V10.9908C7.25062 10.3319 7.42358 9.68487 7.74973 9.1123L10.7999 10.8792Z" fill="currentColor" fill-opacity="0.75"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M21.3368 10.8499V6.918C21.3331 6.25959 21.16 5.61234 20.8346 5.03949L10.7971 10.8727L10.8046 10.874L21.3368 10.8499Z" fill="currentColor"></path><path opacity="0.5" d="M21.7937 10.8488L10.7825 10.8741V28.5486L21.7937 28.5234C23.3344 28.5234 24.5835 27.2743 24.5835 25.7335V13.6387C24.5835 12.0979 23.4365 11.1233 21.7937 10.8488Z" fill="currentColor"></path></svg>
Docs</a>
</li>
<li class="max-2xl:hidden"><div class="relative ">
<button class="px-2 py-0.5 group hover:text-green-700 dark:hover:text-gray-400 flex items-center " type="button">
<svg class="mr-1.5 text-gray-400 group-hover:text-green-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-tertiary" d="M19 6H5a3 3 0 0 0-3 3v2.72L8.837 14h6.326L22 11.72V9a3 3 0 0 0-3-3z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M10 6V5h4v1h2V5a2.002 2.002 0 0 0-2-2h-4a2.002 2.002 0 0 0-2 2v1h2zm-1.163 8L2 11.72V18a3.003 3.003 0 0 0 3 3h14a3.003 3.003 0 0 0 3-3v-6.28L15.163 14H8.837z" fill="currentColor"></path></svg>
Solutions
</button>
</div></li>
<li><a class="group flex items-center px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-400" href="/pricing">Pricing
</a></li>
<li><div class="relative group">
<button class="px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-600 flex items-center " type="button">
<svg class="mr-1.5 text-gray-500 w-5 group-hover:text-gray-400 dark:text-gray-300 dark:group-hover:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 32 18" preserveAspectRatio="xMidYMid meet"><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 3.30221C14.4504 2.836 14.8284 2.45807 15.2946 2.45807H28.4933C28.9595 2.45807 29.3374 2.836 29.3374 3.30221C29.3374 3.76842 28.9595 4.14635 28.4933 4.14635H15.2946C14.8284 4.14635 14.4504 3.76842 14.4504 3.30221Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 9.00002C14.4504 8.53382 14.8284 8.15588 15.2946 8.15588H28.4933C28.9595 8.15588 29.3374 8.53382 29.3374 9.00002C29.3374 9.46623 28.9595 9.84417 28.4933 9.84417H15.2946C14.8284 9.84417 14.4504 9.46623 14.4504 9.00002Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 14.6978C14.4504 14.2316 14.8284 13.8537 15.2946 13.8537H28.4933C28.9595 13.8537 29.3374 14.2316 29.3374 14.6978C29.3374 15.164 28.9595 15.542 28.4933 15.542H15.2946C14.8284 15.542 14.4504 15.164 14.4504 14.6978Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M1.94549 6.87377C2.27514 6.54411 2.80962 6.54411 3.13928 6.87377L6.23458 9.96907L9.32988 6.87377C9.65954 6.54411 10.194 6.54411 10.5237 6.87377C10.8533 7.20343 10.8533 7.73791 10.5237 8.06756L6.23458 12.3567L1.94549 8.06756C1.61583 7.73791 1.61583 7.20343 1.94549 6.87377Z" fill="currentColor"></path></svg>
</button>
</div></li>
<li><hr class="h-5 w-0.5 border-none bg-gray-100 dark:bg-gray-800"></li>
<li><a class="block cursor-pointer px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-400" href="/login">Log In
</a></li>
<li><a class="rounded-full border border-transparent bg-gray-900 px-3 py-1 leading-none text-white hover:border-black hover:bg-white hover:text-black" href="/join">Sign Up
</a></li></ul></nav></div></header></div>
<div class="SVELTE_HYDRATER contents" data-target="SSOBanner" data-props="{}"></div>
<main class="flex flex-1 flex-col"><div class="SVELTE_HYDRATER contents" data-target="ModelHeader" data-props="{"activeTab":"files","author":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6469949654873f0043b09c22/ooULtecFGFMPRkUxevBty.jpeg","fullname":"prometheus-eval","name":"prometheus-eval","type":"org","isHf":false,"isEnterprise":false},"canReadRepoSettings":false,"canWriteRepoContent":false,"canDisable":false,"model":{"author":"prometheus-eval","cardData":{"tags":["text2text-generation"],"datasets":["prometheus-eval/Feedback-Collection","prometheus-eval/Preference-Collection"],"license":"apache-2.0","language":["en"],"pipeline_tag":"text2text-generation","library_name":"transformers","metrics":["pearsonr","spearmanr","kendall-tau","accuracy"]},"cardExists":true,"config":{"architectures":["MixtralForCausalLM"],"model_type":"mixtral","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":null,"unk_token":"<unk>","use_default_system_prompt":false,"chat_template":"{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"}},"createdAt":"2024-02-20T16:10:25.000Z","discussionsDisabled":false,"downloads":249,"downloadsAllTime":416,"id":"prometheus-eval/prometheus-8x7b-v2.0","isLikedByUser":false,"isWatchedByUser":false,"inference":"Yes","lastModified":"2024-05-03T11:10:05.000Z","likes":11,"pipeline_tag":"text2text-generation","library_name":"transformers","librariesOther":[],"model-index":null,"private":false,"repoType":"model","gated":false,"pwcLink":{"error":"Unknown error, can't generate link to Papers With Code."},"tags":["transformers","safetensors","mixtral","text-generation","text2text-generation","en","dataset:prometheus-eval/Feedback-Collection","dataset:prometheus-eval/Preference-Collection","arxiv:2405.01535","arxiv:2310.08491","license:apache-2.0","autotrain_compatible","endpoints_compatible","text-generation-inference","region:us"],"tag_objs":[{"id":"text2text-generation","label":"Text2Text Generation","type":"pipeline_tag","subType":"nlp"},{"id":"transformers","label":"Transformers","type":"library"},{"id":"safetensors","label":"Safetensors","type":"library"},{"id":"dataset:prometheus-eval/Feedback-Collection","label":"prometheus-eval/Feedback-Collection","type":"dataset","extra":{"disabled":false}},{"id":"dataset:prometheus-eval/Preference-Collection","label":"prometheus-eval/Preference-Collection","type":"dataset","extra":{"disabled":false}},{"id":"en","label":"English","type":"language"},{"id":"mixtral","label":"mixtral","type":"other"},{"id":"text-generation","label":"text-generation","type":"other"},{"id":"autotrain_compatible","label":"AutoTrain Compatible","type":"other"},{"id":"endpoints_compatible","label":"Inference Endpoints","type":"other"},{"id":"text-generation-inference","label":"text-generation-inference","type":"other"},{"id":"arxiv:2405.01535","label":"arxiv:2405.01535","type":"arxiv","extra":{"paperTitle":"Prometheus 2: An Open Source Language Model Specialized in Evaluating\n Other Language Models"}},{"id":"arxiv:2310.08491","label":"arxiv:2310.08491","type":"arxiv","extra":{"paperTitle":"Prometheus: Inducing Fine-grained Evaluation Capability in Language\n Models"}},{"id":"license:apache-2.0","label":"apache-2.0","type":"license"},{"type":"region","label":"🇺🇸 Region: US","id":"region:us"}],"transformersInfo":{"auto_model":"AutoModelForCausalLM","pipeline_tag":"text-generation","processor":"AutoTokenizer"},"safetensors":{"parameters":{"BF16":46702792704},"total":46702792704,"sharded":true}},"discussionsStats":{"closed":0,"open":0,"total":0}}"><header class="from-gray-50-to-white border-b border-gray-100 bg-gradient-to-t via-white dark:via-gray-950 pt-6 sm:pt-9"><div class="container relative "><h1 class="flex flex-wrap items-center leading-tight mb-3 text-lg max-sm:gap-y-1.5 md:text-xl">
<div class="group flex flex-none items-center"><div class="relative mr-1.5 flex items-center">
<img alt="" class="w-3.5 h-3.5 rounded flex-none" src="https://cdn-avatars.huggingface.co/v1/production/uploads/6469949654873f0043b09c22/ooULtecFGFMPRkUxevBty.jpeg" crossorigin="anonymous"></div>
<a href="/prometheus-eval" class="text-gray-400 hover:text-blue-600">prometheus-eval</a>
<div class="mx-0.5 text-gray-300">/</div></div>
</div>
</div></div>
<div class="relative mb-2 flex flex-wrap items-center"><a class="truncate text-gray-800 hover:underline" href="/prometheus-eval/prometheus-8x7b-v2.0/tree/main">prometheus-8x7b-v2.0</a>
<span class="mx-1 text-gray-300">/</span>
<span class="dark:text-gray-300">README.md</span>
<div class="SVELTE_HYDRATER contents" data-target="CopyButton" data-props="{"value":"README.md","classNames":"text-xs ml-2","title":"Copy path"}"><button class="relative text-xs ml-2 inline-flex cursor-pointer items-center text-sm focus:outline-none mx-0.5 text-gray-600 " title="Copy path" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
</button></div></div></div>
</header>
<div class="SVELTE_HYDRATER contents" data-target="LastCommit" data-props="{"commitLast":{"date":"2024-05-03T11:09:55.000Z","verified":"verified","subject":"Update README.md","authors":[{"_id":"6469949654873f0043b09c22","avatar":"https://cdn-avatars.huggingface.co/v1/production/uploads/6469949654873f0043b09c22/Lk7IJAR16Wa_sGJ2g81AQ.jpeg","isHf":false,"user":"seungone"}],"commit":{"id":"e0bb4692356a1738acf25f15180e9f025725b0f2","parentIds":["283c80e2fa829dc655397c244bc1ef6db977a4dd"]},"title":"Update README.md"},"repo":{"name":"prometheus-eval/prometheus-8x7b-v2.0","type":"model"}}"><div class="from-gray-100-to-white flex items-baseline rounded-t-lg border border-b-0 bg-gradient-to-t px-3 py-2 dark:border-gray-800"><img class="mr-2.5 mt-0.5 h-4 w-4 self-center rounded-full" alt="seungone's picture" src="https://cdn-avatars.huggingface.co/v1/production/uploads/6469949654873f0043b09c22/Lk7IJAR16Wa_sGJ2g81AQ.jpeg">
<div class="mr-5 flex flex-none items-center truncate"><a class="hover:underline" href="/seungone">seungone
</a>
</div>
<div class="mr-4 truncate font-mono text-sm text-gray-500 hover:prose-a:underline"><!-- HTML_TAG_START -->Update README.md<!-- HTML_TAG_END --></div>
<a class="rounded border bg-gray-50 px-1.5 text-sm hover:underline dark:border-gray-800 dark:bg-gray-900" href="/prometheus-eval/prometheus-8x7b-v2.0/commit/e0bb4692356a1738acf25f15180e9f025725b0f2">e0bb469</a>
<span class="mx-2 text-green-500 dark:text-green-600 px-1.5 border-green-100 dark:border-green-800 rounded-full border text-xs uppercase" title="This commit is signed and the signature is verified">verified</span>
<time class="ml-auto hidden flex-none truncate pl-2 text-gray-500 dark:text-gray-400 lg:block" datetime="2024-05-03T11:09:55" title="Fri, 03 May 2024 11:09:55 GMT">4 days ago</time></div></div>
<div class="flex flex-wrap items-center border px-3 py-1.5 text-sm text-gray-800 dark:border-gray-800 dark:bg-gray-900"><div class="flex items-center gap-3 text-sm font-medium"><a class="rounded-md px-1.5 capitalize bg-gray-200 dark:bg-gray-800" href="/prometheus-eval/prometheus-8x7b-v2.0/blob/main/README.md">preview</a>
<a class="rounded-md px-1.5 capitalize " href="/prometheus-eval/prometheus-8x7b-v2.0/blob/main/README.md?code=true">code</a></div>
<div class="mx-4 text-gray-200">|</div>
<a class="my-1 mr-4 flex items-center hover:underline " href="/prometheus-eval/prometheus-8x7b-v2.0/raw/main/README.md"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M31 16l-7 7l-1.41-1.41L28.17 16l-5.58-5.59L24 9l7 7z" fill="currentColor"></path><path d="M1 16l7-7l1.41 1.41L3.83 16l5.58 5.59L8 23l-7-7z" fill="currentColor"></path><path d="M12.419 25.484L17.639 6l1.932.518L14.35 26z" fill="currentColor"></path></svg>
raw
</a><a class="my-1 mr-4 flex items-center hover:underline " href="/prometheus-eval/prometheus-8x7b-v2.0/commits/main/README.md"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M16 4C9.383 4 4 9.383 4 16s5.383 12 12 12s12-5.383 12-12S22.617 4 16 4zm0 2c5.535 0 10 4.465 10 10s-4.465 10-10 10S6 21.535 6 16S10.465 6 16 6zm-1 2v9h7v-2h-5V8z" fill="currentColor"></path></svg>
history
</a><a class="my-1 mr-4 flex items-center hover:underline " href="/prometheus-eval/prometheus-8x7b-v2.0/blame/main/README.md"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M16 2a14 14 0 1 0 14 14A14 14 0 0 0 16 2zm0 26a12 12 0 1 1 12-12a12 12 0 0 1-12 12z" fill="currentColor"></path><path d="M11.5 11a2.5 2.5 0 1 0 2.5 2.5a2.48 2.48 0 0 0-2.5-2.5z" fill="currentColor"></path><path d="M20.5 11a2.5 2.5 0 1 0 2.5 2.5a2.48 2.48 0 0 0-2.5-2.5z" fill="currentColor"></path></svg>
blame
</a><a class="my-1 mr-4 flex items-center hover:underline text-green-600 dark:text-gray-300" href="/prometheus-eval/prometheus-8x7b-v2.0/edit/main/README.md"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M2 26h28v2H2z" fill="currentColor"></path><path d="M25.4 9c.8-.8.8-2 0-2.8l-3.6-3.6c-.8-.8-2-.8-2.8 0l-15 15V24h6.4l15-15zm-5-5L24 7.6l-3 3L17.4 7l3-3zM6 22v-3.6l10-10l3.6 3.6l-10 10H6z" fill="currentColor"></path></svg>
contribute
</a><a class="my-1 mr-4 flex items-center hover:underline " href="/prometheus-eval/prometheus-8x7b-v2.0/delete/main/README.md"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M12 12h2v12h-2z" fill="currentColor"></path><path d="M18 12h2v12h-2z" fill="currentColor"></path><path d="M4 6v2h2v20a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V8h2V6zm4 22V8h16v20z" fill="currentColor"></path><path d="M12 2h8v2h-8z" fill="currentColor"></path></svg>
delete
</a>
<div class="mr-4 flex items-center text-gray-400"><svg class="text-gray-300 text-sm mr-1.5 -translate-y-px" width="1em" height="1em" viewBox="0 0 22 28" fill="none" xmlns="http://www.w3.org/2000/svg"><path fill-rule="evenodd" clip-rule="evenodd" d="M15.3634 10.3639C15.8486 10.8491 15.8486 11.6357 15.3634 12.1209L10.9292 16.5551C10.6058 16.8785 10.0814 16.8785 9.7579 16.5551L7.03051 13.8277C6.54532 13.3425 6.54532 12.5558 7.03051 12.0707C7.51569 11.5855 8.30234 11.5855 8.78752 12.0707L9.7579 13.041C10.0814 13.3645 10.6058 13.3645 10.9292 13.041L13.6064 10.3639C14.0916 9.8787 14.8782 9.8787 15.3634 10.3639Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M10.6666 27.12C4.93329 25.28 0 19.2267 0 12.7867V6.52001C0 5.40001 0.693334 4.41334 1.73333 4.01334L9.73333 1.01334C10.3333 0.786673 11 0.786673 11.6 1.02667L19.6 4.02667C20.1083 4.21658 20.5465 4.55701 20.8562 5.00252C21.1659 5.44803 21.3324 5.97742 21.3333 6.52001V12.7867C21.3333 19.24 16.4 25.28 10.6666 27.12Z" fill="currentColor" fill-opacity="0.22"></path><path d="M10.0845 1.94967L10.0867 1.94881C10.4587 1.8083 10.8666 1.81036 11.2286 1.95515L11.2387 1.95919L11.2489 1.963L19.2489 4.963L19.25 4.96342C19.5677 5.08211 19.8416 5.29488 20.0351 5.57333C20.2285 5.85151 20.3326 6.18203 20.3333 6.52082C20.3333 6.52113 20.3333 6.52144 20.3333 6.52176L20.3333 12.7867C20.3333 18.6535 15.8922 24.2319 10.6666 26.0652C5.44153 24.2316 1 18.6409 1 12.7867V6.52001C1 5.82357 1.42893 5.20343 2.08883 4.94803L10.0845 1.94967Z" stroke="currentColor" stroke-opacity="0.30" stroke-width="2"></path></svg>
No virus
</div>
<div class="dark:text-gray-300 sm:ml-auto">7.82 kB</div></div>
<div class="relative min-h-[100px] rounded-b-lg border border-t-0 leading-tight dark:border-gray-800 dark:bg-gray-925">
<div class="py-4 px-4 sm:px-6 prose hf-sanitized hf-sanitized-ufoVvyihP985epOyGg37L"><div class="not-prose -mx-6 -mt-4 mb-8 max-h-[300px] min-w-full overflow-auto border-b bg-gradient-to-t from-gray-50 px-6 pb-5 pt-4 font-mono text-xs transition-all dark:from-gray-900 dark:to-gray-950"><div class="mb-2 inline-block rounded-lg border px-2 py-1 font-mono text-xs leading-none">metadata</div>
<pre><!-- HTML_TAG_START --><span class="hljs-attr">tags:</span>
- text2text-generation datasets: - prometheus-eval/Feedback-Collection - prometheus-eval/Preference-Collection license: apache-2.0 language: - en pipeline_tag: text2text-generation library_name: transformers metrics: - pearsonr - spearmanr - kendall-tau - accuracy
<!-- HTML_TAG_START --><h2 class="relative group flex items-center">
<a rel="nofollow" href="#links-for-reference" class="block pr-1.5 text-lg md:absolute md:p-1.5 md:opacity-0 md:group-hover:opacity-100 md:right-full" id="links-for-reference">
<span class="header-link"><svg viewBox="0 0 256 256" preserveAspectRatio="xMidYMid meet" height="1em" width="1em" role="img" aria-hidden="true" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" class="text-gray-500 hover:text-black dark:hover:text-gray-200 w-4"><path fill="currentColor" d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z"></path></svg></span>
</a>
<span>
Links for Reference
</span>
- Homepage: In Progress
- Repository:https://github.com/prometheus-eval/prometheus-eval
- Paper:https://arxiv.org/abs/2405.01535
- Point of Contact:seungone@cmu.edu
TL;DR
Prometheus 2 is an alternative of GPT-4 evaluation when doing fine-grained evaluation of an underlying LLM & a Reward model for Reinforcement Learning from Human Feedback (RLHF).
Prometheus 2 is a language model using Mistral-Instruct as a base model. It is fine-tuned on 100K feedback within the Feedback Collection and 200K feedback within the Preference Collection. It is also made by weight merging to support both absolute grading (direct assessment) and relative grading (pairwise ranking). The surprising thing is that we find weight merging also improves performance on each format.
Model Details
Model Description
- Model type: Language model
- Language(s) (NLP): English
- License: Apache 2.0
- Related Models: All Prometheus Checkpoints
- Resources for more information:
Prometheus is trained with two different sizes (7B and 8x7B). You could check the 7B sized LM on this page. Also, check out our dataset as well on this page and this page.
Prompt Format
We have made wrapper functions and classes to conveniently use Prometheus 2 at our github repository. We highly recommend you use it!
However, if you just want to use the model for your use case, please refer to the prompt format below. Note that absolute grading and relative grading requires different prompt templates and system prompts.
Absolute Grading (Direct Assessment)
Prometheus requires 4 components in the input: An instruction, a response to evaluate, a score rubric, and a reference answer. You could refer to the prompt format below. You should fill in the instruction, response, reference answer, criteria description, and score description for score in range of 1 to 5.
Fix the components with {text} inside.
###Task Description: An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given. 1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general. 2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric. 3. The output format should look as follows: \"Feedback: (write a feedback for criteria) [RESULT] (an integer number between 1 and 5)\" 4. Please do not generate any other opening, closing, and explanations.
###The instruction to evaluate: {orig_instruction}
###Response to evaluate: {orig_response}
###Reference Answer (Score 5): {orig_reference_answer}
###Score Rubrics: [{orig_criteria}] Score 1: {orig_score1_description} Score 2: {orig_score2_description} Score 3: {orig_score3_description} Score 4: {orig_score4_description} Score 5: {orig_score5_description}
###Feedback:
After this, you should apply the conversation template of Mistral (not applying it might lead to unexpected behaviors). You can find the conversation class at this link.
conv = get_conv_template("mistral") conv.set_system_message("You are a fair judge assistant tasked with providing clear, objective feedback based on specific criteria, ensuring each assessment reflects the absolute standards set for performance.") conv.append_message(conv.roles[0], dialogs['instruction']) conv.append_message(conv.roles[1], None) prompt = conv.get_prompt()
x = tokenizer(prompt,truncation=False)
As a result, a feedback and score decision will be generated, divided by a separating phrase [RESULT]
Relative Grading (Pairwise Ranking)
Prometheus requires 4 components in the input: An instruction, 2 responses to evaluate, a score rubric, and a reference answer. You could refer to the prompt format below. You should fill in the instruction, 2 responses, reference answer, and criteria description.
Fix the components with {text} inside.
###Task Description: An instruction (might include an Input inside it), a response to evaluate, and a score rubric representing a evaluation criteria are given. 1. Write a detailed feedback that assess the quality of two responses strictly based on the given score rubric, not evaluating in general. 2. After writing a feedback, choose a better response between Response A and Response B. You should refer to the score rubric. 3. The output format should look as follows: "Feedback: (write a feedback for criteria) [RESULT] (A or B)" 4. Please do not generate any other opening, closing, and explanations.
###Instruction: {orig_instruction}
###Response A: {orig_response_A}
###Response B: {orig_response_B}
###Reference Answer: {orig_reference_answer}
###Score Rubric: {orig_criteria}
###Feedback:
After this, you should apply the conversation template of Mistral (not applying it might lead to unexpected behaviors). You can find the conversation class at this link.
conv = get_conv_template("mistral") conv.set_system_message("You are a fair judge assistant assigned to deliver insightful feedback that compares individual performances, highlighting how each stands relative to others within the same cohort.") conv.append_message(conv.roles[0], dialogs['instruction']) conv.append_message(conv.roles[1], None) prompt = conv.get_prompt()
x = tokenizer(prompt,truncation=False)
As a result, a feedback and score decision will be generated, divided by a separating phrase [RESULT]
License
Feedback Collection, Preference Collection, and Prometheus 2 are subject to OpenAI's Terms of Use for the generated data. If you suspect any violations, please reach out to us.
Citation
If you find the following model helpful, please consider citing our paper!
BibTeX:
@misc{kim2023prometheus,
title={Prometheus: Inducing Fine-grained Evaluation Capability in Language Models},
author={Seungone Kim and Jamin Shin and Yejin Cho and Joel Jang and Shayne Longpre and Hwaran Lee and Sangdoo Yun and Seongjin Shin and Sungdong Kim and James Thorne and Minjoon Seo},
year={2023},
eprint={2310.08491},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{kim2024prometheus,
title={Prometheus 2: An Open Source Language Model Specialized in Evaluating Other Language Models},
author={Seungone Kim and Juyoung Suk and Shayne Longpre and Bill Yuchen Lin and Jamin Shin and Sean Welleck and Graham Neubig and Moontae Lee and Kyungjae Lee and Minjoon Seo},
year={2024},
eprint={2405.01535},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
</div>
<script>
import("/front/build/kube-9ffb4fa/index.js");
window.moonSha = "kube-9ffb4fa/";
window.hubConfig = JSON.parse(`{"features":{"signupDisabled":false},"sshGitUrl":"git@hf.co","moonHttpUrl":"https://huggingface.co","captchaApiKey":"bd5f2066-93dc-4bdd-a64b-a24646ca3859","captchaDisabledOnSignup":true,"datasetViewerPublicUrl":"https://datasets-server.huggingface.co","stripePublicKey":"pk_live_x2tdjFXBCvXo2FFmMybezpeM00J6gPCAAc","environment":"production","userAgent":"HuggingFace (production)","spacesIframeDomain":"hf.space","spacesApiUrl":"https://api.hf.space"}`);
</script>
<!-- Stripe -->
<script>
if (["hf.co", "huggingface.co"].includes(window.location.hostname)) {
const script = document.createElement("script");
script.src = "https://js.stripe.com/v3/";
script.async = true;
document.head.appendChild(script);
}
</script>
</body>