reward-bench / src /rm-training-data.csv
natolambert's picture
add contamination note
b64c62d
raw
history blame
2.48 kB
Reward Model,Preference Datasets Used
RLHFlow/ArmoRM-Llama3-8B-v0.1,"HelpSteer, UltraFeedback, BeaverTails, Argilla-Capybara, Argilla-Math-Preferences, CodeUltraFeedback, Argilla-OpenOrca"
RLHFlow/pair-preference-model-LLaMA3-8B,"Filtered HH-RLHF, SHP, HelpSteer, SafeRLHF-30k, UltraFeedback, UltraInteract, CodeUltraFeedback, Argilla-Math, OpenOrca, Capybara"
sfairXC/FsfairX-LLaMA3-RM-v0.1,"Filtered HH-RLHF, SHP, HelpSteer, SafeRLHF-30k, UltraFeedback, UltraInteract, CodeUltraFeedback, Argilla-Math, OpenOrca, Capybara"
openbmb/Eurus-RM-7b,"UltraInteract, UltraFeedback, UltraSafety"
Nexusflow/Starling-RM-34B,Nectar
weqweasdas/RM-Mistral-7B,"HH-RLHF, Capybara, Orca, SHP, UltraFeedback, HelpSteer, PKU-SafeRLHF, PKU-SafeRLHF-30k"
hendrydong/Mistral-RM-for-RAFT-GSHF-v0,Undisclosed
stabilityai/stablelm-2-12b-chat,"HH-RLHF, argilla/dpo-mix-7k, and other Undisclosed"
Ray2333/reward-model-Mistral-7B-instruct...,"Summarize, WebGPT, Dahoas/instruct-synthetic-prompt-responses, HH-RLHF, ChatBotArena Conversations, UltraFeedback, Nectar"
allenai/tulu-2-dpo-70b,UltraFeedback
meta-llama/Meta-Llama-3-70B-Instruct,Undisclosed
prometheus-eval/prometheus-8x7b-v2.0,Preference Collection (relabeled mix)
NousResearch/Nous-Hermes-2-Mistral-7B-DPO,Undisclosed
mistralai/Mixtral-8x7B-Instruct-v0.1,Undisclosed
upstage/SOLAR-10.7B-Instruct-v1.0,"OpenOrca, Intel-Orca, UltraFeedback"
HuggingFaceH4/zephyr-7b-alpha,UltraFeedback
allenai/tulu-2-dpo-13b,UltraFeedback
0-hero/Matter-0.1-7B-boost-DPO-preview,Undisclosed
prometheus-eval/prometheus-7b-v2.0,Preference Collection (relabeled mix)
HuggingFaceH4/starchat2-15b-v0.1,"UltraFeedback, Orca"
HuggingFaceH4/zephyr-7b-beta,UltraFeedback
allenai/tulu-2-dpo-7b,UltraFeedback
jondurbin/bagel-dpo-34b-v0.5,"Airoboros 3.2, Contextual DPO, HelpSteer, Orca, Gutenberg-DPO, Python DPO, Toxic DPO, Truthy, UltraFeedback"
berkeley-nest/Starling-RM-7B-alpha,Nectar
NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,Undisclosed
0-hero/Matter-0.1-7B-DPO-preview,Undisclosed
stabilityai/stablelm-zephyr-3b,"UltraFeedback, Orca"
Qwen/Qwen1.5-14B-Chat,Undisclosed
CohereForAI/c4ai-command-r-plus,Undisclosed
OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5,"WebGPT, HH-RLHF, SHP, Summarize"
Qwen/Qwen1.5-7B-Chat,Undisclosed
weqweasdas/RM-Gemma-7B,"HH-RLHF, SHP, UltraFeedback, Capybara, HelpSteer, Orca"
openbmb/Eurus-7b-kto,"UltraInteract, UltraFeedback"
Qwen/Qwen1.5-72B-Chat,Undisclosed
openbmb/UltraRM-13b,"UltraFeedback, HH-RLHF, SHP, Summarize"