Commit cd98338
Parent: 4ae7957

Adding Evaluation Results (#2)

- Adding Evaluation Results (5a2444834232e41353e466b99c404d93a8db1261)


Co-authored-by: Open LLM Leaderboard PR Bot <leaderboard-pr-bot@users.noreply.huggingface.co>

Files changed (1)
  1. README.md +41 -34
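
The diff below can also be reproduced locally. The following is a minimal sketch using the `huggingface_hub` client to fetch README.md at this commit and at its parent and diff the two; it assumes `huggingface_hub` is installed and that the short hashes shown above (`cd98338`, `4ae7957`) resolve as revisions on the Hub.

```python
# Minimal sketch: download README.md at this commit and at its parent, then
# print a unified diff equivalent to the one shown below.
# Assumes huggingface_hub is installed and the short hashes resolve on the Hub.
import difflib

from huggingface_hub import hf_hub_download

REPO_ID = "sethuiyer/SynthIQ-7b"


def read_readme(revision: str) -> list[str]:
    """Fetch README.md at a given revision and return its lines."""
    path = hf_hub_download(REPO_ID, "README.md", revision=revision)
    with open(path, encoding="utf-8") as f:
        return f.readlines()


old_lines = read_readme("4ae7957")  # parent commit
new_lines = read_readme("cd98338")  # this commit

print("".join(difflib.unified_diff(
    old_lines, new_lines,
    fromfile="README.md@4ae7957",
    tofile="README.md@cd98338",
)))
```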
README.md CHANGED
@@ -1,29 +1,29 @@
  ---
- license: llama2
  language:
- - en
- tags:
- - mistral
- - merge
+ - en
+ license: llama2
  library_name: transformers
+ tags:
+ - mistral
+ - merge
+ datasets:
+ - stingning/ultrachat
+ - garage-bAInd/Open-Platypus
+ - Open-Orca/OpenOrca
+ - TIGER-Lab/MathInstruct
+ - OpenAssistant/oasst_top1_2023-08-25
+ - teknium/openhermes
+ - meta-math/MetaMathQA
+ - Open-Orca/SlimOrca
  pipeline_tag: text-generation
  base_model:
- - Weyaxi/OpenHermes-2.5-neural-chat-v3-3-openchat-3.5-1210-Slerp
- - ehartford/dolphin-2.1-mistral-7b
- - Open-Orca/Mistral-7B-OpenOrca
- - bhenrym14/mistral-7b-platypus-fp16
- - ehartford/samantha-1.2-mistral-7b
- - iteknium/CollectiveCognition-v1.1-Mistral-7B
- - HuggingFaceH4/zephyr-7b-alpha
- datasets:
- - stingning/ultrachat
- - garage-bAInd/Open-Platypus
- - Open-Orca/OpenOrca
- - TIGER-Lab/MathInstruct
- - OpenAssistant/oasst_top1_2023-08-25
- - teknium/openhermes
- - meta-math/MetaMathQA
- - Open-Orca/SlimOrca
+ - Weyaxi/OpenHermes-2.5-neural-chat-v3-3-openchat-3.5-1210-Slerp
+ - ehartford/dolphin-2.1-mistral-7b
+ - Open-Orca/Mistral-7B-OpenOrca
+ - bhenrym14/mistral-7b-platypus-fp16
+ - ehartford/samantha-1.2-mistral-7b
+ - iteknium/CollectiveCognition-v1.1-Mistral-7B
+ - HuggingFaceH4/zephyr-7b-alpha
  model-index:
  - name: sethuiyer/SynthIQ-7b
    results:
@@ -42,8 +42,7 @@ model-index:
  value: 65.87
  name: normalized accuracy
  source:
- url: >-
-   https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
  name: Open LLM Leaderboard
  - task:
  type: text-generation
@@ -59,8 +58,7 @@ model-index:
  value: 85.82
  name: normalized accuracy
  source:
- url: >-
-   https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
  name: Open LLM Leaderboard
  - task:
  type: text-generation
@@ -77,8 +75,7 @@ model-index:
  value: 64.75
  name: accuracy
  source:
- url: >-
-   https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
  name: Open LLM Leaderboard
  - task:
  type: text-generation
@@ -94,8 +91,7 @@ model-index:
  - type: mc2
  value: 57
  source:
- url: >-
-   https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
  name: Open LLM Leaderboard
  - task:
  type: text-generation
@@ -112,8 +108,7 @@ model-index:
  value: 78.69
  name: accuracy
  source:
- url: >-
-   https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
  name: Open LLM Leaderboard
  - task:
  type: text-generation
@@ -130,8 +125,7 @@ model-index:
  value: 64.06
  name: accuracy
  source:
- url: >-
-   https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=sethuiyer/SynthIQ-7b
  name: Open LLM Leaderboard
  ---

@@ -220,4 +214,17 @@ License is LLama2 license as uukuguy/speechless-mistral-six-in-one-7b is llama2
  Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_sethuiyer__SynthIQ-7b)

  # [Nous Benchmark Evalation Results](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard)
- Detailed results can be found [here](https://gist.github.com/sethuiyer/f47dee388a4e95d46181c98d37d66a58)
+ Detailed results can be found [here](https://gist.github.com/sethuiyer/f47dee388a4e95d46181c98d37d66a58)
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_sethuiyer__SynthIQ-7b)
+
+ | Metric |Value|
+ |---------------------------------|----:|
+ |Avg. |69.37|
+ |AI2 Reasoning Challenge (25-Shot)|65.87|
+ |HellaSwag (10-Shot) |85.82|
+ |MMLU (5-Shot) |64.75|
+ |TruthfulQA (0-shot) |57.00|
+ |Winogrande (5-shot) |78.69|
+ |GSM8k (5-shot) |64.06|
+
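
After this commit, the YAML front matter shown in the diff (tags, datasets, base_model, and the model-index block) is machine-readable model card metadata. Below is a minimal sketch of reading it back with `huggingface_hub` and recomputing the leaderboard average from the six benchmark scores in the table; it assumes `huggingface_hub` is installed and uses only the field names visible in the diff above.

```python
# Minimal sketch: load the sethuiyer/SynthIQ-7b model card and inspect the
# metadata added in this commit. Assumes huggingface_hub is installed.
from huggingface_hub import ModelCard

card = ModelCard.load("sethuiyer/SynthIQ-7b")
meta = card.data.to_dict()

print(meta["pipeline_tag"])  # text-generation
print(meta["base_model"])    # the seven merged base models
print(meta["datasets"])      # the eight datasets listed in the front matter

# Sanity check: the 'Avg.' row in the README table should be the mean of the
# six benchmark scores (ARC, HellaSwag, MMLU, TruthfulQA, Winogrande, GSM8k).
scores = [65.87, 85.82, 64.75, 57.00, 78.69, 64.06]
average = sum(scores) / len(scores)
print(f"Recomputed average: {average:.3f}")  # ~69.365, reported as 69.37
```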