CultriX
/

Qwen2.5-14B-Wernicke-SFT

@@ -1,18 +1,24 @@
 ---
-datasets:
-- CultriX/uptodate-small
-- ajibawa-2023/Python-Code-23k-ShareGPT
-- isaiahbjork/chain-of-thought-sharegpt
-- shibing624/sharegpt_gpt4
-- theblackcat102/sharegpt-english
-base_model:
-- CultriX/Qwen2.5-14B-Wernicke
 tags:
 - medical
 - gpt4
 - sft
 - mergekit
 - merge
 model-index:
 - name: Qwen2.5-14B-Wernicke
   results:
@@ -28,9 +34,11 @@ model-index:
     - type: inst_level_strict_acc and prompt_level_strict_acc
       value: 52.35
       name: strict accuracy
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -44,9 +52,11 @@ model-index:
     - type: acc_norm
       value: 50.64
       name: normalized accuracy
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -60,9 +70,11 @@ model-index:
     - type: exact_match
       value: 30.06
       name: exact match
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -76,9 +88,11 @@ model-index:
     - type: acc_norm
       value: 19.13
       name: acc_norm
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -92,9 +106,11 @@ model-index:
     - type: acc_norm
       value: 18.25
       name: acc_norm
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -110,16 +126,12 @@ model-index:
     - type: acc
       value: 49.15
       name: accuracy
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
-license: apache-2.0
-language:
-- en
-metrics:
-- accuracy
-pipeline_tag: text-generation
 ---
 # merge
@@ -166,4 +178,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
 |MATH Lvl 5 (4-Shot)|30.06|
 |GPQA (0-shot)      |19.13|
 |MuSR (0-shot)      |18.25|
-|MMLU-PRO (5-shot)  |49.15|

 ---
+language:
+- en
+license: apache-2.0
 tags:
 - medical
 - gpt4
 - sft
 - mergekit
 - merge
+base_model:
+- CultriX/Qwen2.5-14B-Wernicke
+datasets:
+- CultriX/uptodate-small
+- ajibawa-2023/Python-Code-23k-ShareGPT
+- isaiahbjork/chain-of-thought-sharegpt
+- shibing624/sharegpt_gpt4
+- theblackcat102/sharegpt-english
+metrics:
+- accuracy
+pipeline_tag: text-generation
 model-index:
 - name: Qwen2.5-14B-Wernicke
   results:
     - type: inst_level_strict_acc and prompt_level_strict_acc
       value: 52.35
       name: strict accuracy
+    - type: inst_level_strict_acc and prompt_level_strict_acc
+      value: 49.37
+      name: strict accuracy
     source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
     - type: acc_norm
       value: 50.64
       name: normalized accuracy
+    - type: acc_norm
+      value: 49.33
+      name: normalized accuracy
     source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
     - type: exact_match
       value: 30.06
       name: exact match
+    - type: exact_match
+      value: 35.8
+      name: exact match
     source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
     - type: acc_norm
       value: 19.13
       name: acc_norm
+    - type: acc_norm
+      value: 13.87
+      name: acc_norm
     source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
     - type: acc_norm
       value: 18.25
       name: acc_norm
+    - type: acc_norm
+      value: 7.55
+      name: acc_norm
     source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
   - task:
       type: text-generation
     - type: acc
       value: 49.15
       name: accuracy
+    - type: acc
+      value: 45.22
+      name: accuracy
     source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=CultriX/Qwen2.5-14B-Wernicke
       name: Open LLM Leaderboard
 ---
 # merge
 |MATH Lvl 5 (4-Shot)|30.06|
 |GPQA (0-shot)      |19.13|
 |MuSR (0-shot)      |18.25|
+|MMLU-PRO (5-shot)  |49.15|
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_CultriX__Qwen2.5-14B-Wernicke-SFT)
+|      Metric       |Value|
+|-------------------|----:|
+|Avg.               |33.52|
+|IFEval (0-Shot)    |49.37|
+|BBH (3-Shot)       |49.33|
+|MATH Lvl 5 (4-Shot)|35.80|
+|GPQA (0-shot)      |13.87|
+|MuSR (0-shot)      | 7.55|
+|MMLU-PRO (5-shot)  |45.22|