ValiantLabs/Llama3.1-8B-ShiningValiant2

Model card Files and versions Community

sequelbox committed on 22 days ago

Commit

2bcf208

•

1 Parent(s): 3602f94

eval

Browse files

Files changed (1) hide show

README.md +91 -23

README.md CHANGED Viewed

@@ -32,7 +32,6 @@ datasets:
 - sequelbox/Spurline
 - sequelbox/Supernova
 model_type: llama
-license: llama3.1
 model-index:
 - name: Llama3.1-8B-ShiningValiant2
   results:
@@ -60,30 +59,111 @@ model-index:
     - type: acc
       value: 68.75
       name: acc
     - type: acc
       value: 73.23
       name: acc
     - type: acc
-      value: 46.0
       name: acc
     - type: acc
       value: 44.33
       name: acc
     - type: acc
       value: 53.19
       name: acc
     - type: acc
       value: 37.25
       name: acc
     - type: acc
       value: 42.38
       name: acc
     - type: acc
-      value: 56.0
       name: acc
     - type: acc
-      value: 63.0
       name: acc
     - type: acc
       value: 63.16
       name: acc
@@ -97,7 +177,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: inst_level_strict_acc and prompt_level_strict_acc
-      value: 65.24
       name: strict accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -127,7 +207,7 @@ model-index:
         num_few_shot: 4
     metrics:
     - type: exact_match
-      value: 11.63
       name: exact match
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -142,7 +222,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: acc_norm
-      value: 8.95
       name: acc_norm
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -157,7 +237,7 @@ model-index:
         num_few_shot: 0
     metrics:
     - type: acc_norm
-      value: 7.19
       name: acc_norm
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
@@ -174,11 +254,12 @@ model-index:
         num_few_shot: 5
     metrics:
     - type: acc
-      value: 26.38
       name: accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
       name: Open LLM Leaderboard
 ---
@@ -252,17 +333,4 @@ Shining Valiant 2 is created by [Valiant Labs.](http://valiantlabs.ca/)
 We care about open source.
 For everyone to use.
-We encourage others to finetune further from our models.
-# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
-Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/ValiantLabs__Llama3.1-8B-ShiningValiant2-details)
-|      Metric       |Value|
-|-------------------|----:|
-|Avg.               |24.37|
-|IFEval (0-Shot)    |64.96|
-|BBH (3-Shot)       |26.35|
-|MATH Lvl 5 (4-Shot)|12.92|
-|GPQA (0-shot)      | 8.05|
-|MuSR (0-shot)      | 7.46|
-|MMLU-PRO (5-shot)  |26.46|

 - sequelbox/Spurline
 - sequelbox/Supernova
 model_type: llama
 model-index:
 - name: Llama3.1-8B-ShiningValiant2
   results:
     - type: acc
       value: 68.75
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU High School Biology (5-Shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
       value: 73.23
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU College Chemistry (5-Shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
+      value: 46.00
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU High School Chemistry (5-Shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
       value: 44.33
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU Conceptual Physics (5-Shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
       value: 53.19
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU College Physics (5-Shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
       value: 37.25
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU High School Physics (5-Shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
       value: 42.38
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU College Computer Science (5-Shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
+      value: 56.00
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU High School Computer Science (5-Shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
+      value: 63.00
       name: acc
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU Astronomy (5-shot)
+      type: MMLU
+      args:
+        num_few_shot: 5
+    metrics:
     - type: acc
       value: 63.16
       name: acc
         num_few_shot: 0
     metrics:
     - type: inst_level_strict_acc and prompt_level_strict_acc
+      value: 64.96
       name: strict accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
         num_few_shot: 4
     metrics:
     - type: exact_match
+      value: 12.92
       name: exact match
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
         num_few_shot: 0
     metrics:
     - type: acc_norm
+      value: 8.05
       name: acc_norm
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
         num_few_shot: 0
     metrics:
     - type: acc_norm
+      value: 7.46
       name: acc_norm
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
         num_few_shot: 5
     metrics:
     - type: acc
+      value: 26.46
       name: accuracy
     source:
       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
       name: Open LLM Leaderboard
+license: llama3.1
 ---
 We care about open source.
 For everyone to use.
+We encourage others to finetune further from our models.