Files changed (1) hide show
  1. README.md +22 -14
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  model-index:
3
  - name: miqu-1-70b-sf
4
  results:
@@ -17,8 +19,7 @@ model-index:
17
  value: 73.04
18
  name: normalized accuracy
19
  source:
20
- url: >-
21
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
22
  name: Open LLM Leaderboard
23
  - task:
24
  type: text-generation
@@ -34,8 +35,7 @@ model-index:
34
  value: 88.61
35
  name: normalized accuracy
36
  source:
37
- url: >-
38
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
39
  name: Open LLM Leaderboard
40
  - task:
41
  type: text-generation
@@ -52,8 +52,7 @@ model-index:
52
  value: 75.49
53
  name: accuracy
54
  source:
55
- url: >-
56
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
57
  name: Open LLM Leaderboard
58
  - task:
59
  type: text-generation
@@ -69,8 +68,7 @@ model-index:
69
  - type: mc2
70
  value: 69.38
71
  source:
72
- url: >-
73
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
74
  name: Open LLM Leaderboard
75
  - task:
76
  type: text-generation
@@ -87,8 +85,7 @@ model-index:
87
  value: 85.32
88
  name: accuracy
89
  source:
90
- url: >-
91
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
92
  name: Open LLM Leaderboard
93
  - task:
94
  type: text-generation
@@ -105,11 +102,8 @@ model-index:
105
  value: 67.7
106
  name: accuracy
107
  source:
108
- url: >-
109
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
110
  name: Open LLM Leaderboard
111
- language:
112
- - en
113
  ---
114
 
115
  update: added NOMERGE license
@@ -290,3 +284,17 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
290
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
291
  SOFTWARE.
292
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language:
3
+ - en
4
  model-index:
5
  - name: miqu-1-70b-sf
6
  results:
 
19
  value: 73.04
20
  name: normalized accuracy
21
  source:
22
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
 
23
  name: Open LLM Leaderboard
24
  - task:
25
  type: text-generation
 
35
  value: 88.61
36
  name: normalized accuracy
37
  source:
38
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
 
39
  name: Open LLM Leaderboard
40
  - task:
41
  type: text-generation
 
52
  value: 75.49
53
  name: accuracy
54
  source:
55
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
 
56
  name: Open LLM Leaderboard
57
  - task:
58
  type: text-generation
 
68
  - type: mc2
69
  value: 69.38
70
  source:
71
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
 
72
  name: Open LLM Leaderboard
73
  - task:
74
  type: text-generation
 
85
  value: 85.32
86
  name: accuracy
87
  source:
88
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
 
89
  name: Open LLM Leaderboard
90
  - task:
91
  type: text-generation
 
102
  value: 67.7
103
  name: accuracy
104
  source:
105
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=152334H/miqu-1-70b-sf
 
106
  name: Open LLM Leaderboard
 
 
107
  ---
108
 
109
  update: added NOMERGE license
 
284
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
285
  SOFTWARE.
286
  ```
287
+
288
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
289
+ Detailed results can be found in the [Open LLM Leaderboard details dataset](https://huggingface.co/datasets/open-llm-leaderboard/details_152334H__miqu-1-70b-sf).
290
+
291
+ | Metric |Value|
292
+ |---------------------------------|----:|
293
+ |Avg. |76.59|
294
+ |AI2 Reasoning Challenge (25-shot)|73.04|
295
+ |HellaSwag (10-shot) |88.61|
296
+ |MMLU (5-shot) |75.49|
297
+ |TruthfulQA (0-shot) |69.38|
298
+ |Winogrande (5-shot) |85.32|
299
+ |GSM8k (5-shot) |67.70|
300
+