Update README.md
Browse files
README.md
CHANGED
@@ -103,7 +103,33 @@ Model evaluation metrics and results.
|
|
103 |
|
104 |
### Benchmark Results
|
105 |
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
## Usage and Limitations
|
109 |
|
|
|
103 |
|
104 |
### Benchmark Results
|
105 |
|
106 |
+
| Evaluation | Metric | Shots | 7b |
|
107 |
+
|-----------------------|------------------------|-------|--------|
|
108 |
+
| Default Metric | ACC | | |
|
109 |
+
| Knowledge (5-shot) | MMLU | | 61.76 |
|
110 |
+
| | KMMLU | | 42.75 |
|
111 |
+
| | CMMLU | | 50.93 |
|
112 |
+
| | JMMLU | | - |
|
113 |
+
| | C-Eval | | 50.07 |
|
114 |
+
| | HAERAE (0-shot) | | 63.89 |
|
115 |
+
| KOBest (5-shot) | BoolQ | | 85.47 |
|
116 |
+
| | COPA | | 83.5 |
|
117 |
+
| | Hellaswag (acc-norm) | | 63.2 |
|
118 |
+
| | Sentineg | | 97.98 |
|
119 |
+
| | WiC | | 70.95 |
|
120 |
+
| JP Eval Harness | JcommonsenseQA | 3-shot| 85.97 |
|
121 |
+
| (Prompt ver 0.3) | JNLI | 3-shot| 39.11 |
|
122 |
+
| | MARC_JA | 3-shot| 96.48 |
|
123 |
+
| | JSQUAD | 2-shot| 70.69 |
|
124 |
+
| | JAQKET | 1-shot| 81.53 |
|
125 |
+
| | MGSM | 5-shot| 28.8 |
|
126 |
+
| XWinograd (5-shot) | EN | | 90.71 |
|
127 |
+
| | FR | | 80.72 |
|
128 |
+
| | JP | | 84.15 |
|
129 |
+
| | PT | | 80.99 |
|
130 |
+
| | RU | | 76.51 |
|
131 |
+
| | ZH | | 76.98 |
|
132 |
+
|
133 |
|
134 |
## Usage and Limitations
|
135 |
|