Files changed (1)
  1. README.md +128 -12
README.md CHANGED
@@ -1,5 +1,16 @@
  ---
+ language:
+ - en
  license: llama2
+ tags:
+ - code
+ - text-generation-inference
+ - Information Extraction
+ - IE
+ - Named Entity Recogniton
+ - Event Extraction
+ - Relation Extraction
+ - LLaMA
  datasets:
  - ACE05
  - bc5cdr
@@ -9,20 +20,112 @@ datasets:
  - rams
  - tacred
  - wnut_17
- language:
- - en
  metrics:
  - f1
  pipeline_tag: text-generation
- tags:
- - code
- - text-generation-inference
- - Information Extraction
- - IE
- - Named Entity Recogniton
- - Event Extraction
- - Relation Extraction
- - LLaMA
+ model-index:
+ - name: GoLLIE-7B
+   results:
+   - task:
+       type: text-generation
+       name: Text Generation
+     dataset:
+       name: AI2 Reasoning Challenge (25-Shot)
+       type: ai2_arc
+       config: ARC-Challenge
+       split: test
+       args:
+         num_few_shot: 25
+     metrics:
+     - type: acc_norm
+       value: 36.09
+       name: normalized accuracy
+     source:
+       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HiTZ/GoLLIE-7B
+       name: Open LLM Leaderboard
+   - task:
+       type: text-generation
+       name: Text Generation
+     dataset:
+       name: HellaSwag (10-Shot)
+       type: hellaswag
+       split: validation
+       args:
+         num_few_shot: 10
+     metrics:
+     - type: acc_norm
+       value: 57.93
+       name: normalized accuracy
+     source:
+       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HiTZ/GoLLIE-7B
+       name: Open LLM Leaderboard
+   - task:
+       type: text-generation
+       name: Text Generation
+     dataset:
+       name: MMLU (5-Shot)
+       type: cais/mmlu
+       config: all
+       split: test
+       args:
+         num_few_shot: 5
+     metrics:
+     - type: acc
+       value: 29.38
+       name: accuracy
+     source:
+       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HiTZ/GoLLIE-7B
+       name: Open LLM Leaderboard
+   - task:
+       type: text-generation
+       name: Text Generation
+     dataset:
+       name: TruthfulQA (0-shot)
+       type: truthful_qa
+       config: multiple_choice
+       split: validation
+       args:
+         num_few_shot: 0
+     metrics:
+     - type: mc2
+       value: 39.27
+     source:
+       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HiTZ/GoLLIE-7B
+       name: Open LLM Leaderboard
+   - task:
+       type: text-generation
+       name: Text Generation
+     dataset:
+       name: Winogrande (5-shot)
+       type: winogrande
+       config: winogrande_xl
+       split: validation
+       args:
+         num_few_shot: 5
+     metrics:
+     - type: acc
+       value: 58.96
+       name: accuracy
+     source:
+       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HiTZ/GoLLIE-7B
+       name: Open LLM Leaderboard
+   - task:
+       type: text-generation
+       name: Text Generation
+     dataset:
+       name: GSM8k (5-shot)
+       type: gsm8k
+       config: main
+       split: test
+       args:
+         num_few_shot: 5
+     metrics:
+     - type: acc
+       value: 3.26
+       name: accuracy
+     source:
+       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HiTZ/GoLLIE-7B
+       name: Open LLM Leaderboard
  ---

  <p align="center">
@@ -178,4 +281,17 @@ For more info, read our [📖Paper](https://arxiv.org/abs/2310.03668).
  archivePrefix={arXiv},
  primaryClass={cs.CL}
  }
- ```
+ ```
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_HiTZ__GoLLIE-7B)
+
+ | Metric                           |Value|
+ |----------------------------------|----:|
+ |Avg.                              |37.48|
+ |AI2 Reasoning Challenge (25-Shot) |36.09|
+ |HellaSwag (10-Shot)               |57.93|
+ |MMLU (5-Shot)                     |29.38|
+ |TruthfulQA (0-shot)               |39.27|
+ |Winogrande (5-shot)               |58.96|
+ |GSM8k (5-shot)                    | 3.26|
+
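Note on the added metadata: the `model-index` block in the new front matter is machine-readable and mirrors the scores in the new results table, pairing each evaluation dataset with its metric type and value. As a quick illustration (not part of the PR), the sketch below parses that front matter with PyYAML and prints the declared scores; it assumes the updated `README.md` has already been downloaded locally (for example via `huggingface_hub`).

```python
# Minimal sketch: read the YAML front matter this PR adds and list the
# declared Open LLM Leaderboard scores. Assumes README.md is on disk.
import yaml

with open("README.md", encoding="utf-8") as f:
    text = f.read()

# The front matter sits between the first two "---" delimiters.
_, front_matter, _ = text.split("---", 2)
card = yaml.safe_load(front_matter)

for entry in card["model-index"]:
    print(entry["name"])                      # e.g. GoLLIE-7B
    for result in entry["results"]:
        dataset = result["dataset"]["name"]   # e.g. MMLU (5-Shot)
        for metric in result["metrics"]:
            print(f"  {dataset}: {metric['type']} = {metric['value']}")
```

For per-sample outputs, the linked `open-llm-leaderboard/details_HiTZ__GoLLIE-7B` dataset can be loaded with the `datasets` library; the exact config and split names are not given in this diff, so consult that dataset's card before loading.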