hunterhector committed on
Commit
4beacbb
1 Parent(s): cf78de9

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +110 -5
README.md CHANGED
@@ -40,16 +40,121 @@ model-index:
40
  - name: accuracy
41
  type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
42
  value: 41.148 # Required. Example: 41.148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  - task:
44
  type: text-generation # Required. Example: automatic-speech-recognition
45
  dataset:
46
  type: openai_humanneval # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
47
  name: OpenAI HumanEval # Required. A pretty name for the dataset. Example: Common Voice (French)
48
  metrics:
49
- - name: pass@1
50
  type: pass@1 # Required. Example: wer. Use metric id from https://hf.co/metrics
51
  value: 31.707 # Required. Example: 41.148
52
- - name: pass@10
53
  type: pass@10
54
  value: 65.755
55
  - task:
@@ -58,12 +163,12 @@ model-index:
58
  type: mbpp # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
59
  name: Mostly Basic Python Problems (mbpp) # Required. A pretty name for the dataset. Example: Common Voice (French)
60
  metrics:
61
- - name: pass@1
62
  type: pass@1 # Required. Example: wer. Use metric id from https://hf.co/metrics
63
  value: 39.4 # Required. Example: 41.148
64
- - name: pass@10
65
  type: pass@10
66
- value: 59.895
67
  ---
68
 
69
  # CrystalChat
 
40
  - name: accuracy
41
  type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
42
  value: 41.148 # Required. Example: 41.148
43
+ - task:
44
+ type: multiple-choice # Required. Example: automatic-speech-recognition
45
+ dataset:
46
+ type: mmlu # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
47
+ name: Measuring Massive Multitask Language Understanding (MMLU) # Required. A pretty name for the dataset. Example: Common Voice (French)
48
+ metrics:
49
+ - name: accuracy
50
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
51
+ value: 52.789 # Required. Example: 41.148
52
+ - task:
53
+ type: multiple-choice # Required. Example: automatic-speech-recognition
54
+ dataset:
55
+ type: truthful_qa # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
56
+ name: Truthful QA # Required. A pretty name for the dataset. Example: Common Voice (French)
57
+ metrics:
58
+ - name: accuracy
59
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
60
+ value: 47.29 # Required. Example: 41.148
61
+ - task:
62
+ type: multiple-choice # Required. Example: automatic-speech-recognition
63
+ dataset:
64
+ type: winogrande # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
65
+ name: Winogrande # Required. A pretty name for the dataset. Example: Common Voice (French)
66
+ metrics:
67
+ - name: accuracy
68
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
69
+ value: 70.639 # Required. Example: 41.148
70
+ - task:
71
+ type: multiple-choice # Required. Example: automatic-speech-recognition
72
+ dataset:
73
+ type: copa # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
74
+ name: COPA # Required. A pretty name for the dataset. Example: Common Voice (French)
75
+ metrics:
76
+ - name: accuracy
77
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
78
+ value: 85 # Required. Example: 41.148
79
+ - task:
80
+ type: text-classification # Required. Example: automatic-speech-recognition
81
+ dataset:
82
+ type: boolq # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
83
+ name: Boolq # Required. A pretty name for the dataset. Example: Common Voice (French)
84
+ metrics:
85
+ - name: accuracy
86
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
87
+ value: 82.783 # Required. Example: 41.148
88
+ - task:
89
+ type: question-answering # Required. Example: automatic-speech-recognition
90
+ dataset:
91
+ type: openbookqa # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
92
+ name: Openbook QA # Required. A pretty name for the dataset. Example: Common Voice (French)
93
+ metrics:
94
+ - name: accuracy
95
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
96
+ value: 42 # Required. Example: 41.148
97
+ - task:
98
+ type: multiple-choice # Required. Example: automatic-speech-recognition
99
+ dataset:
100
+ type: hellaSwag # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
101
+ name: HellaSwag # Required. A pretty name for the dataset. Example: Common Voice (French)
102
+ metrics:
103
+ - name: accuracy (10-shot)
104
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
105
+ value: 76.12 # Required. Example: 41.148
106
+ - name: accuracy (0-shot)
107
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
108
+ value: 73.312 # Required. Example: 41.148
109
+ - task:
110
+ type: question-answering # Required. Example: automatic-speech-recognition
111
+ dataset:
112
+ type: piqa # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
113
+ name: PIQA # Required. A pretty name for the dataset. Example: Common Voice (French)
114
+ metrics:
115
+ - name: accuracy
116
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
117
+ value: 77.856 # Required. Example: 41.148
118
+ - task:
119
+ type: question-answering # Required. Example: automatic-speech-recognition
120
+ dataset:
121
+ type: ai2_arc # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
122
+ name: ARC (Easy) # Required. A pretty name for the dataset. Example: Common Voice (French)
123
+ metrics:
124
+ - name: accuracy
125
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
126
+ value: 70.328 # Required. Example: 41.148
127
+ - task:
128
+ type: question-answering # Required. Example: automatic-speech-recognition
129
+ dataset:
130
+ type: ai2_arc # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
131
+ name: ARC (Challenge) # Required. A pretty name for the dataset. Example: Common Voice (French)
132
+ metrics:
133
+ - name: accuracy (25-shot)
134
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
135
+ value: 51.706 # Required. Example: 41.148
136
+ - name: accuracy (0-shot)
137
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
138
+ value: 44.625 # Required. Example: 41.148
139
+ - task:
140
+ type: text-generation # Required. Example: automatic-speech-recognition
141
+ dataset:
142
+ type: gsm8k # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
143
+ name: GSM8K (Grade School Math 8K) # Required. A pretty name for the dataset. Example: Common Voice (French)
144
+ metrics:
145
+ - name: Accuracy (5 shot)
146
+ type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
147
+ value: 28.052 # Required. Example: 41.148
148
  - task:
149
  type: text-generation # Required. Example: automatic-speech-recognition
150
  dataset:
151
  type: openai_humanneval # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
152
  name: OpenAI HumanEval # Required. A pretty name for the dataset. Example: Common Voice (French)
153
  metrics:
154
+ - name: pass@1 (t=0.01)
155
  type: pass@1 # Required. Example: wer. Use metric id from https://hf.co/metrics
156
  value: 31.707 # Required. Example: 41.148
157
+ - name: pass@10 (t=0.8)
158
  type: pass@10
159
  value: 65.755
160
  - task:
 
163
  type: mbpp # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
164
  name: Mostly Basic Python Problems (mbpp) # Required. A pretty name for the dataset. Example: Common Voice (French)
165
  metrics:
166
+ - name: pass@1 (t=0.01)
167
  type: pass@1 # Required. Example: wer. Use metric id from https://hf.co/metrics
168
  value: 39.4 # Required. Example: 41.148
169
+ - name: pass@10 (t=0.8)
170
  type: pass@10
171
+ value: 59.895
172
  ---
173
 
174
  # CrystalChat