Update Circumflex_TR results

#11
results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json CHANGED
@@ -190,8 +190,8 @@
190
  {
191
  "name": "circumflex_tr",
192
  "task": "multiple_choice",
193
- "acc": 0.6142857142857143,
194
- "acc_norm": 0.6142857142857143
195
  }
196
  ]
197
  }
 
190
  {
191
  "name": "circumflex_tr",
192
  "task": "multiple_choice",
193
+ "acc": 0.5857142857142857,
194
+ "acc_norm": 0.5857142857142857
195
  }
196
  ]
197
  }
results/zero-shot/Llama-3.3-70B-Instruct.json CHANGED
@@ -192,8 +192,8 @@
192
  {
193
  "name": "circumflex_tr",
194
  "task": "multiple_choice",
195
- "acc": 0.6,
196
- "acc_norm": 0.6
197
  }
198
  ]
199
  }
 
192
  {
193
  "name": "circumflex_tr",
194
  "task": "multiple_choice",
195
+ "acc": 0.6714285714285714,
196
+ "acc_norm": 0.6714285714285714
197
  }
198
  ]
199
  }
results/zero-shot/Ministral-8B-Instruct.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5571428571428572,
193
- "acc_norm": 0.5571428571428572
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5857142857142857,
193
+ "acc_norm": 0.5857142857142857
194
  }
195
  ]
196
  }
results/zero-shot/Mistral-7B-Instruct-v0.3.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5142857142857142,
193
- "acc_norm": 0.5142857142857142
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5,
193
+ "acc_norm": 0.5
194
  }
195
  ]
196
  }
results/zero-shot/Mistral-7B-v0.3.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5142857142857142,
193
- "acc_norm": 0.5142857142857142
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5714285714285714,
193
+ "acc_norm": 0.5714285714285714
194
  }
195
  ]
196
  }
results/zero-shot/Qwen2.5-0.5B-Instruct.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5857142857142857,
193
- "acc_norm": 0.5857142857142857
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5428571428571428,
193
+ "acc_norm": 0.5428571428571428
194
  }
195
  ]
196
  }
results/zero-shot/Qwen2.5-1.5B-Instruct.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5142857142857142,
193
- "acc_norm": 0.5142857142857142
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.4857142857142857,
193
+ "acc_norm": 0.4857142857142857
194
  }
195
  ]
196
  }
results/zero-shot/Qwen2.5-1.5B.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5142857142857142,
193
- "acc_norm": 0.5142857142857142
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5,
193
+ "acc_norm": 0.5
194
  }
195
  ]
196
  }
results/zero-shot/Qwen2.5-14B-Instruct.json CHANGED
@@ -191,8 +191,8 @@
191
  {
192
  "name": "circumflex_tr",
193
  "task": "multiple_choice",
194
- "acc": 0.5428571428571428,
195
- "acc_norm": 0.5428571428571428
196
  }
197
  ]
198
  }
 
191
  {
192
  "name": "circumflex_tr",
193
  "task": "multiple_choice",
194
+ "acc": 0.5857142857142857,
195
+ "acc_norm": 0.5857142857142857
196
  }
197
  ]
198
  }
results/zero-shot/Qwen2.5-14B.json CHANGED
@@ -191,8 +191,8 @@
191
  {
192
  "name": "circumflex_tr",
193
  "task": "multiple_choice",
194
- "acc": 0.5714285714285714,
195
- "acc_norm": 0.5714285714285714
196
  }
197
  ]
198
  }
 
191
  {
192
  "name": "circumflex_tr",
193
  "task": "multiple_choice",
194
+ "acc": 0.5857142857142857,
195
+ "acc_norm": 0.5857142857142857
196
  }
197
  ]
198
  }
results/zero-shot/Qwen2.5-3B-Instruct.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.6285714285714286,
193
- "acc_norm": 0.6285714285714286
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5428571428571428,
193
+ "acc_norm": 0.5428571428571428
194
  }
195
  ]
196
  }
results/zero-shot/Qwen2.5-3B.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.6285714285714286,
193
- "acc_norm": 0.6285714285714286
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5571428571428572,
193
+ "acc_norm": 0.5571428571428572
194
  }
195
  ]
196
  }
results/zero-shot/Qwen2.5-7B-Instruct.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5142857142857142,
193
- "acc_norm": 0.5142857142857142
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5428571428571428,
193
+ "acc_norm": 0.5428571428571428
194
  }
195
  ]
196
  }
results/zero-shot/Qwen2.5-7B.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5428571428571428,
193
- "acc_norm": 0.5428571428571428
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5714285714285714,
193
+ "acc_norm": 0.5714285714285714
194
  }
195
  ]
196
  }
results/zero-shot/aya-23-35B.json CHANGED
@@ -191,8 +191,8 @@
191
  {
192
  "name": "circumflex_tr",
193
  "task": "multiple_choice",
194
- "acc": 0.5285714285714286,
195
- "acc_norm": 0.5285714285714286
196
  }
197
  ]
198
  }
 
191
  {
192
  "name": "circumflex_tr",
193
  "task": "multiple_choice",
194
+ "acc": 0.6,
195
+ "acc_norm": 0.6
196
  }
197
  ]
198
  }
results/zero-shot/aya-23-8b.json CHANGED
@@ -185,8 +185,8 @@
185
  {
186
  "name": "circumflex_tr",
187
  "task": "multiple_choice",
188
- "acc": 0.4857142857142857,
189
- "acc_norm": 0.4857142857142857
190
  }
191
  ]
192
  }
 
185
  {
186
  "name": "circumflex_tr",
187
  "task": "multiple_choice",
188
+ "acc": 0.5857142857142857,
189
+ "acc_norm": 0.5857142857142857
190
  }
191
  ]
192
  }
results/zero-shot/aya-expanse-32b.json CHANGED
@@ -190,8 +190,8 @@
190
  {
191
  "name": "circumflex_tr",
192
  "task": "multiple_choice",
193
- "acc": 0.6285714285714286,
194
- "acc_norm": 0.6285714285714286
195
  }
196
  ]
197
  }
 
190
  {
191
  "name": "circumflex_tr",
192
  "task": "multiple_choice",
193
+ "acc": 0.5714285714285714,
194
+ "acc_norm": 0.5714285714285714
195
  }
196
  ]
197
  }
results/zero-shot/aya-expanse-8b.json CHANGED
@@ -183,8 +183,8 @@
183
  {
184
  "name": "circumflex_tr",
185
  "task": "multiple_choice",
186
- "acc": 0.5571428571428572,
187
- "acc_norm": 0.5571428571428572
188
  }
189
  ]
190
  }
 
183
  {
184
  "name": "circumflex_tr",
185
  "task": "multiple_choice",
186
+ "acc": 0.5428571428571428,
187
+ "acc_norm": 0.5428571428571428
188
  }
189
  ]
190
  }
results/zero-shot/aya101.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5571428571428572,
193
- "acc_norm": 0.5571428571428572
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5714285714285714,
193
+ "acc_norm": 0.5714285714285714
194
  }
195
  ]
196
  }
results/zero-shot/commencis-7b.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.44285714285714284,
193
- "acc_norm": 0.44285714285714284
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5857142857142857,
193
+ "acc_norm": 0.5857142857142857
194
  }
195
  ]
196
  }
results/zero-shot/kanarya-2b.json CHANGED
@@ -188,8 +188,8 @@
188
  {
189
  "name": "circumflex_tr",
190
  "task": "multiple_choice",
191
- "acc": 0.4857142857142857,
192
- "acc_norm": 0.4857142857142857
193
  }
194
  ]
195
  }
 
188
  {
189
  "name": "circumflex_tr",
190
  "task": "multiple_choice",
191
+ "acc": 0.5428571428571428,
192
+ "acc_norm": 0.5428571428571428
193
  }
194
  ]
195
  }
results/zero-shot/llama-3-8b-instruct.json CHANGED
@@ -184,8 +184,8 @@
184
  {
185
  "name": "circumflex_tr",
186
  "task": "multiple_choice",
187
- "acc": 0.5571428571428572,
188
- "acc_norm": 0.5571428571428572
189
  }
190
  ]
191
  }
 
184
  {
185
  "name": "circumflex_tr",
186
  "task": "multiple_choice",
187
+ "acc": 0.6142857142857143,
188
+ "acc_norm": 0.6142857142857143
189
  }
190
  ]
191
  }
results/zero-shot/llama-3-8b.json CHANGED
@@ -183,8 +183,8 @@
183
  {
184
  "name": "circumflex_tr",
185
  "task": "multiple_choice",
186
- "acc": 0.4857142857142857,
187
- "acc_norm": 0.4857142857142857
188
  }
189
  ]
190
  }
 
183
  {
184
  "name": "circumflex_tr",
185
  "task": "multiple_choice",
186
+ "acc": 0.5428571428571428,
187
+ "acc_norm": 0.5428571428571428
188
  }
189
  ]
190
  }
results/zero-shot/llama-3.1-8b-instruct.json CHANGED
@@ -183,8 +183,8 @@
183
  {
184
  "name": "circumflex_tr",
185
  "task": "multiple_choice",
186
- "acc": 0.5714285714285714,
187
- "acc_norm": 0.5714285714285714
188
  }
189
  ]
190
  }
 
183
  {
184
  "name": "circumflex_tr",
185
  "task": "multiple_choice",
186
+ "acc": 0.6428571428571429,
187
+ "acc_norm": 0.6428571428571429
188
  }
189
  ]
190
  }
results/zero-shot/llama-3.1-8b.json CHANGED
@@ -183,8 +183,8 @@
183
  {
184
  "name": "circumflex_tr",
185
  "task": "multiple_choice",
186
- "acc": 0.5571428571428572,
187
- "acc_norm": 0.5571428571428572
188
  }
189
  ]
190
  }
 
183
  {
184
  "name": "circumflex_tr",
185
  "task": "multiple_choice",
186
+ "acc": 0.5857142857142857,
187
+ "acc_norm": 0.5857142857142857
188
  }
189
  ]
190
  }
results/zero-shot/llama-3.2-1b.json CHANGED
@@ -215,8 +215,8 @@
215
  {
216
  "name": "circumflex_tr",
217
  "task": "multiple_choice",
218
- "acc": 0.5,
219
- "acc_norm": 0.5
220
  }
221
  ]
222
  }
 
215
  {
216
  "name": "circumflex_tr",
217
  "task": "multiple_choice",
218
+ "acc": 0.5285714285714286,
219
+ "acc_norm": 0.5285714285714286
220
  }
221
  ]
222
  }
results/zero-shot/llama-3.2-3b-instruct.json CHANGED
@@ -215,8 +215,8 @@
215
  {
216
  "name": "circumflex_tr",
217
  "task": "multiple_choice",
218
- "acc": 0.5142857142857142,
219
- "acc_norm": 0.5142857142857142
220
  }
221
  ]
222
  }
 
215
  {
216
  "name": "circumflex_tr",
217
  "task": "multiple_choice",
218
+ "acc": 0.5428571428571428,
219
+ "acc_norm": 0.5428571428571428
220
  }
221
  ]
222
  }
results/zero-shot/mistral-7b.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5571428571428572,
193
- "acc_norm": 0.5571428571428572
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5714285714285714,
193
+ "acc_norm": 0.5714285714285714
194
  }
195
  ]
196
  }
results/zero-shot/trendyol-7b.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.5428571428571428,
193
- "acc_norm": 0.5428571428571428
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5714285714285714,
193
+ "acc_norm": 0.5714285714285714
194
  }
195
  ]
196
  }
results/zero-shot/turna.json CHANGED
@@ -189,8 +189,8 @@
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
- "acc": 0.4714285714285714,
193
- "acc_norm": 0.4714285714285714
194
  }
195
  ]
196
  }
 
189
  {
190
  "name": "circumflex_tr",
191
  "task": "multiple_choice",
192
+ "acc": 0.5142857142857142,
193
+ "acc_norm": 0.5142857142857142
194
  }
195
  ]
196
  }