alielfilali01 commited on
Commit
df97369
1 Parent(s): 24a5f16

Create assets/results/results.json

Browse files
Files changed (1) hide show
  1. assets/results/results.json +602 -0
assets/results/results.json ADDED
@@ -0,0 +1,602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "claude-3.5-sonnet Scores": {
4
+ "3C3H Scores": {
5
+ "Correctness": 0.7026,
6
+ "Completeness": 0.7014,
7
+ "Conciseness": 0.1631,
8
+ "Helpfulness": 0.6784,
9
+ "Honesty": 0.6972,
10
+ "Harmlessness": 0.7026,
11
+ "3C3H Score": 0.6076
12
+ },
13
+ "Tasks Scores": {
14
+ "Question Answering (QA)": 0.7151,
15
+ "Reasoning": 0.64,
16
+ "Orthographic and Grammatical Analysis": 0.0887,
17
+ "Safety": 0.4729
18
+ }
19
+ },
20
+ "Meta": {
21
+ "Model Name": "CohereForAI/aya-expanse-32b",
22
+ "License": "cc-by-nc-4.0",
23
+ "Revision": "main",
24
+ "Precision": "float16",
25
+ "Params": 32.0,
26
+ "Total Entries": 279,
27
+ "Successful Entries": 278,
28
+ "Failed Entries": 1,
29
+ "Success Ratio": 0.9964
30
+ }
31
+ },
32
+ {
33
+ "claude-3.5-sonnet Scores": {
34
+ "3C3H Scores": {
35
+ "Correctness": 0.5612,
36
+ "Completeness": 0.5612,
37
+ "Conciseness": 0.1172,
38
+ "Helpfulness": 0.5468,
39
+ "Honesty": 0.5519,
40
+ "Harmlessness": 0.5594,
41
+ "3C3H Score": 0.4829
42
+ },
43
+ "Tasks Scores": {
44
+ "Question Answering (QA)": 0.5526,
45
+ "Reasoning": 0.5561,
46
+ "Orthographic and Grammatical Analysis": 0.0,
47
+ "Safety": 0.4271
48
+ }
49
+ },
50
+ "Meta": {
51
+ "Model Name": "CohereForAI/aya-expanse-8b",
52
+ "License": "cc-by-nc-4.0",
53
+ "Revision": "main",
54
+ "Precision": "float16",
55
+ "Params": 8.0,
56
+ "Total Entries": 279,
57
+ "Successful Entries": 278,
58
+ "Failed Entries": 1,
59
+ "Success Ratio": 0.9964
60
+ }
61
+ },
62
+ {
63
+ "claude-3.5-sonnet Scores": {
64
+ "3C3H Scores": {
65
+ "Correctness": 0.4648,
66
+ "Completeness": 0.46,
67
+ "Conciseness": 0.1251,
68
+ "Helpfulness": 0.4415,
69
+ "Honesty": 0.4495,
70
+ "Harmlessness": 0.4639,
71
+ "3C3H Score": 0.4008
72
+ },
73
+ "Tasks Scores": {
74
+ "Question Answering (QA)": 0.5056,
75
+ "Reasoning": 0.3817,
76
+ "Orthographic and Grammatical Analysis": 0.0,
77
+ "Safety": 0.2917
78
+ }
79
+ },
80
+ "Meta": {
81
+ "Model Name": "FreedomIntelligence/AceGPT-13B-chat",
82
+ "License": "apache-2.0",
83
+ "Revision": "main",
84
+ "Precision": "float16",
85
+ "Params": 13.0,
86
+ "Total Entries": 279,
87
+ "Successful Entries": 279,
88
+ "Failed Entries": 0,
89
+ "Success Ratio": 1.0
90
+ }
91
+ },
92
+ {
93
+ "claude-3.5-sonnet Scores": {
94
+ "3C3H Scores": {
95
+ "Correctness": 0.4158,
96
+ "Completeness": 0.4158,
97
+ "Conciseness": 0.0941,
98
+ "Helpfulness": 0.3817,
99
+ "Honesty": 0.3934,
100
+ "Harmlessness": 0.4158,
101
+ "3C3H Score": 0.3527
102
+ },
103
+ "Tasks Scores": {
104
+ "Question Answering (QA)": 0.4017,
105
+ "Reasoning": 0.4367,
106
+ "Orthographic and Grammatical Analysis": 0.0,
107
+ "Safety": 0.2104
108
+ }
109
+ },
110
+ "Meta": {
111
+ "Model Name": "FreedomIntelligence/AceGPT-7B-chat",
112
+ "License": "apache-2.0",
113
+ "Revision": "main",
114
+ "Precision": "float16",
115
+ "Params": 7.0,
116
+ "Total Entries": 279,
117
+ "Successful Entries": 279,
118
+ "Failed Entries": 0,
119
+ "Success Ratio": 1.0
120
+ }
121
+ },
122
+ {
123
+ "claude-3.5-sonnet Scores": {
124
+ "3C3H Scores": {
125
+ "Correctness": 0.5568,
126
+ "Completeness": 0.546,
127
+ "Conciseness": 0.2094,
128
+ "Helpfulness": 0.5302,
129
+ "Honesty": 0.5391,
130
+ "Harmlessness": 0.5568,
131
+ "3C3H Score": 0.4897
132
+ },
133
+ "Tasks Scores": {
134
+ "Question Answering (QA)": 0.6084,
135
+ "Reasoning": 0.4717,
136
+ "Orthographic and Grammatical Analysis": 0.0,
137
+ "Safety": 0.4083
138
+ }
139
+ },
140
+ "Meta": {
141
+ "Model Name": "FreedomIntelligence/AceGPT-v2-8B-Chat",
142
+ "License": "apache-2.0",
143
+ "Revision": "main",
144
+ "Precision": "float16",
145
+ "Params": 8.0,
146
+ "Total Entries": 279,
147
+ "Successful Entries": 279,
148
+ "Failed Entries": 0,
149
+ "Success Ratio": 1.0
150
+ }
151
+ },
152
+ {
153
+ "claude-3.5-sonnet Scores": {
154
+ "3C3H Scores": {
155
+ "Correctness": 0.1547,
156
+ "Completeness": 0.1439,
157
+ "Conciseness": 0.0369,
158
+ "Helpfulness": 0.116,
159
+ "Honesty": 0.1286,
160
+ "Harmlessness": 0.1538,
161
+ "3C3H Score": 0.1223
162
+ },
163
+ "Tasks Scores": {
164
+ "Question Answering (QA)": 0.1201,
165
+ "Reasoning": 0.1094,
166
+ "Orthographic and Grammatical Analysis": 0.0,
167
+ "Safety": 0.3771
168
+ }
169
+ },
170
+ "Meta": {
171
+ "Model Name": "Qwen/Qwen2.5-0.5B-Instruct",
172
+ "License": "apache-2.0",
173
+ "Revision": "main",
174
+ "Precision": "bfloat16",
175
+ "Params": 0.465,
176
+ "Total Entries": 279,
177
+ "Successful Entries": 278,
178
+ "Failed Entries": 1,
179
+ "Success Ratio": 0.9964
180
+ }
181
+ },
182
+ {
183
+ "claude-3.5-sonnet Scores": {
184
+ "3C3H Scores": {
185
+ "Correctness": 0.4468,
186
+ "Completeness": 0.4432,
187
+ "Conciseness": 0.1278,
188
+ "Helpfulness": 0.4179,
189
+ "Honesty": 0.4271,
190
+ "Harmlessness": 0.4459,
191
+ "3C3H Score": 0.3848
192
+ },
193
+ "Tasks Scores": {
194
+ "Question Answering (QA)": 0.3684,
195
+ "Reasoning": 0.4983,
196
+ "Orthographic and Grammatical Analysis": 0.0,
197
+ "Safety": 0.6812
198
+ }
199
+ },
200
+ "Meta": {
201
+ "Model Name": "Qwen/Qwen2.5-3B-Instruct",
202
+ "License": "apache-2.0",
203
+ "Revision": "main",
204
+ "Precision": "bfloat16",
205
+ "Params": 3.0,
206
+ "Total Entries": 279,
207
+ "Successful Entries": 279,
208
+ "Failed Entries": 0,
209
+ "Success Ratio": 1.0
210
+ }
211
+ },
212
+ {
213
+ "claude-3.5-sonnet Scores": {
214
+ "3C3H Scores": {
215
+ "Correctness": 0.7192,
216
+ "Completeness": 0.718,
217
+ "Conciseness": 0.1906,
218
+ "Helpfulness": 0.6986,
219
+ "Honesty": 0.7094,
220
+ "Harmlessness": 0.7192,
221
+ "3C3H Score": 0.6258
222
+ },
223
+ "Tasks Scores": {
224
+ "Question Answering (QA)": 0.6677,
225
+ "Reasoning": 0.7594,
226
+ "Orthographic and Grammatical Analysis": 0.1075,
227
+ "Safety": 0.6083
228
+ }
229
+ },
230
+ "Meta": {
231
+ "Model Name": "Qwen/Qwen2.5-72B-Instruct",
232
+ "License": "qwen",
233
+ "Revision": "main",
234
+ "Precision": "bfloat16",
235
+ "Params": 72.0,
236
+ "Total Entries": 279,
237
+ "Successful Entries": 279,
238
+ "Failed Entries": 0,
239
+ "Success Ratio": 1.0
240
+ }
241
+ },
242
+ {
243
+ "claude-3.5-sonnet Scores": {
244
+ "3C3H Scores": {
245
+ "Correctness": 0.6499,
246
+ "Completeness": 0.6487,
247
+ "Conciseness": 0.2016,
248
+ "Helpfulness": 0.6386,
249
+ "Honesty": 0.638,
250
+ "Harmlessness": 0.6499,
251
+ "3C3H Score": 0.5711
252
+ },
253
+ "Tasks Scores": {
254
+ "Question Answering (QA)": 0.6395,
255
+ "Reasoning": 0.6122,
256
+ "Orthographic and Grammatical Analysis": 0.0,
257
+ "Safety": 0.7792
258
+ }
259
+ },
260
+ "Meta": {
261
+ "Model Name": "google/gemma-2-27b-it",
262
+ "License": "gemma",
263
+ "Revision": "main",
264
+ "Precision": "bfloat16",
265
+ "Params": 27.0,
266
+ "Total Entries": 279,
267
+ "Successful Entries": 279,
268
+ "Failed Entries": 0,
269
+ "Success Ratio": 1.0
270
+ }
271
+ },
272
+ {
273
+ "claude-3.5-sonnet Scores": {
274
+ "3C3H Scores": {
275
+ "Correctness": 0.589,
276
+ "Completeness": 0.589,
277
+ "Conciseness": 0.1834,
278
+ "Helpfulness": 0.5797,
279
+ "Honesty": 0.5744,
280
+ "Harmlessness": 0.589,
281
+ "3C3H Score": 0.5174
282
+ },
283
+ "Tasks Scores": {
284
+ "Question Answering (QA)": 0.5462,
285
+ "Reasoning": 0.6011,
286
+ "Orthographic and Grammatical Analysis": 0.0,
287
+ "Safety": 0.7854
288
+ }
289
+ },
290
+ "Meta": {
291
+ "Model Name": "google/gemma-2-9b-it",
292
+ "License": "gemma",
293
+ "Revision": "main",
294
+ "Precision": "bfloat16",
295
+ "Params": 9.0,
296
+ "Total Entries": 279,
297
+ "Successful Entries": 279,
298
+ "Failed Entries": 0,
299
+ "Success Ratio": 1.0
300
+ }
301
+ },
302
+ {
303
+ "claude-3.5-sonnet Scores": {
304
+ "3C3H Scores": {
305
+ "Correctness": 0.5579,
306
+ "Completeness": 0.5544,
307
+ "Conciseness": 0.1682,
308
+ "Helpfulness": 0.5352,
309
+ "Honesty": 0.5436,
310
+ "Harmlessness": 0.5579,
311
+ "3C3H Score": 0.4862
312
+ },
313
+ "Tasks Scores": {
314
+ "Question Answering (QA)": 0.5925,
315
+ "Reasoning": 0.48,
316
+ "Orthographic and Grammatical Analysis": 0.0,
317
+ "Safety": 0.45
318
+ }
319
+ },
320
+ "Meta": {
321
+ "Model Name": "inceptionai/jais-adapted-13b-chat",
322
+ "License": "apache-2.0",
323
+ "Revision": "main",
324
+ "Precision": "float32",
325
+ "Params": 13.0,
326
+ "Total Entries": 279,
327
+ "Successful Entries": 279,
328
+ "Failed Entries": 0,
329
+ "Success Ratio": 1.0
330
+ }
331
+ },
332
+ {
333
+ "claude-3.5-sonnet Scores": {
334
+ "3C3H Scores": {
335
+ "Correctness": 0.6679,
336
+ "Completeness": 0.6655,
337
+ "Conciseness": 0.1804,
338
+ "Helpfulness": 0.6326,
339
+ "Honesty": 0.652,
340
+ "Harmlessness": 0.6679,
341
+ "3C3H Score": 0.5777
342
+ },
343
+ "Tasks Scores": {
344
+ "Question Answering (QA)": 0.6864,
345
+ "Reasoning": 0.5711,
346
+ "Orthographic and Grammatical Analysis": 0.0578,
347
+ "Safety": 0.5771
348
+ }
349
+ },
350
+ "Meta": {
351
+ "Model Name": "inceptionai/jais-adapted-70b-chat",
352
+ "License": "apache-2.0",
353
+ "Revision": "main",
354
+ "Precision": "float32",
355
+ "Params": 70.0,
356
+ "Total Entries": 279,
357
+ "Successful Entries": 279,
358
+ "Failed Entries": 0,
359
+ "Success Ratio": 1.0
360
+ }
361
+ },
362
+ {
363
+ "claude-3.5-sonnet Scores": {
364
+ "3C3H Scores": {
365
+ "Correctness": 0.5211,
366
+ "Completeness": 0.5102,
367
+ "Conciseness": 0.1339,
368
+ "Helpfulness": 0.4798,
369
+ "Honesty": 0.5093,
370
+ "Harmlessness": 0.5202,
371
+ "3C3H Score": 0.4457
372
+ },
373
+ "Tasks Scores": {
374
+ "Question Answering (QA)": 0.5144,
375
+ "Reasoning": 0.4844,
376
+ "Orthographic and Grammatical Analysis": 0.0269,
377
+ "Safety": 0.4312
378
+ }
379
+ },
380
+ "Meta": {
381
+ "Model Name": "inceptionai/jais-family-13b-chat",
382
+ "License": "apache-2.0",
383
+ "Revision": "main",
384
+ "Precision": "float32",
385
+ "Params": 13.0,
386
+ "Total Entries": 279,
387
+ "Successful Entries": 277,
388
+ "Failed Entries": 2,
389
+ "Success Ratio": 0.9928
390
+ }
391
+ },
392
+ {
393
+ "claude-3.5-sonnet Scores": {
394
+ "3C3H Scores": {
395
+ "Correctness": 0.3729,
396
+ "Completeness": 0.3669,
397
+ "Conciseness": 0.0887,
398
+ "Helpfulness": 0.3441,
399
+ "Honesty": 0.3543,
400
+ "Harmlessness": 0.3711,
401
+ "3C3H Score": 0.3163
402
+ },
403
+ "Tasks Scores": {
404
+ "Question Answering (QA)": 0.348,
405
+ "Reasoning": 0.3761,
406
+ "Orthographic and Grammatical Analysis": 0.0,
407
+ "Safety": 0.3417
408
+ }
409
+ },
410
+ "Meta": {
411
+ "Model Name": "inceptionai/jais-family-2p7b-chat",
412
+ "License": "apache-2.0",
413
+ "Revision": "main",
414
+ "Precision": "float32",
415
+ "Params": 3.0,
416
+ "Total Entries": 279,
417
+ "Successful Entries": 278,
418
+ "Failed Entries": 1,
419
+ "Success Ratio": 0.9964
420
+ }
421
+ },
422
+ {
423
+ "claude-3.5-sonnet Scores": {
424
+ "3C3H Scores": {
425
+ "Correctness": 0.5806,
426
+ "Completeness": 0.5759,
427
+ "Conciseness": 0.1526,
428
+ "Helpfulness": 0.5475,
429
+ "Honesty": 0.5621,
430
+ "Harmlessness": 0.5806,
431
+ "3C3H Score": 0.4999
432
+ },
433
+ "Tasks Scores": {
434
+ "Question Answering (QA)": 0.5812,
435
+ "Reasoning": 0.5239,
436
+ "Orthographic and Grammatical Analysis": 0.0282,
437
+ "Safety": 0.5187
438
+ }
439
+ },
440
+ "Meta": {
441
+ "Model Name": "inceptionai/jais-family-30b-8k-chat",
442
+ "License": "apache-2.0",
443
+ "Revision": "main",
444
+ "Precision": "float32",
445
+ "Params": 30.0,
446
+ "Total Entries": 279,
447
+ "Successful Entries": 279,
448
+ "Failed Entries": 0,
449
+ "Success Ratio": 1.0
450
+ }
451
+ },
452
+ {
453
+ "claude-3.5-sonnet Scores": {
454
+ "3C3H Scores": {
455
+ "Correctness": 0.4755,
456
+ "Completeness": 0.4731,
457
+ "Conciseness": 0.1243,
458
+ "Helpfulness": 0.4522,
459
+ "Honesty": 0.4597,
460
+ "Harmlessness": 0.4755,
461
+ "3C3H Score": 0.41
462
+ },
463
+ "Tasks Scores": {
464
+ "Question Answering (QA)": 0.4743,
465
+ "Reasoning": 0.4633,
466
+ "Orthographic and Grammatical Analysis": 0.0,
467
+ "Safety": 0.3542
468
+ }
469
+ },
470
+ "Meta": {
471
+ "Model Name": "inceptionai/jais-family-6p7b-chat",
472
+ "License": "apache-2.0",
473
+ "Revision": "main",
474
+ "Precision": "float32",
475
+ "Params": 7.0,
476
+ "Total Entries": 279,
477
+ "Successful Entries": 279,
478
+ "Failed Entries": 0,
479
+ "Success Ratio": 1.0
480
+ }
481
+ },
482
+ {
483
+ "claude-3.5-sonnet Scores": {
484
+ "3C3H Scores": {
485
+ "Correctness": 0.6392,
486
+ "Completeness": 0.6129,
487
+ "Conciseness": 0.27,
488
+ "Helpfulness": 0.6016,
489
+ "Honesty": 0.6171,
490
+ "Harmlessness": 0.6383,
491
+ "3C3H Score": 0.5632
492
+ },
493
+ "Tasks Scores": {
494
+ "Question Answering (QA)": 0.6465,
495
+ "Reasoning": 0.6283,
496
+ "Orthographic and Grammatical Analysis": 0.0591,
497
+ "Safety": 0.4625
498
+ }
499
+ },
500
+ "Meta": {
501
+ "Model Name": "meta-llama/Llama-3.1-70B-Instruct",
502
+ "License": "llama3.1",
503
+ "Revision": "main",
504
+ "Precision": "bfloat16",
505
+ "Params": 70.0,
506
+ "Total Entries": 279,
507
+ "Successful Entries": 279,
508
+ "Failed Entries": 0,
509
+ "Success Ratio": 1.0
510
+ }
511
+ },
512
+ {
513
+ "claude-3.5-sonnet Scores": {
514
+ "3C3H Scores": {
515
+ "Correctness": 0.4421,
516
+ "Completeness": 0.4409,
517
+ "Conciseness": 0.1416,
518
+ "Helpfulness": 0.3967,
519
+ "Honesty": 0.4065,
520
+ "Harmlessness": 0.4421,
521
+ "3C3H Score": 0.3783
522
+ },
523
+ "Tasks Scores": {
524
+ "Question Answering (QA)": 0.3826,
525
+ "Reasoning": 0.45,
526
+ "Orthographic and Grammatical Analysis": 0.0,
527
+ "Safety": 0.6625
528
+ }
529
+ },
530
+ "Meta": {
531
+ "Model Name": "meta-llama/Llama-3.1-8B-Instruct",
532
+ "License": "llama3.1",
533
+ "Revision": "main",
534
+ "Precision": "bfloat16",
535
+ "Params": 8.0,
536
+ "Total Entries": 279,
537
+ "Successful Entries": 279,
538
+ "Failed Entries": 0,
539
+ "Success Ratio": 1.0
540
+ }
541
+ },
542
+ {
543
+ "claude-3.5-sonnet Scores": {
544
+ "3C3H Scores": {
545
+ "Correctness": 0.2359,
546
+ "Completeness": 0.2058,
547
+ "Conciseness": 0.0581,
548
+ "Helpfulness": 0.1781,
549
+ "Honesty": 0.2106,
550
+ "Harmlessness": 0.2341,
551
+ "3C3H Score": 0.1871
552
+ },
553
+ "Tasks Scores": {
554
+ "Question Answering (QA)": 0.198,
555
+ "Reasoning": 0.2328,
556
+ "Orthographic and Grammatical Analysis": 0.0,
557
+ "Safety": 0.2229
558
+ }
559
+ },
560
+ "Meta": {
561
+ "Model Name": "meta-llama/Meta-Llama-3-8B-Instruct",
562
+ "License": "llama3",
563
+ "Revision": "main",
564
+ "Precision": "bfloat16",
565
+ "Params": 14.963,
566
+ "Total Entries": 279,
567
+ "Successful Entries": 277,
568
+ "Failed Entries": 2,
569
+ "Success Ratio": 0.9928
570
+ }
571
+ },
572
+ {
573
+ "claude-3.5-sonnet Scores": {
574
+ "3C3H Scores": {
575
+ "Correctness": 0.5204,
576
+ "Completeness": 0.1295,
577
+ "Conciseness": 0.4149,
578
+ "Helpfulness": 0.2332,
579
+ "Honesty": 0.4814,
580
+ "Harmlessness": 0.5204,
581
+ "3C3H Score": 0.3833
582
+ },
583
+ "Tasks Scores": {
584
+ "Question Answering (QA)": 0.4053,
585
+ "Reasoning": 0.3806,
586
+ "Orthographic and Grammatical Analysis": 0.0,
587
+ "Safety": 0.8188
588
+ }
589
+ },
590
+ "Meta": {
591
+ "Model Name": "silma-ai/SILMA-9B-Instruct-v1.0",
592
+ "License": "gemma",
593
+ "Revision": "main",
594
+ "Precision": "bfloat16",
595
+ "Params": 9.0,
596
+ "Total Entries": 279,
597
+ "Successful Entries": 278,
598
+ "Failed Entries": 1,
599
+ "Success Ratio": 0.9964
600
+ }
601
+ }
602
+ ]