leaderboard-pr-bot committed on
Commit
225363c
1 Parent(s): ecf4024

Adding Evaluation Results

Browse files

This is an automated PR created with https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr

The purpose of this PR is to add evaluation results from the Open LLM Leaderboard to your model card.

If you encounter any issues, please report them to https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr/discussions

Files changed (1) hide show
  1. README.md +32 -0
README.md CHANGED
@@ -22,6 +22,9 @@ model-index:
22
  - type: inst_level_strict_acc and prompt_level_strict_acc
23
  value: 78.41
24
  name: strict accuracy
 
 
 
25
  source:
26
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2
27
  name: Open LLM Leaderboard
@@ -37,6 +40,9 @@ model-index:
37
  - type: acc_norm
38
  value: 33.29
39
  name: normalized accuracy
 
 
 
40
  source:
41
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2
42
  name: Open LLM Leaderboard
@@ -49,6 +55,9 @@ model-index:
49
  args:
50
  num_few_shot: 4
51
  metrics:
 
 
 
52
  - type: exact_match
53
  value: 0.0
54
  name: exact match
@@ -64,6 +73,9 @@ model-index:
64
  args:
65
  num_few_shot: 0
66
  metrics:
 
 
 
67
  - type: acc_norm
68
  value: 6.49
69
  name: acc_norm
@@ -79,6 +91,9 @@ model-index:
79
  args:
80
  num_few_shot: 0
81
  metrics:
 
 
 
82
  - type: acc_norm
83
  value: 13.96
84
  name: acc_norm
@@ -99,6 +114,9 @@ model-index:
99
  - type: acc
100
  value: 34.76
101
  name: accuracy
 
 
 
102
  source:
103
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2
104
  name: Open LLM Leaderboard
@@ -345,3 +363,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
345
  |MuSR (0-shot) |13.96|
346
  |MMLU-PRO (5-shot) |34.76|
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  - type: inst_level_strict_acc and prompt_level_strict_acc
23
  value: 78.41
24
  name: strict accuracy
25
+ - type: inst_level_strict_acc and prompt_level_strict_acc
26
+ value: 78.14
27
+ name: strict accuracy
28
  source:
29
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2
30
  name: Open LLM Leaderboard
 
40
  - type: acc_norm
41
  value: 33.29
42
  name: normalized accuracy
43
+ - type: acc_norm
44
+ value: 33.33
45
+ name: normalized accuracy
46
  source:
47
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2
48
  name: Open LLM Leaderboard
 
55
  args:
56
  num_few_shot: 4
57
  metrics:
58
+ - type: exact_match
59
+ value: 0.0
60
+ name: exact match
61
  - type: exact_match
62
  value: 0.0
63
  name: exact match
 
73
  args:
74
  num_few_shot: 0
75
  metrics:
76
+ - type: acc_norm
77
+ value: 6.49
78
+ name: acc_norm
79
  - type: acc_norm
80
  value: 6.49
81
  name: acc_norm
 
91
  args:
92
  num_few_shot: 0
93
  metrics:
94
+ - type: acc_norm
95
+ value: 13.96
96
+ name: acc_norm
97
  - type: acc_norm
98
  value: 13.96
99
  name: acc_norm
 
114
  - type: acc
115
  value: 34.76
116
  name: accuracy
117
+ - type: acc
118
+ value: 34.66
119
+ name: accuracy
120
  source:
121
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2
122
  name: Open LLM Leaderboard
 
363
  |MuSR (0-shot) |13.96|
364
  |MMLU-PRO (5-shot) |34.76|
365
 
366
+
367
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
368
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Goekdeniz-Guelmez__Josiefied-Qwen2.5-7B-Instruct-abliterated-v2)
369
+
370
+ | Metric |Value|
371
+ |-------------------|----:|
372
+ |Avg. |27.76|
373
+ |IFEval (0-Shot) |78.14|
374
+ |BBH (3-Shot) |33.33|
375
+ |MATH Lvl 5 (4-Shot)| 0.00|
376
+ |GPQA (0-shot) | 6.49|
377
+ |MuSR (0-shot) |13.96|
378
+ |MMLU-PRO (5-shot) |34.66|
379
+