|
--- |
|
license: apache-2.0 |
|
library_name: transformers |
|
tags: |
|
- merge |
|
pipeline_tag: text-generation |
|
model-index: |
|
- name: TheTop-5x7B-Instruct-S4-v0.1 |
|
results: |
|
- task: |
|
type: text-generation |
|
name: Text Generation |
|
dataset: |
|
name: AI2 Reasoning Challenge (25-Shot) |
|
type: ai2_arc |
|
config: ARC-Challenge |
|
split: test |
|
args: |
|
num_few_shot: 25 |
|
metrics: |
|
- type: acc_norm |
|
value: 72.18 |
|
name: normalized accuracy |
|
source: |
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=MaziyarPanahi/TheTop-5x7B-Instruct-S4-v0.1 |
|
name: Open LLM Leaderboard |
|
- task: |
|
type: text-generation |
|
name: Text Generation |
|
dataset: |
|
name: HellaSwag (10-Shot) |
|
type: hellaswag |
|
split: validation |
|
args: |
|
num_few_shot: 10 |
|
metrics: |
|
- type: acc_norm |
|
value: 88.29 |
|
name: normalized accuracy |
|
source: |
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=MaziyarPanahi/TheTop-5x7B-Instruct-S4-v0.1 |
|
name: Open LLM Leaderboard |
|
- task: |
|
type: text-generation |
|
name: Text Generation |
|
dataset: |
|
name: MMLU (5-Shot) |
|
type: cais/mmlu |
|
config: all |
|
split: test |
|
args: |
|
num_few_shot: 5 |
|
metrics: |
|
- type: acc |
|
value: 65.03 |
|
name: accuracy |
|
source: |
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=MaziyarPanahi/TheTop-5x7B-Instruct-S4-v0.1 |
|
name: Open LLM Leaderboard |
|
- task: |
|
type: text-generation |
|
name: Text Generation |
|
dataset: |
|
name: TruthfulQA (0-shot) |
|
type: truthful_qa |
|
config: multiple_choice |
|
split: validation |
|
args: |
|
num_few_shot: 0 |
|
metrics: |
|
- type: mc2 |
|
value: 65.56 |
|
source: |
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=MaziyarPanahi/TheTop-5x7B-Instruct-S4-v0.1 |
|
name: Open LLM Leaderboard |
|
- task: |
|
type: text-generation |
|
name: Text Generation |
|
dataset: |
|
name: Winogrande (5-shot) |
|
type: winogrande |
|
config: winogrande_xl |
|
split: validation |
|
args: |
|
num_few_shot: 5 |
|
metrics: |
|
- type: acc |
|
value: 85.16 |
|
name: accuracy |
|
source: |
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=MaziyarPanahi/TheTop-5x7B-Instruct-S4-v0.1 |
|
name: Open LLM Leaderboard |
|
- task: |
|
type: text-generation |
|
name: Text Generation |
|
dataset: |
|
name: GSM8k (5-shot) |
|
type: gsm8k |
|
config: main |
|
split: test |
|
args: |
|
num_few_shot: 5 |
|
metrics: |
|
- type: acc |
|
value: 73.39 |
|
name: accuracy |
|
source: |
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=MaziyarPanahi/TheTop-5x7B-Instruct-S4-v0.1 |
|
name: Open LLM Leaderboard |
|
--- |
|
|
|
A merge of top 7B models combined with a SLERP (spherical linear interpolation) merge of other 7B models. |
|
|
|
> mergekit is a toolkit for merging pre-trained language models. mergekit uses an out-of-core approach to perform unreasonably elaborate merges in resource-constrained situations. Merges can be run entirely on CPU or accelerated with as little as 8 GB of VRAM. Many merging algorithms are supported, with more coming as they catch my attention. |
|
## Eval |
|
|
|
|
|
![image/png](https://cdn-uploads.huggingface.co/production/uploads/5fd5e18a90b6dc4633f6d292/3a2An3rpaLMusQrtQ74Up.png) |
|
|
|
|
|
```json |
|
{ |
|
"all": { |
|
"acc": 0.6568351479800627, |
|
"acc_stderr": 0.03199600851869088, |
|
"acc_norm": 0.6554901222242155, |
|
"acc_norm_stderr": 0.03267670432184765, |
|
"mc1": 0.5104039167686658, |
|
"mc1_stderr": 0.017499711430249268, |
|
"mc2": 0.6556430108444109, |
|
"mc2_stderr": 0.015519025079862213 |
|
}, |
|
"harness|arc:challenge|25": { |
|
"acc": 0.6919795221843004, |
|
"acc_stderr": 0.013491429517292038, |
|
"acc_norm": 0.7218430034129693, |
|
"acc_norm_stderr": 0.013094469919538812 |
|
}, |
|
"harness|hellaswag|10": { |
|
"acc": 0.7202748456482773, |
|
"acc_stderr": 0.0044794676194648, |
|
"acc_norm": 0.8828918542123083, |
|
"acc_norm_stderr": 0.003208919510309931 |
|
}, |
|
"harness|hendrycksTest-abstract_algebra|5": { |
|
"acc": 0.33, |
|
"acc_stderr": 0.047258156262526045, |
|
"acc_norm": 0.33, |
|
"acc_norm_stderr": 0.047258156262526045 |
|
}, |
|
"harness|hendrycksTest-anatomy|5": { |
|
"acc": 0.6518518518518519, |
|
"acc_stderr": 0.041153246103369526, |
|
"acc_norm": 0.6518518518518519, |
|
"acc_norm_stderr": 0.041153246103369526 |
|
}, |
|
"harness|hendrycksTest-astronomy|5": { |
|
"acc": 0.7039473684210527, |
|
"acc_stderr": 0.03715062154998904, |
|
"acc_norm": 0.7039473684210527, |
|
"acc_norm_stderr": 0.03715062154998904 |
|
}, |
|
"harness|hendrycksTest-business_ethics|5": { |
|
"acc": 0.66, |
|
"acc_stderr": 0.04760952285695238, |
|
"acc_norm": 0.66, |
|
"acc_norm_stderr": 0.04760952285695238 |
|
}, |
|
"harness|hendrycksTest-clinical_knowledge|5": { |
|
"acc": 0.6981132075471698, |
|
"acc_stderr": 0.02825420034443866, |
|
"acc_norm": 0.6981132075471698, |
|
"acc_norm_stderr": 0.02825420034443866 |
|
}, |
|
"harness|hendrycksTest-college_biology|5": { |
|
"acc": 0.7708333333333334, |
|
"acc_stderr": 0.03514697467862388, |
|
"acc_norm": 0.7708333333333334, |
|
"acc_norm_stderr": 0.03514697467862388 |
|
}, |
|
"harness|hendrycksTest-college_chemistry|5": { |
|
"acc": 0.48, |
|
"acc_stderr": 0.050211673156867795, |
|
"acc_norm": 0.48, |
|
"acc_norm_stderr": 0.050211673156867795 |
|
}, |
|
"harness|hendrycksTest-college_computer_science|5": { |
|
"acc": 0.52, |
|
"acc_stderr": 0.050211673156867795, |
|
"acc_norm": 0.52, |
|
"acc_norm_stderr": 0.050211673156867795 |
|
}, |
|
"harness|hendrycksTest-college_mathematics|5": { |
|
"acc": 0.27, |
|
"acc_stderr": 0.044619604333847394, |
|
"acc_norm": 0.27, |
|
"acc_norm_stderr": 0.044619604333847394 |
|
}, |
|
"harness|hendrycksTest-college_medicine|5": { |
|
"acc": 0.6705202312138728, |
|
"acc_stderr": 0.03583901754736412, |
|
"acc_norm": 0.6705202312138728, |
|
"acc_norm_stderr": 0.03583901754736412 |
|
}, |
|
"harness|hendrycksTest-college_physics|5": { |
|
"acc": 0.4019607843137255, |
|
"acc_stderr": 0.04878608714466996, |
|
"acc_norm": 0.4019607843137255, |
|
"acc_norm_stderr": 0.04878608714466996 |
|
}, |
|
"harness|hendrycksTest-computer_security|5": { |
|
"acc": 0.75, |
|
"acc_stderr": 0.04351941398892446, |
|
"acc_norm": 0.75, |
|
"acc_norm_stderr": 0.04351941398892446 |
|
}, |
|
"harness|hendrycksTest-conceptual_physics|5": { |
|
"acc": 0.5914893617021276, |
|
"acc_stderr": 0.032134180267015755, |
|
"acc_norm": 0.5914893617021276, |
|
"acc_norm_stderr": 0.032134180267015755 |
|
}, |
|
"harness|hendrycksTest-econometrics|5": { |
|
"acc": 0.5087719298245614, |
|
"acc_stderr": 0.04702880432049615, |
|
"acc_norm": 0.5087719298245614, |
|
"acc_norm_stderr": 0.04702880432049615 |
|
}, |
|
"harness|hendrycksTest-electrical_engineering|5": { |
|
"acc": 0.5724137931034483, |
|
"acc_stderr": 0.04122737111370332, |
|
"acc_norm": 0.5724137931034483, |
|
"acc_norm_stderr": 0.04122737111370332 |
|
}, |
|
"harness|hendrycksTest-elementary_mathematics|5": { |
|
"acc": 0.42592592592592593, |
|
"acc_stderr": 0.02546714904546955, |
|
"acc_norm": 0.42592592592592593, |
|
"acc_norm_stderr": 0.02546714904546955 |
|
}, |
|
"harness|hendrycksTest-formal_logic|5": { |
|
"acc": 0.49206349206349204, |
|
"acc_stderr": 0.044715725362943486, |
|
"acc_norm": 0.49206349206349204, |
|
"acc_norm_stderr": 0.044715725362943486 |
|
}, |
|
"harness|hendrycksTest-global_facts|5": { |
|
"acc": 0.37, |
|
"acc_stderr": 0.04852365870939099, |
|
"acc_norm": 0.37, |
|
"acc_norm_stderr": 0.04852365870939099 |
|
}, |
|
"harness|hendrycksTest-high_school_biology|5": { |
|
"acc": 0.7903225806451613, |
|
"acc_stderr": 0.023157879349083525, |
|
"acc_norm": 0.7903225806451613, |
|
"acc_norm_stderr": 0.023157879349083525 |
|
}, |
|
"harness|hendrycksTest-high_school_chemistry|5": { |
|
"acc": 0.5073891625615764, |
|
"acc_stderr": 0.035176035403610105, |
|
"acc_norm": 0.5073891625615764, |
|
"acc_norm_stderr": 0.035176035403610105 |
|
}, |
|
"harness|hendrycksTest-high_school_computer_science|5": { |
|
"acc": 0.66, |
|
"acc_stderr": 0.04760952285695237, |
|
"acc_norm": 0.66, |
|
"acc_norm_stderr": 0.04760952285695237 |
|
}, |
|
"harness|hendrycksTest-high_school_european_history|5": { |
|
"acc": 0.7757575757575758, |
|
"acc_stderr": 0.03256866661681102, |
|
"acc_norm": 0.7757575757575758, |
|
"acc_norm_stderr": 0.03256866661681102 |
|
}, |
|
"harness|hendrycksTest-high_school_geography|5": { |
|
"acc": 0.7929292929292929, |
|
"acc_stderr": 0.028869778460267045, |
|
"acc_norm": 0.7929292929292929, |
|
"acc_norm_stderr": 0.028869778460267045 |
|
}, |
|
"harness|hendrycksTest-high_school_government_and_politics|5": { |
|
"acc": 0.9067357512953368, |
|
"acc_stderr": 0.020986854593289733, |
|
"acc_norm": 0.9067357512953368, |
|
"acc_norm_stderr": 0.020986854593289733 |
|
}, |
|
"harness|hendrycksTest-high_school_macroeconomics|5": { |
|
"acc": 0.6666666666666666, |
|
"acc_stderr": 0.023901157979402534, |
|
"acc_norm": 0.6666666666666666, |
|
"acc_norm_stderr": 0.023901157979402534 |
|
}, |
|
"harness|hendrycksTest-high_school_mathematics|5": { |
|
"acc": 0.34814814814814815, |
|
"acc_stderr": 0.02904560029061625, |
|
"acc_norm": 0.34814814814814815, |
|
"acc_norm_stderr": 0.02904560029061625 |
|
}, |
|
"harness|hendrycksTest-high_school_microeconomics|5": { |
|
"acc": 0.6764705882352942, |
|
"acc_stderr": 0.030388353551886793, |
|
"acc_norm": 0.6764705882352942, |
|
"acc_norm_stderr": 0.030388353551886793 |
|
}, |
|
"harness|hendrycksTest-high_school_physics|5": { |
|
"acc": 0.36423841059602646, |
|
"acc_stderr": 0.03929111781242742, |
|
"acc_norm": 0.36423841059602646, |
|
"acc_norm_stderr": 0.03929111781242742 |
|
}, |
|
"harness|hendrycksTest-high_school_psychology|5": { |
|
"acc": 0.8366972477064221, |
|
"acc_stderr": 0.01584825580650155, |
|
"acc_norm": 0.8366972477064221, |
|
"acc_norm_stderr": 0.01584825580650155 |
|
}, |
|
"harness|hendrycksTest-high_school_statistics|5": { |
|
"acc": 0.5046296296296297, |
|
"acc_stderr": 0.03409825519163572, |
|
"acc_norm": 0.5046296296296297, |
|
"acc_norm_stderr": 0.03409825519163572 |
|
}, |
|
"harness|hendrycksTest-high_school_us_history|5": { |
|
"acc": 0.8529411764705882, |
|
"acc_stderr": 0.024857478080250447, |
|
"acc_norm": 0.8529411764705882, |
|
"acc_norm_stderr": 0.024857478080250447 |
|
}, |
|
"harness|hendrycksTest-high_school_world_history|5": { |
|
"acc": 0.8143459915611815, |
|
"acc_stderr": 0.025310495376944856, |
|
"acc_norm": 0.8143459915611815, |
|
"acc_norm_stderr": 0.025310495376944856 |
|
}, |
|
"harness|hendrycksTest-human_aging|5": { |
|
"acc": 0.6816143497757847, |
|
"acc_stderr": 0.03126580522513713, |
|
"acc_norm": 0.6816143497757847, |
|
"acc_norm_stderr": 0.03126580522513713 |
|
}, |
|
"harness|hendrycksTest-human_sexuality|5": { |
|
"acc": 0.7862595419847328, |
|
"acc_stderr": 0.0359546161177469, |
|
"acc_norm": 0.7862595419847328, |
|
"acc_norm_stderr": 0.0359546161177469 |
|
}, |
|
"harness|hendrycksTest-international_law|5": { |
|
"acc": 0.7933884297520661, |
|
"acc_stderr": 0.03695980128098824, |
|
"acc_norm": 0.7933884297520661, |
|
"acc_norm_stderr": 0.03695980128098824 |
|
}, |
|
"harness|hendrycksTest-jurisprudence|5": { |
|
"acc": 0.7870370370370371, |
|
"acc_stderr": 0.0395783547198098, |
|
"acc_norm": 0.7870370370370371, |
|
"acc_norm_stderr": 0.0395783547198098 |
|
}, |
|
"harness|hendrycksTest-logical_fallacies|5": { |
|
"acc": 0.7730061349693251, |
|
"acc_stderr": 0.03291099578615769, |
|
"acc_norm": 0.7730061349693251, |
|
"acc_norm_stderr": 0.03291099578615769 |
|
}, |
|
"harness|hendrycksTest-machine_learning|5": { |
|
"acc": 0.48214285714285715, |
|
"acc_stderr": 0.047427623612430116, |
|
"acc_norm": 0.48214285714285715, |
|
"acc_norm_stderr": 0.047427623612430116 |
|
}, |
|
"harness|hendrycksTest-management|5": { |
|
"acc": 0.7864077669902912, |
|
"acc_stderr": 0.040580420156460344, |
|
"acc_norm": 0.7864077669902912, |
|
"acc_norm_stderr": 0.040580420156460344 |
|
}, |
|
"harness|hendrycksTest-marketing|5": { |
|
"acc": 0.8803418803418803, |
|
"acc_stderr": 0.021262719400406974, |
|
"acc_norm": 0.8803418803418803, |
|
"acc_norm_stderr": 0.021262719400406974 |
|
}, |
|
"harness|hendrycksTest-medical_genetics|5": { |
|
"acc": 0.73, |
|
"acc_stderr": 0.0446196043338474, |
|
"acc_norm": 0.73, |
|
"acc_norm_stderr": 0.0446196043338474 |
|
}, |
|
"harness|hendrycksTest-miscellaneous|5": { |
|
"acc": 0.8275862068965517, |
|
"acc_stderr": 0.013507943909371802, |
|
"acc_norm": 0.8275862068965517, |
|
"acc_norm_stderr": 0.013507943909371802 |
|
}, |
|
"harness|hendrycksTest-moral_disputes|5": { |
|
"acc": 0.7543352601156069, |
|
"acc_stderr": 0.023176298203992005, |
|
"acc_norm": 0.7543352601156069, |
|
"acc_norm_stderr": 0.023176298203992005 |
|
}, |
|
"harness|hendrycksTest-moral_scenarios|5": { |
|
"acc": 0.45027932960893857, |
|
"acc_stderr": 0.01663961523684581, |
|
"acc_norm": 0.45027932960893857, |
|
"acc_norm_stderr": 0.01663961523684581 |
|
}, |
|
"harness|hendrycksTest-nutrition|5": { |
|
"acc": 0.7254901960784313, |
|
"acc_stderr": 0.02555316999182652, |
|
"acc_norm": 0.7254901960784313, |
|
"acc_norm_stderr": 0.02555316999182652 |
|
}, |
|
"harness|hendrycksTest-philosophy|5": { |
|
"acc": 0.7138263665594855, |
|
"acc_stderr": 0.025670259242188933, |
|
"acc_norm": 0.7138263665594855, |
|
"acc_norm_stderr": 0.025670259242188933 |
|
}, |
|
"harness|hendrycksTest-prehistory|5": { |
|
"acc": 0.7561728395061729, |
|
"acc_stderr": 0.02389187954195961, |
|
"acc_norm": 0.7561728395061729, |
|
"acc_norm_stderr": 0.02389187954195961 |
|
}, |
|
"harness|hendrycksTest-professional_accounting|5": { |
|
"acc": 0.46808510638297873, |
|
"acc_stderr": 0.029766675075873866, |
|
"acc_norm": 0.46808510638297873, |
|
"acc_norm_stderr": 0.029766675075873866 |
|
}, |
|
"harness|hendrycksTest-professional_law|5": { |
|
"acc": 0.4745762711864407, |
|
"acc_stderr": 0.012753716929101004, |
|
"acc_norm": 0.4745762711864407, |
|
"acc_norm_stderr": 0.012753716929101004 |
|
}, |
|
"harness|hendrycksTest-professional_medicine|5": { |
|
"acc": 0.6911764705882353, |
|
"acc_stderr": 0.02806499816704009, |
|
"acc_norm": 0.6911764705882353, |
|
"acc_norm_stderr": 0.02806499816704009 |
|
}, |
|
"harness|hendrycksTest-professional_psychology|5": { |
|
"acc": 0.6748366013071896, |
|
"acc_stderr": 0.01895088677080631, |
|
"acc_norm": 0.6748366013071896, |
|
"acc_norm_stderr": 0.01895088677080631 |
|
}, |
|
"harness|hendrycksTest-public_relations|5": { |
|
"acc": 0.6545454545454545, |
|
"acc_stderr": 0.04554619617541054, |
|
"acc_norm": 0.6545454545454545, |
|
"acc_norm_stderr": 0.04554619617541054 |
|
}, |
|
"harness|hendrycksTest-security_studies|5": { |
|
"acc": 0.7346938775510204, |
|
"acc_stderr": 0.028263889943784603, |
|
"acc_norm": 0.7346938775510204, |
|
"acc_norm_stderr": 0.028263889943784603 |
|
}, |
|
"harness|hendrycksTest-sociology|5": { |
|
"acc": 0.8258706467661692, |
|
"acc_stderr": 0.026814951200421603, |
|
"acc_norm": 0.8258706467661692, |
|
"acc_norm_stderr": 0.026814951200421603 |
|
}, |
|
"harness|hendrycksTest-us_foreign_policy|5": { |
|
"acc": 0.85, |
|
"acc_stderr": 0.03588702812826371, |
|
"acc_norm": 0.85, |
|
"acc_norm_stderr": 0.03588702812826371 |
|
}, |
|
"harness|hendrycksTest-virology|5": { |
|
"acc": 0.5602409638554217, |
|
"acc_stderr": 0.03864139923699122, |
|
"acc_norm": 0.5602409638554217, |
|
"acc_norm_stderr": 0.03864139923699122 |
|
}, |
|
"harness|hendrycksTest-world_religions|5": { |
|
"acc": 0.8421052631578947, |
|
"acc_stderr": 0.027966785859160893, |
|
"acc_norm": 0.8421052631578947, |
|
"acc_norm_stderr": 0.027966785859160893 |
|
}, |
|
"harness|truthfulqa:mc|0": { |
|
"mc1": 0.5104039167686658, |
|
"mc1_stderr": 0.017499711430249268, |
|
"mc2": 0.6556430108444109, |
|
"mc2_stderr": 0.015519025079862213 |
|
}, |
|
"harness|winogrande|5": { |
|
"acc": 0.8516179952644041, |
|
"acc_stderr": 0.009990706005184136 |
|
}, |
|
"harness|gsm8k|5": { |
|
"acc": 0.7338893100833965, |
|
"acc_stderr": 0.012172750939040328 |
|
} |
|
} |
|
``` |
|
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) |
|
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_MaziyarPanahi__TheTop-5x7B-Instruct-S4-v0.1) |
|
|
|
| Metric |Value| |
|
|---------------------------------|----:| |
|
|Avg. |74.94| |
|
|AI2 Reasoning Challenge (25-Shot)|72.18| |
|
|HellaSwag (10-Shot) |88.29| |
|
|MMLU (5-Shot) |65.03| |
|
|TruthfulQA (0-shot) |65.56| |
|
|Winogrande (5-shot) |85.16| |
|
|GSM8k (5-shot) |73.39| |
|
|
|
|