Update README.md
Browse files
README.md
CHANGED
@@ -1,127 +1,127 @@
|
|
1 |
-
---
|
2 |
-
license: apache-2.0
|
3 |
-
inference: false
|
4 |
-
---
|
5 |
-
|
6 |
-
# Description
|
7 |
-
4 bit quantization of [upstage/SOLAR-10.7B-Instruct-v1.0](https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0) using GPTQ. We use the config below for quantization/evaluation and [HuggingFaceH4/ultrachat_200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) as the calibration data. The code is available under [this repository](https://github.com/IST-DASLab/marlin/tree/2f6d7c10e124b3c5fa29ff8d77d568bd7af3274c/gptq).
|
8 |
-
|
9 |
-
```yaml
|
10 |
-
bits: 4
|
11 |
-
damp_percent: 0.01
|
12 |
-
desc_act: true
|
13 |
-
exllama_config:
|
14 |
-
version: 2
|
15 |
-
group_size: 128
|
16 |
-
quant_method: gptq
|
17 |
-
static_groups: false
|
18 |
-
sym: true
|
19 |
-
true_sequential: true
|
20 |
-
```
|
21 |
-
|
22 |
-
## Evaluations
|
23 |
-
|
24 |
-
Below is a comprehensive evaluation using the awesome [mosaicml/llm-foundry](https://github.com/mosaicml/llm-foundry/tree/main/scripts/eval).
|
25 |
-
|
26 |
-
| model_name | core_average | world_knowledge | commonsense_reasoning | language_understanding | symbolic_problem_solving | reading_comprehension |
|
27 |
-
|:----------------------------------|---------------:|------------------:|------------------------:|-------------------------:|---------------------------:|------------------------:|
|
28 |
-
| upstage/SOLAR-10.7B-Instruct-v1.0 | 0.594131 | 0.602579 | 0.600195 | 0.747605 | 0.406245 | 0.614029 |
|
29 |
-
|
30 |
-
| Category | Benchmark | Subtask | Accuracy | Number few shot |
|
31 |
-
| :----------------------- | :--------------------------- | :---------------------------------- | -------: | :-------------- |
|
32 |
-
| symbolic_problem_solving | gsm8k | | 0.638362 | 0-shot |
|
33 |
-
| commonsense_reasoning | copa | | 0.84 | 0-shot |
|
34 |
-
| commonsense_reasoning | commonsense_qa | | 0.841933 | 0-shot |
|
35 |
-
| commonsense_reasoning | piqa | | 0.818281 | 0-shot |
|
36 |
-
| commonsense_reasoning | bigbench_strange_stories | | 0.793103 | 0-shot |
|
37 |
-
| commonsense_reasoning | bigbench_strategy_qa | | 0.66623 | 0-shot |
|
38 |
-
| language_understanding | lambada_openai | | 0.735882 | 0-shot |
|
39 |
-
| language_understanding | hellaswag | | 0.855208 | 0-shot |
|
40 |
-
| reading_comprehension | coqa | | 0.222723 | 0-shot |
|
41 |
-
| reading_comprehension | boolq | | 0.893884 | 0-shot |
|
42 |
-
| world_knowledge | triviaqa_sm_sub | | 0.628333 | 3-shot |
|
43 |
-
| world_knowledge | jeopardy | Average | 0.500792 | 3-shot |
|
44 |
-
| world_knowledge | | american_history | 0.581114 | 3-shot |
|
45 |
-
| world_knowledge | | literature | 0.655102 | 3-shot |
|
46 |
-
| world_knowledge | | science | 0.371849 | 3-shot |
|
47 |
-
| world_knowledge | | word_origins | 0.271233 | 3-shot |
|
48 |
-
| world_knowledge | | world_history | 0.624665 | 3-shot |
|
49 |
-
| world_knowledge | bigbench_qa_wikidata | | 0.669209 | 3-shot |
|
50 |
-
| world_knowledge | arc_easy | | 0.815657 | 3-shot |
|
51 |
-
| world_knowledge | arc_challenge | | 0.650171 | 3-shot |
|
52 |
-
| commonsense_reasoning | siqa | | 0.881781 | 3-shot |
|
53 |
-
| language_understanding | winograd | | 0.897436 | 3-shot |
|
54 |
-
| symbolic_problem_solving | bigbench_operators | | 0.595238 | 3-shot |
|
55 |
-
| reading_comprehension | squad | | 0.626395 | 3-shot |
|
56 |
-
| symbolic_problem_solving | svamp | | 0.603333 | 5-shot |
|
57 |
-
| world_knowledge | mmlu | Average | 0.647028 | 5-shot |
|
58 |
-
| world_knowledge | | abstract_algebra | 0.29 | 5-shot |
|
59 |
-
| world_knowledge | | anatomy | 0.577778 | 5-shot |
|
60 |
-
| world_knowledge | | astronomy | 0.710526 | 5-shot |
|
61 |
-
| world_knowledge | | business_ethics | 0.73 | 5-shot |
|
62 |
-
| world_knowledge | | clinical_knowledge | 0.701887 | 5-shot |
|
63 |
-
| world_knowledge | | college_biology | 0.729167 | 5-shot |
|
64 |
-
| world_knowledge | | college_chemistry | 0.39 | 5-shot |
|
65 |
-
| world_knowledge | | college_computer_science | 0.5 | 5-shot |
|
66 |
-
| world_knowledge | | college_mathematics | 0.31 | 5-shot |
|
67 |
-
| world_knowledge | | college_medicine | 0.66474 | 5-shot |
|
68 |
-
| world_knowledge | | college_physics | 0.411765 | 5-shot |
|
69 |
-
| world_knowledge | | computer_security | 0.72 | 5-shot |
|
70 |
-
| world_knowledge | | conceptual_physics | 0.582979 | 5-shot |
|
71 |
-
| world_knowledge | | econometrics | 0.473684 | 5-shot |
|
72 |
-
| world_knowledge | | electrical_engineering | 0.565517 | 5-shot |
|
73 |
-
| world_knowledge | | elementary_mathematics | 0.470899 | 5-shot |
|
74 |
-
| world_knowledge | | formal_logic | 0.460317 | 5-shot |
|
75 |
-
| world_knowledge | | global_facts | 0.33 | 5-shot |
|
76 |
-
| world_knowledge | | high_school_biology | 0.770968 | 5-shot |
|
77 |
-
| world_knowledge | | high_school_chemistry | 0.448276 | 5-shot |
|
78 |
-
| world_knowledge | | high_school_computer_science | 0.71 | 5-shot |
|
79 |
-
| world_knowledge | | high_school_european_history | 0.830303 | 5-shot |
|
80 |
-
| world_knowledge | | high_school_geography | 0.848485 | 5-shot |
|
81 |
-
| world_knowledge | | high_school_government_and_politics | 0.896373 | 5-shot |
|
82 |
-
| world_knowledge | | high_school_macroeconomics | 0.646154 | 5-shot |
|
83 |
-
| world_knowledge | | high_school_mathematics | 0.348148 | 5-shot |
|
84 |
-
| world_knowledge | | high_school_microeconomics | 0.722689 | 5-shot |
|
85 |
-
| world_knowledge | | high_school_physics | 0.344371 | 5-shot |
|
86 |
-
| world_knowledge | | high_school_psychology | 0.833028 | 5-shot |
|
87 |
-
| world_knowledge | | high_school_statistics | 0.523148 | 5-shot |
|
88 |
-
| world_knowledge | | high_school_us_history | 0.852941 | 5-shot |
|
89 |
-
| world_knowledge | | high_school_world_history | 0.827004 | 5-shot |
|
90 |
-
| world_knowledge | | human_aging | 0.713004 | 5-shot |
|
91 |
-
| world_knowledge | | human_sexuality | 0.755725 | 5-shot |
|
92 |
-
| world_knowledge | | international_law | 0.768595 | 5-shot |
|
93 |
-
| world_knowledge | | jurisprudence | 0.796296 | 5-shot |
|
94 |
-
| world_knowledge | | logical_fallacies | 0.723926 | 5-shot |
|
95 |
-
| world_knowledge | | machine_learning | 0.508929 | 5-shot |
|
96 |
-
| world_knowledge | | management | 0.825243 | 5-shot |
|
97 |
-
| world_knowledge | | marketing | 0.871795 | 5-shot |
|
98 |
-
| world_knowledge | | medical_genetics | 0.73 | 5-shot |
|
99 |
-
| world_knowledge | | miscellaneous | 0.814815 | 5-shot |
|
100 |
-
| world_knowledge | | moral_disputes | 0.736994 | 5-shot |
|
101 |
-
| world_knowledge | | moral_scenarios | 0.43352 | 5-shot |
|
102 |
-
| world_knowledge | | nutrition | 0.728758 | 5-shot |
|
103 |
-
| world_knowledge | | philosophy | 0.700965 | 5-shot |
|
104 |
-
| world_knowledge | | prehistory | 0.765432 | 5-shot |
|
105 |
-
| world_knowledge | | professional_accounting | 0.507092 | 5-shot |
|
106 |
-
| world_knowledge | | professional_law | 0.487614 | 5-shot |
|
107 |
-
| world_knowledge | | professional_medicine | 0.727941 | 5-shot |
|
108 |
-
| world_knowledge | | professional_psychology | 0.661765 | 5-shot |
|
109 |
-
| world_knowledge | | public_relations | 0.718182 | 5-shot |
|
110 |
-
| world_knowledge | | security_studies | 0.669388 | 5-shot |
|
111 |
-
| world_knowledge | | sociology | 0.81592 | 5-shot |
|
112 |
-
| world_knowledge | | us_foreign_policy | 0.89 | 5-shot |
|
113 |
-
| world_knowledge | | virology | 0.518072 | 5-shot |
|
114 |
-
| world_knowledge | | world_religions | 0.789474 | 5-shot |
|
115 |
-
| symbolic_problem_solving | bigbench_dyck_languages | | 0.458 | 5-shot |
|
116 |
-
| language_understanding | winogrande | | 0.826361 | 5-shot |
|
117 |
-
| symbolic_problem_solving | agi_eval_lsat_ar | | 0.269565 | 5-shot |
|
118 |
-
| symbolic_problem_solving | simple_arithmetic_nospaces | | 0.372 | 5-shot |
|
119 |
-
| symbolic_problem_solving | simple_arithmetic_withspaces | | 0.367 | 5-shot |
|
120 |
-
| reading_comprehension | agi_eval_lsat_rc | | 0.794776 | 5-shot |
|
121 |
-
| reading_comprehension | agi_eval_lsat_lr | | 0.641176 | 5-shot |
|
122 |
-
| reading_comprehension | agi_eval_sat_en | | 0.849515 | 5-shot |
|
123 |
-
| world_knowledge | arc_challenge | | 0.670648 | 25-shot |
|
124 |
-
| commonsense_reasoning | openbook_qa | | 0.56 | 10-shot |
|
125 |
-
| language_understanding | hellaswag | | 0.866461 | 10-shot |
|
126 |
-
| symbolic_problem_solving | bigbench_cs_algorithms | | 0.652273 | 10-shot |
|
127 |
| symbolic_problem_solving | bigbench_elementary_math_qa | | 0.392453 | 1-shot |
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
inference: false
|
4 |
+
---
|
5 |
+
|
6 |
+
# Description
|
7 |
+
4 bit quantization of [upstage/SOLAR-10.7B-Instruct-v1.0](https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0) using GPTQ. We use the config below for quantization/evaluation and [HuggingFaceH4/ultrachat_200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) as the calibration data. The code is available under [this repository](https://github.com/IST-DASLab/marlin/tree/2f6d7c10e124b3c5fa29ff8d77d568bd7af3274c/gptq).
|
8 |
+
|
9 |
+
```yaml
|
10 |
+
bits: 4
|
11 |
+
damp_percent: 0.01
|
12 |
+
desc_act: true
|
13 |
+
exllama_config:
|
14 |
+
version: 2
|
15 |
+
group_size: 128
|
16 |
+
quant_method: gptq
|
17 |
+
static_groups: false
|
18 |
+
sym: true
|
19 |
+
true_sequential: true
|
20 |
+
```
|
21 |
+
|
22 |
+
## Evaluations
|
23 |
+
|
24 |
+
Below is a comprehensive evaluation using the awesome [mosaicml/llm-foundry](https://github.com/mosaicml/llm-foundry/tree/main/scripts/eval).
|
25 |
+
|
26 |
+
| model_name | core_average | world_knowledge | commonsense_reasoning | language_understanding | symbolic_problem_solving | reading_comprehension |
|
27 |
+
|:----------------------------------|---------------:|------------------:|------------------------:|-------------------------:|---------------------------:|------------------------:|
|
28 |
+
| upstage/SOLAR-10.7B-Instruct-v1.0 | 0.594131 | 0.602579 | 0.600195 | 0.747605 | 0.406245 | 0.614029 |
|
29 |
+
|
30 |
+
| Category | Benchmark | Subtask | Accuracy | Number few shot |
|
31 |
+
| :----------------------- | :--------------------------- | :---------------------------------- | -------: | :-------------- |
|
32 |
+
| symbolic_problem_solving | gsm8k | | 0.638362 | 0-shot |
|
33 |
+
| commonsense_reasoning | copa | | 0.84 | 0-shot |
|
34 |
+
| commonsense_reasoning | commonsense_qa | | 0.841933 | 0-shot |
|
35 |
+
| commonsense_reasoning | piqa | | 0.818281 | 0-shot |
|
36 |
+
| commonsense_reasoning | bigbench_strange_stories | | 0.793103 | 0-shot |
|
37 |
+
| commonsense_reasoning | bigbench_strategy_qa | | 0.66623 | 0-shot |
|
38 |
+
| language_understanding | lambada_openai | | 0.735882 | 0-shot |
|
39 |
+
| language_understanding | hellaswag | | 0.855208 | 0-shot |
|
40 |
+
| reading_comprehension | coqa | | 0.222723 | 0-shot |
|
41 |
+
| reading_comprehension | boolq | | 0.893884 | 0-shot |
|
42 |
+
| world_knowledge | triviaqa_sm_sub | | 0.628333 | 3-shot |
|
43 |
+
| world_knowledge | jeopardy | Average | 0.500792 | 3-shot |
|
44 |
+
| world_knowledge | | american_history | 0.581114 | 3-shot |
|
45 |
+
| world_knowledge | | literature | 0.655102 | 3-shot |
|
46 |
+
| world_knowledge | | science | 0.371849 | 3-shot |
|
47 |
+
| world_knowledge | | word_origins | 0.271233 | 3-shot |
|
48 |
+
| world_knowledge | | world_history | 0.624665 | 3-shot |
|
49 |
+
| world_knowledge | bigbench_qa_wikidata | | 0.669209 | 3-shot |
|
50 |
+
| world_knowledge | arc_easy | | 0.815657 | 3-shot |
|
51 |
+
| world_knowledge | arc_challenge | | 0.650171 | 3-shot |
|
52 |
+
| commonsense_reasoning | siqa | | 0.881781 | 3-shot |
|
53 |
+
| language_understanding | winograd | | 0.897436 | 3-shot |
|
54 |
+
| symbolic_problem_solving | bigbench_operators | | 0.595238 | 3-shot |
|
55 |
+
| reading_comprehension | squad | | 0.626395 | 3-shot |
|
56 |
+
| symbolic_problem_solving | svamp | | 0.603333 | 5-shot |
|
57 |
+
| world_knowledge | mmlu | Average | 0.647028 | 5-shot |
|
58 |
+
| world_knowledge | | abstract_algebra | 0.29 | 5-shot |
|
59 |
+
| world_knowledge | | anatomy | 0.577778 | 5-shot |
|
60 |
+
| world_knowledge | | astronomy | 0.710526 | 5-shot |
|
61 |
+
| world_knowledge | | business_ethics | 0.73 | 5-shot |
|
62 |
+
| world_knowledge | | clinical_knowledge | 0.701887 | 5-shot |
|
63 |
+
| world_knowledge | | college_biology | 0.729167 | 5-shot |
|
64 |
+
| world_knowledge | | college_chemistry | 0.39 | 5-shot |
|
65 |
+
| world_knowledge | | college_computer_science | 0.5 | 5-shot |
|
66 |
+
| world_knowledge | | college_mathematics | 0.31 | 5-shot |
|
67 |
+
| world_knowledge | | college_medicine | 0.66474 | 5-shot |
|
68 |
+
| world_knowledge | | college_physics | 0.411765 | 5-shot |
|
69 |
+
| world_knowledge | | computer_security | 0.72 | 5-shot |
|
70 |
+
| world_knowledge | | conceptual_physics | 0.582979 | 5-shot |
|
71 |
+
| world_knowledge | | econometrics | 0.473684 | 5-shot |
|
72 |
+
| world_knowledge | | electrical_engineering | 0.565517 | 5-shot |
|
73 |
+
| world_knowledge | | elementary_mathematics | 0.470899 | 5-shot |
|
74 |
+
| world_knowledge | | formal_logic | 0.460317 | 5-shot |
|
75 |
+
| world_knowledge | | global_facts | 0.33 | 5-shot |
|
76 |
+
| world_knowledge | | high_school_biology | 0.770968 | 5-shot |
|
77 |
+
| world_knowledge | | high_school_chemistry | 0.448276 | 5-shot |
|
78 |
+
| world_knowledge | | high_school_computer_science | 0.71 | 5-shot |
|
79 |
+
| world_knowledge | | high_school_european_history | 0.830303 | 5-shot |
|
80 |
+
| world_knowledge | | high_school_geography | 0.848485 | 5-shot |
|
81 |
+
| world_knowledge | | high_school_government_and_politics | 0.896373 | 5-shot |
|
82 |
+
| world_knowledge | | high_school_macroeconomics | 0.646154 | 5-shot |
|
83 |
+
| world_knowledge | | high_school_mathematics | 0.348148 | 5-shot |
|
84 |
+
| world_knowledge | | high_school_microeconomics | 0.722689 | 5-shot |
|
85 |
+
| world_knowledge | | high_school_physics | 0.344371 | 5-shot |
|
86 |
+
| world_knowledge | | high_school_psychology | 0.833028 | 5-shot |
|
87 |
+
| world_knowledge | | high_school_statistics | 0.523148 | 5-shot |
|
88 |
+
| world_knowledge | | high_school_us_history | 0.852941 | 5-shot |
|
89 |
+
| world_knowledge | | high_school_world_history | 0.827004 | 5-shot |
|
90 |
+
| world_knowledge | | human_aging | 0.713004 | 5-shot |
|
91 |
+
| world_knowledge | | human_sexuality | 0.755725 | 5-shot |
|
92 |
+
| world_knowledge | | international_law | 0.768595 | 5-shot |
|
93 |
+
| world_knowledge | | jurisprudence | 0.796296 | 5-shot |
|
94 |
+
| world_knowledge | | logical_fallacies | 0.723926 | 5-shot |
|
95 |
+
| world_knowledge | | machine_learning | 0.508929 | 5-shot |
|
96 |
+
| world_knowledge | | management | 0.825243 | 5-shot |
|
97 |
+
| world_knowledge | | marketing | 0.871795 | 5-shot |
|
98 |
+
| world_knowledge | | medical_genetics | 0.73 | 5-shot |
|
99 |
+
| world_knowledge | | miscellaneous | 0.814815 | 5-shot |
|
100 |
+
| world_knowledge | | moral_disputes | 0.736994 | 5-shot |
|
101 |
+
| world_knowledge | | moral_scenarios | 0.43352 | 5-shot |
|
102 |
+
| world_knowledge | | nutrition | 0.728758 | 5-shot |
|
103 |
+
| world_knowledge | | philosophy | 0.700965 | 5-shot |
|
104 |
+
| world_knowledge | | prehistory | 0.765432 | 5-shot |
|
105 |
+
| world_knowledge | | professional_accounting | 0.507092 | 5-shot |
|
106 |
+
| world_knowledge | | professional_law | 0.487614 | 5-shot |
|
107 |
+
| world_knowledge | | professional_medicine | 0.727941 | 5-shot |
|
108 |
+
| world_knowledge | | professional_psychology | 0.661765 | 5-shot |
|
109 |
+
| world_knowledge | | public_relations | 0.718182 | 5-shot |
|
110 |
+
| world_knowledge | | security_studies | 0.669388 | 5-shot |
|
111 |
+
| world_knowledge | | sociology | 0.81592 | 5-shot |
|
112 |
+
| world_knowledge | | us_foreign_policy | 0.89 | 5-shot |
|
113 |
+
| world_knowledge | | virology | 0.518072 | 5-shot |
|
114 |
+
| world_knowledge | | world_religions | 0.789474 | 5-shot |
|
115 |
+
| symbolic_problem_solving | bigbench_dyck_languages | | 0.458 | 5-shot |
|
116 |
+
| language_understanding | winogrande | | 0.826361 | 5-shot |
|
117 |
+
| symbolic_problem_solving | agi_eval_lsat_ar | | 0.269565 | 5-shot |
|
118 |
+
| symbolic_problem_solving | simple_arithmetic_nospaces | | 0.372 | 5-shot |
|
119 |
+
| symbolic_problem_solving | simple_arithmetic_withspaces | | 0.367 | 5-shot |
|
120 |
+
| reading_comprehension | agi_eval_lsat_rc | | 0.794776 | 5-shot |
|
121 |
+
| reading_comprehension | agi_eval_lsat_lr | | 0.641176 | 5-shot |
|
122 |
+
| reading_comprehension | agi_eval_sat_en | | 0.849515 | 5-shot |
|
123 |
+
| world_knowledge | arc_challenge | | 0.670648 | 25-shot |
|
124 |
+
| commonsense_reasoning | openbook_qa | | 0.56 | 10-shot |
|
125 |
+
| language_understanding | hellaswag | | 0.866461 | 10-shot |
|
126 |
+
| symbolic_problem_solving | bigbench_cs_algorithms | | 0.652273 | 10-shot |
|
127 |
| symbolic_problem_solving | bigbench_elementary_math_qa | | 0.392453 | 1-shot |
|