xianchaowu committed on
Commit
52c4994
1 Parent(s): 1ae6855

checkpoint-200 for llama2-13b lazy lora

Files changed (2)
  1. README.md +127 -53
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -8,7 +8,17 @@ license: llama2
 
  0. using the updated [Meta's LLaMA-2 models](https://huggingface.co/meta-llama/Llama-2-13b-hf).
  1. support [4-bit qlora](https://arxiv.org/abs/2305.14314), extreme GPU memory and inference time saving;
- 2. better MMLU evaluation dataset results, llama2-13b's 54.8% to our 57.16% (+2.36%).
+ 2. comparable MMLU evaluation dataset results, llama2-13b:
+
+ | | eval | test | comp-eval | comp-test |
+ |---------------|--------|--------|-----------|-----------|
+ |llama2-13b | 56.57% | - | | |
+ |ckpt-200 | 56.19% | 54.75% | -0.38% | - |
+ |ckpt-800 | 53.16% | 55.06% | -3.41% | - |
+
+ llama2-13b: "31820c723bc0368c339e067553021dc6a8bf4375"
+
+
  3. This lazy-lora adapter is based on [Meta's LLaMA-2-13b-hf](https://huggingface.co/meta-llama/Llama-2-13b-hf), and using the [oasst1 dataset](https://huggingface.co/datasets/OpenAssistant/oasst1), following [Guanaco](https://huggingface.co/timdettmers/guanaco-65b).
 
  ### Introduction
@@ -84,67 +94,131 @@ model.print_trainable_parameters()
 
  ## MMLU result:
 
+ ### MMLU eval result:
  ```json
- {"mmlu_loss": 1.7513423996918502,
- "mmlu_eval_accuracy_high_school_microeconomics": 0.6538461538461539,
- "mmlu_eval_accuracy_human_aging": 0.6956521739130435,
- "mmlu_eval_accuracy_high_school_biology": 0.5625,
- "mmlu_eval_accuracy_machine_learning": 0.45454545454545453,
- "mmlu_eval_accuracy_moral_scenarios": 0.31,
- "mmlu_eval_accuracy_astronomy": 0.6875,
- "mmlu_eval_accuracy_medical_genetics": 0.8181818181818182,
+ {"mmlu_loss": 1.459045737611944,
+ "mmlu_eval_accuracy_sociology": 0.8181818181818182,
  "mmlu_eval_accuracy_high_school_government_and_politics": 0.7142857142857143,
- "mmlu_eval_accuracy_anatomy": 0.35714285714285715,
- "mmlu_eval_accuracy_prehistory": 0.5714285714285714,
- "mmlu_eval_accuracy_high_school_mathematics": 0.20689655172413793,
- "mmlu_eval_accuracy_philosophy": 0.6176470588235294,
- "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
- "mmlu_eval_accuracy_high_school_statistics": 0.30434782608695654,
- "mmlu_eval_accuracy_conceptual_physics": 0.38461538461538464,
- "mmlu_eval_accuracy_global_facts": 0.5,
- "mmlu_eval_accuracy_high_school_us_history": 0.7727272727272727,
- "mmlu_eval_accuracy_professional_accounting": 0.41935483870967744,
- "mmlu_eval_accuracy_college_computer_science": 0.5454545454545454,
- "mmlu_eval_accuracy_econometrics": 0.4166666666666667,
- "mmlu_eval_accuracy_high_school_physics": 0.23529411764705882,
- "mmlu_eval_accuracy_public_relations": 0.6666666666666666,
+ "mmlu_eval_accuracy_marketing": 0.88,
  "mmlu_eval_accuracy_us_foreign_policy": 0.9090909090909091,
- "mmlu_eval_accuracy_miscellaneous": 0.6744186046511628,
- "mmlu_eval_accuracy_college_mathematics": 0.45454545454545453,
+ "mmlu_eval_accuracy_high_school_world_history": 0.5,
  "mmlu_eval_accuracy_management": 0.7272727272727273,
- "mmlu_eval_accuracy_college_biology": 0.625,
- "mmlu_eval_accuracy_high_school_world_history": 0.5384615384615384,
- "mmlu_eval_accuracy_electrical_engineering": 0.5,
+ "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
+ "mmlu_eval_accuracy_high_school_us_history": 0.8181818181818182,
+ "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
+ "mmlu_eval_accuracy_elementary_mathematics": 0.3170731707317073,
+ "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
  "mmlu_eval_accuracy_computer_security": 0.8181818181818182,
- "mmlu_eval_accuracy_clinical_knowledge": 0.4482758620689655,
- "mmlu_eval_accuracy_professional_psychology": 0.5507246376811594,
- "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
- "mmlu_eval_accuracy_high_school_psychology": 0.8333333333333334,
- "mmlu_eval_accuracy_moral_disputes": 0.631578947368421,
- "mmlu_eval_accuracy_formal_logic": 0.2857142857142857,
- "mmlu_eval_accuracy_international_law": 0.8461538461538461,
- "mmlu_eval_accuracy_nutrition": 0.7878787878787878,
- "mmlu_eval_accuracy_marketing": 0.88,
- "mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091,
  "mmlu_eval_accuracy_college_chemistry": 0.375,
- "mmlu_eval_accuracy_professional_medicine": 0.5483870967741935,
- "mmlu_eval_accuracy_virology": 0.3888888888888889,
- "mmlu_eval_accuracy_logical_fallacies": 0.7222222222222222,
- "mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778,
+ "mmlu_eval_accuracy_econometrics": 0.4166666666666667,
+ "mmlu_eval_accuracy_professional_psychology": 0.5362318840579711,
+ "mmlu_eval_accuracy_high_school_statistics": 0.30434782608695654,
+ "mmlu_eval_accuracy_high_school_macroeconomics": 0.5581395348837209,
+ "mmlu_eval_accuracy_moral_scenarios": 0.36,
+ "mmlu_eval_accuracy_moral_disputes": 0.6578947368421053,
+ "mmlu_eval_accuracy_astronomy": 0.5625,
+ "mmlu_eval_accuracy_professional_accounting": 0.41935483870967744,
+ "mmlu_eval_accuracy_human_sexuality": 0.5,
+ "mmlu_eval_accuracy_world_religions": 0.7894736842105263,
+ "mmlu_eval_accuracy_high_school_psychology": 0.8166666666666667,
+ "mmlu_eval_accuracy_nutrition": 0.7575757575757576,
+ "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
+ "mmlu_eval_accuracy_global_facts": 0.4,
+ "mmlu_eval_accuracy_high_school_mathematics": 0.20689655172413793,
+ "mmlu_eval_accuracy_college_medicine": 0.4090909090909091,
+ "mmlu_eval_accuracy_electrical_engineering": 0.5,
  "mmlu_eval_accuracy_jurisprudence": 0.6363636363636364,
- "mmlu_eval_accuracy_human_sexuality": 0.5833333333333334,
+ "mmlu_eval_accuracy_public_relations": 0.5833333333333334,
+ "mmlu_eval_accuracy_virology": 0.4444444444444444,
+ "mmlu_eval_accuracy_high_school_physics": 0.29411764705882354,
+ "mmlu_eval_accuracy_college_physics": 0.6363636363636364,
+ "mmlu_eval_accuracy_miscellaneous": 0.686046511627907,
+ "mmlu_eval_accuracy_international_law": 0.8461538461538461,
+ "mmlu_eval_accuracy_human_aging": 0.6956521739130435,
+ "mmlu_eval_accuracy_conceptual_physics": 0.38461538461538464,
+ "mmlu_eval_accuracy_formal_logic": 0.2857142857142857,
+ "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
+ "mmlu_eval_accuracy_philosophy": 0.6470588235294118,
+ "mmlu_eval_accuracy_professional_medicine": 0.5806451612903226,
+ "mmlu_eval_accuracy_security_studies": 0.6666666666666666,
+ "mmlu_eval_accuracy_college_biology": 0.625,
+ "mmlu_eval_accuracy_high_school_biology": 0.53125,
+ "mmlu_eval_accuracy_machine_learning": 0.45454545454545453,
+ "mmlu_eval_accuracy_high_school_microeconomics": 0.7307692307692307,
+ "mmlu_eval_accuracy_professional_law": 0.38823529411764707,
+ "mmlu_eval_accuracy_logical_fallacies": 0.7777777777777778,
+ "mmlu_eval_accuracy_high_school_european_history": 0.7222222222222222,
+ "mmlu_eval_accuracy_prehistory": 0.6,
  "mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727,
+ "mmlu_eval_accuracy_clinical_knowledge": 0.5517241379310345,
+ "mmlu_eval_accuracy_anatomy": 0.35714285714285715,
  "mmlu_eval_accuracy_business_ethics": 0.2727272727272727,
- "mmlu_eval_accuracy_security_studies": 0.7037037037037037,
- "mmlu_eval_accuracy_professional_law": 0.4,
- "mmlu_eval_accuracy_college_medicine": 0.5,
- "mmlu_eval_accuracy_elementary_mathematics": 0.34146341463414637,
- "mmlu_eval_accuracy_high_school_macroeconomics": 0.5813953488372093,
- "mmlu_eval_accuracy_sociology": 0.8636363636363636,
- "mmlu_eval_accuracy_world_religions": 0.7894736842105263,
- "mmlu_eval_accuracy_college_physics": 0.6363636363636364,
- "mmlu_eval_accuracy": 0.5716083571911647,
- "epoch": 0.68}
+ "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
+ "mmlu_eval_accuracy": 0.5618637260755334}
+ ```
+
+ ### MMLU test result:
+
+ ```json
+ {"mmlu_loss": 1.4737504210172938,
+ "mmlu_test_accuracy_professional_law": 0.4172099087353325,
+ "mmlu_test_accuracy_formal_logic": 0.3412698412698413,
+ "mmlu_test_accuracy_anatomy": 0.4962962962962963,
+ "mmlu_test_accuracy_professional_medicine": 0.5404411764705882,
+ "mmlu_test_accuracy_high_school_mathematics": 0.2851851851851852,
+ "mmlu_test_accuracy_high_school_government_and_politics": 0.8134715025906736,
+ "mmlu_test_accuracy_electrical_engineering": 0.5448275862068965,
+ "mmlu_test_accuracy_conceptual_physics": 0.4595744680851064,
+ "mmlu_test_accuracy_jurisprudence": 0.7407407407407407,
+ "mmlu_test_accuracy_college_chemistry": 0.4,
+ "mmlu_test_accuracy_high_school_computer_science": 0.56,
+ "mmlu_test_accuracy_college_computer_science": 0.44,
+ "mmlu_test_accuracy_marketing": 0.7863247863247863,
+ "mmlu_test_accuracy_high_school_psychology": 0.7376146788990826,
+ "mmlu_test_accuracy_professional_psychology": 0.5522875816993464,
+ "mmlu_test_accuracy_logical_fallacies": 0.6748466257668712,
+ "mmlu_test_accuracy_high_school_microeconomics": 0.5672268907563025,
+ "mmlu_test_accuracy_international_law": 0.7107438016528925,
+ "mmlu_test_accuracy_high_school_world_history": 0.7088607594936709,
+ "mmlu_test_accuracy_medical_genetics": 0.57,
+ "mmlu_test_accuracy_professional_accounting": 0.41843971631205673,
+ "mmlu_test_accuracy_business_ethics": 0.57,
+ "mmlu_test_accuracy_moral_scenarios": 0.293854748603352,
+ "mmlu_test_accuracy_abstract_algebra": 0.34,
+ "mmlu_test_accuracy_elementary_mathematics": 0.328042328042328,
+ "mmlu_test_accuracy_high_school_biology": 0.6580645161290323,
+ "mmlu_test_accuracy_public_relations": 0.6090909090909091,
+ "mmlu_test_accuracy_high_school_physics": 0.31788079470198677,
+ "mmlu_test_accuracy_astronomy": 0.4934210526315789,
+ "mmlu_test_accuracy_high_school_us_history": 0.75,
+ "mmlu_test_accuracy_college_medicine": 0.5028901734104047,
+ "mmlu_test_accuracy_college_mathematics": 0.34,
+ "mmlu_test_accuracy_econometrics": 0.2894736842105263,
+ "mmlu_test_accuracy_clinical_knowledge": 0.6150943396226415,
+ "mmlu_test_accuracy_moral_disputes": 0.630057803468208,
+ "mmlu_test_accuracy_machine_learning": 0.26785714285714285,
+ "mmlu_test_accuracy_human_sexuality": 0.5801526717557252,
+ "mmlu_test_accuracy_virology": 0.4457831325301205,
+ "mmlu_test_accuracy_high_school_chemistry": 0.43349753694581283,
+ "mmlu_test_accuracy_high_school_geography": 0.6515151515151515,
+ "mmlu_test_accuracy_high_school_european_history": 0.6363636363636364,
+ "mmlu_test_accuracy_nutrition": 0.5849673202614379,
+ "mmlu_test_accuracy_human_aging": 0.6502242152466368,
+ "mmlu_test_accuracy_philosophy": 0.639871382636656,
+ "mmlu_test_accuracy_global_facts": 0.34,
+ "mmlu_test_accuracy_high_school_statistics": 0.4722222222222222,
+ "mmlu_test_accuracy_college_biology": 0.5486111111111112,
+ "mmlu_test_accuracy_management": 0.6893203883495146,
+ "mmlu_test_accuracy_sociology": 0.7213930348258707,
+ "mmlu_test_accuracy_security_studies": 0.6326530612244898,
+ "mmlu_test_accuracy_college_physics": 0.24509803921568626,
+ "mmlu_test_accuracy_high_school_macroeconomics": 0.5,
+ "mmlu_test_accuracy_us_foreign_policy": 0.83,
+ "mmlu_test_accuracy_miscellaneous": 0.7343550446998723,
+ "mmlu_test_accuracy_prehistory": 0.6512345679012346,
+ "mmlu_test_accuracy_computer_security": 0.69,
+ "mmlu_test_accuracy_world_religions": 0.7602339181286549,
+ "mmlu_test_accuracy": 0.5475190434068002}
  ```
 
  ## License and intended use
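In the new comparison table, `comp-eval` reads as the delta against the llama2-13b MMLU-eval baseline (checkpoint-200: 56.19% - 56.57% = -0.38%; checkpoint-800: 53.16% - 56.57% = -3.41%).

The README's usage code sits just above the second hunk (its context line ends at `model.print_trainable_parameters()`): the base model is loaded in 4-bit and the lazy-lora adapter is attached via PEFT. The snippet below is a minimal sketch of that flow under the standard transformers + peft + bitsandbytes stack, not the README's exact code; `ADAPTER_ID` is a placeholder for this repository's id or a local checkpoint path, and the NF4/bfloat16 settings are assumed QLoRA defaults.

```python
# Minimal sketch (not the README's exact code): 4-bit base model + lazy-lora adapter.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

BASE_ID = "meta-llama/Llama-2-13b-hf"
ADAPTER_ID = "path/to/this-lazy-lora-adapter"  # placeholder, not taken from the commit

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # 4-bit weights, as the README advertises
    bnb_4bit_quant_type="nf4",              # assumed QLoRA default
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumed compute dtype; adjust to hardware
)

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
model = AutoModelForCausalLM.from_pretrained(
    BASE_ID, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, ADAPTER_ID)  # loads the adapter_model.bin weights
model.eval()
```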
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:946885901731680cd58d8c7d6b65785bd7c8b8463e7e9078f69daf3b91672099
+ oid sha256:326f478b0100dbf136cd83e9493d97fd1d65c2c7f4fd91bf93040e000ca69308
  size 500836813
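The adapter_model.bin change only swaps the Git LFS pointer's sha256 oid; the size stays at 500836813 bytes. Below is a small verification sketch, assuming a local copy of the file (for example after `git lfs pull` or a hub download), to confirm it matches the new pointer:

```python
# Verify a local adapter_model.bin against the new LFS pointer in this commit.
import hashlib
import os

PATH = "adapter_model.bin"  # assumed local path
EXPECTED_SHA256 = "326f478b0100dbf136cd83e9493d97fd1d65c2c7f4fd91bf93040e000ca69308"
EXPECTED_SIZE = 500836813  # bytes, unchanged by this commit

digest = hashlib.sha256()
with open(PATH, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(PATH) == EXPECTED_SIZE, "size mismatch"
assert digest.hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer")
```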