ericpolewski committed
Commit b3779cd
1 Parent(s): 09217d4

Upload 8 files

README.md ADDED
@@ -0,0 +1,39 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `gptq` quantization config was used during training:
+ - quant_method: gptq
+ - bits: 4
+ - tokenizer: None
+ - dataset: None
+ - group_size: -1
+ - damp_percent: 0.1
+ - desc_act: True
+ - sym: True
+ - true_sequential: True
+ - use_cuda_fp16: False
+ - model_seqlen: 4096
+ - block_name_to_quantize: model.layers
+ - module_name_preceding_first_block: ['model.embed_tokens']
+ - batch_size: 1
+ - pad_token_id: None
+ - disable_exllama: True
+ - max_input_length: None
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: float16
+ ### Framework versions
+
+ - PEFT 0.5.0
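
For reference, the two config blocks above map onto the `transformers` quantization classes as in the following sketch. This is an illustration only, not the training script used for this run; the field values mirror the README, everything else is an assumption.

```python
# A minimal sketch, mirroring the README's config values; not this repo's code.
import torch
from transformers import BitsAndBytesConfig, GPTQConfig

# GPTQ settings matching the first config block above.
gptq_config = GPTQConfig(
    bits=4,
    group_size=-1,        # -1: a single quantization group (no group-wise quantization)
    damp_percent=0.1,
    desc_act=True,
    sym=True,
    true_sequential=True,
    use_cuda_fp16=False,
    model_seqlen=4096,
    batch_size=1,
    disable_exllama=True,
)

# bitsandbytes settings matching the last config block above:
# 4-bit NF4 with double quantization and fp16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
```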
adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "models\\Yi-34B-GPTQ",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "revision": null,
+   "target_modules": [
+     "gate_proj",
+     "down_proj",
+     "up_proj",
+     "q_proj",
+     "k_proj",
+     "v_proj",
+     "o_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
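
This is a standard PEFT LoRA adapter (r=64, alpha=128, all seven attention and MLP projections). A minimal loading sketch, assuming the GPTQ base model recorded above is available locally; paths here are placeholders, not part of this repo:

```python
# Loading sketch: requires optimum/auto-gptq for GPTQ checkpoints.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# trust_remote_code is assumed necessary because training_log.json records
# the custom base class "YiForCausalLM".
base = AutoModelForCausalLM.from_pretrained(
    "models/Yi-34B-GPTQ", device_map="auto", trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("models/Yi-34B-GPTQ", trust_remote_code=True)

# Wrap the base model with the LoRA weights from adapter_model.bin,
# using the settings in adapter_config.json.
model = PeftModel.from_pretrained(base, "path/to/this-adapter")
model.eval()
```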
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4154fb4163a1463f8291bce8164cce7f0259559bed8dab9d3f36ea70f0c73c94
+ size 1966383405
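
This is a git-lfs pointer, not the weights themselves: it records the SHA-256 and byte size of the real adapter_model.bin. A small sketch for verifying a downloaded copy against the pointer (local path assumed):

```python
# Verify a downloaded adapter_model.bin against the LFS pointer above.
import hashlib
import os

path = "adapter_model.bin"  # assumed local download location
expected_oid = "4154fb4163a1463f8291bce8164cce7f0259559bed8dab9d3f36ea70f0c73c94"
expected_size = 1966383405

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer")
```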
training_graph.json ADDED
@@ -0,0 +1,260 @@
+ [
+   {
+     "current_steps": 127,
+     "loss": 1.7356,
+     "learning_rate": 0.0,
+     "epoch": 0.01
+   },
+   {
+     "current_steps": 255,
+     "loss": 1.7333,
+     "learning_rate": 0.0003,
+     "epoch": 0.01
+   },
+   {
+     "current_steps": 383,
+     "loss": 1.7095,
+     "learning_rate": 0.00029932735426008964,
+     "epoch": 0.02
+   },
+   {
+     "current_steps": 511,
+     "loss": 1.4713,
+     "learning_rate": 0.00029865470852017935,
+     "epoch": 0.03
+   },
+   {
+     "current_steps": 639,
+     "loss": 1.3242,
+     "learning_rate": 0.000297982062780269,
+     "epoch": 0.03
+   },
+   {
+     "current_steps": 767,
+     "loss": 1.3469,
+     "learning_rate": 0.00029730941704035873,
+     "epoch": 0.04
+   },
+   {
+     "current_steps": 895,
+     "loss": 1.2645,
+     "learning_rate": 0.0002966367713004484,
+     "epoch": 0.05
+   },
+   {
+     "current_steps": 1023,
+     "loss": 1.2507,
+     "learning_rate": 0.00029596412556053806,
+     "epoch": 0.05
+   },
+   {
+     "current_steps": 1151,
+     "loss": 1.2348,
+     "learning_rate": 0.00029529147982062777,
+     "epoch": 0.06
+   },
+   {
+     "current_steps": 1279,
+     "loss": 1.3604,
+     "learning_rate": 0.00029461883408071743,
+     "epoch": 0.07
+   },
+   {
+     "current_steps": 1407,
+     "loss": 1.2057,
+     "learning_rate": 0.00029394618834080715,
+     "epoch": 0.07
+   },
+   {
+     "current_steps": 1535,
+     "loss": 1.2899,
+     "learning_rate": 0.00029327354260089687,
+     "epoch": 0.08
+   },
+   {
+     "current_steps": 1663,
+     "loss": 1.2692,
+     "learning_rate": 0.00029260089686098653,
+     "epoch": 0.09
+   },
+   {
+     "current_steps": 1791,
+     "loss": 1.169,
+     "learning_rate": 0.0002919282511210762,
+     "epoch": 0.09
+   },
+   {
+     "current_steps": 1919,
+     "loss": 1.1576,
+     "learning_rate": 0.0002912556053811659,
+     "epoch": 0.1
+   },
+   {
+     "current_steps": 2047,
+     "loss": 1.1784,
+     "learning_rate": 0.00029058295964125557,
+     "epoch": 0.11
+   },
+   {
+     "current_steps": 2175,
+     "loss": 1.1418,
+     "learning_rate": 0.0002899103139013453,
+     "epoch": 0.11
+   },
+   {
+     "current_steps": 2303,
+     "loss": 1.2816,
+     "learning_rate": 0.00028923766816143495,
+     "epoch": 0.12
+   },
+   {
+     "current_steps": 2431,
+     "loss": 1.2302,
+     "learning_rate": 0.00028856502242152467,
+     "epoch": 0.13
+   },
+   {
+     "current_steps": 2559,
+     "loss": 1.1729,
+     "learning_rate": 0.00028789237668161433,
+     "epoch": 0.13
+   },
+   {
+     "current_steps": 2687,
+     "loss": 1.1625,
+     "learning_rate": 0.000287219730941704,
+     "epoch": 0.14
+   },
+   {
+     "current_steps": 2815,
+     "loss": 1.1987,
+     "learning_rate": 0.0002865470852017937,
+     "epoch": 0.15
+   },
+   {
+     "current_steps": 2943,
+     "loss": 1.1673,
+     "learning_rate": 0.00028587443946188337,
+     "epoch": 0.15
+   },
+   {
+     "current_steps": 3071,
+     "loss": 1.2074,
+     "learning_rate": 0.0002852017937219731,
+     "epoch": 0.16
+   },
+   {
+     "current_steps": 3199,
+     "loss": 1.1848,
+     "learning_rate": 0.00028452914798206275,
+     "epoch": 0.17
+   },
+   {
+     "current_steps": 3327,
+     "loss": 1.1209,
+     "learning_rate": 0.0002838565022421524,
+     "epoch": 0.17
+   },
+   {
+     "current_steps": 3455,
+     "loss": 1.091,
+     "learning_rate": 0.0002831838565022421,
+     "epoch": 0.18
+   },
+   {
+     "current_steps": 3583,
+     "loss": 1.1893,
+     "learning_rate": 0.00028251121076233184,
+     "epoch": 0.19
+   },
+   {
+     "current_steps": 3711,
+     "loss": 1.128,
+     "learning_rate": 0.0002818385650224215,
+     "epoch": 0.19
+   },
+   {
+     "current_steps": 3839,
+     "loss": 1.0913,
+     "learning_rate": 0.0002811659192825112,
+     "epoch": 0.2
+   },
+   {
+     "current_steps": 3967,
+     "loss": 1.1659,
+     "learning_rate": 0.0002804932735426009,
+     "epoch": 0.21
+   },
+   {
+     "current_steps": 4095,
+     "loss": 1.1555,
+     "learning_rate": 0.00027982062780269054,
+     "epoch": 0.21
+   },
+   {
+     "current_steps": 4223,
+     "loss": 1.1274,
+     "learning_rate": 0.00027914798206278026,
+     "epoch": 0.22
+   },
+   {
+     "current_steps": 4351,
+     "loss": 1.1193,
+     "learning_rate": 0.0002784753363228699,
+     "epoch": 0.23
+   },
+   {
+     "current_steps": 4479,
+     "loss": 1.1114,
+     "learning_rate": 0.00027780269058295964,
+     "epoch": 0.23
+   },
+   {
+     "current_steps": 4607,
+     "loss": 1.1123,
+     "learning_rate": 0.0002771300448430493,
+     "epoch": 0.24
+   },
+   {
+     "current_steps": 4735,
+     "loss": 1.135,
+     "learning_rate": 0.00027645739910313896,
+     "epoch": 0.25
+   },
+   {
+     "current_steps": 4863,
+     "loss": 1.1067,
+     "learning_rate": 0.0002757847533632287,
+     "epoch": 0.25
+   },
+   {
+     "current_steps": 4991,
+     "loss": 1.0959,
+     "learning_rate": 0.00027511210762331834,
+     "epoch": 0.26
+   },
+   {
+     "current_steps": 5119,
+     "loss": 1.0699,
+     "learning_rate": 0.00027443946188340806,
+     "epoch": 0.27
+   },
+   {
+     "current_steps": 5247,
+     "loss": 1.1366,
+     "learning_rate": 0.0002737668161434977,
+     "epoch": 0.27
+   },
+   {
+     "current_steps": 5375,
+     "loss": 1.1146,
+     "learning_rate": 0.0002730941704035874,
+     "epoch": 0.28
+   },
+   {
+     "current_steps": 5503,
+     "loss": 1.1146,
+     "learning_rate": 0.0002730941704035874,
+     "epoch": 0.28
+   }
+ ]
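
The log shows loss falling from about 1.74 to about 1.11 over 5,503 steps while the linear schedule decays the learning rate from 3e-4. A minimal sketch (assuming matplotlib, and that file names match this repo's layout) for re-plotting the curves; the rendered version is the training_graph.png added below:

```python
# Re-plot loss and learning rate from training_graph.json.
import json

import matplotlib.pyplot as plt

with open("training_graph.json") as f:
    history = json.load(f)

steps = [e["current_steps"] for e in history]
loss = [e["loss"] for e in history]
lr = [e["learning_rate"] for e in history]

fig, ax1 = plt.subplots()
ax1.plot(steps, loss, label="loss")
ax1.set_xlabel("steps")
ax1.set_ylabel("loss")

ax2 = ax1.twinx()  # second y-axis for the linear LR decay
ax2.plot(steps, lr, color="tab:orange", label="learning rate")
ax2.set_ylabel("learning rate")

fig.tight_layout()
fig.savefig("training_graph.png")
```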
training_graph.png ADDED
training_log.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "base_model_name": "Yi-34B-GPTQ",
+   "base_model_class": "YiForCausalLM",
+   "base_loaded_in_4bit": false,
+   "base_loaded_in_8bit": false,
+   "projections": "gate, down, up, q, k, v, o",
+   "loss": 1.1146,
+   "learning_rate": 0.0002730941704035874,
+   "epoch": 0.28,
+   "current_steps": 5503,
+   "train_runtime": 18904.2861,
+   "train_samples_per_second": 3.03,
+   "train_steps_per_second": 0.024,
+   "total_flos": 8039607892967424.0,
+   "train_loss": 1.2526764671007793
+ }
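
A small cross-check sketch: `train_loss` is the run's average training loss, so it should land near the mean of the per-step losses logged in training_graph.json (they differ somewhat because the graph samples the loss at a coarser cadence). File names are assumed from this repo's layout:

```python
# Compare the reported average train_loss with the mean of logged losses.
import json

with open("training_log.json") as f:
    log = json.load(f)
with open("training_graph.json") as f:
    history = json.load(f)

mean_logged = sum(e["loss"] for e in history) / len(history)
print(f"reported train_loss:   {log['train_loss']:.4f}")
print(f"mean of logged losses: {mean_logged:.4f}")
```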
training_parameters.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "lora_name": "11-5-yi34-oo",
+   "always_override": true,
+   "save_steps": 500.0,
+   "micro_batch_size": 1,
+   "batch_size": 0,
+   "epochs": 3.0,
+   "learning_rate": "3e-4",
+   "lr_scheduler_type": "linear",
+   "lora_rank": 64,
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "cutoff_len": 256,
+   "dataset": "None",
+   "eval_dataset": "None",
+   "format": "None",
+   "eval_steps": 100.0,
+   "raw_text_file": "openorca-256",
+   "higher_rank_limit": false,
+   "warmup_steps": 100.0,
+   "optimizer": "adamw_torch",
+   "hard_cut_string": "</file>",
+   "train_only_after": "",
+   "stop_at_loss": 0,
+   "add_eos_token": false,
+   "min_chars": 0.0,
+   "report_to": "None",
+   "precize_slicing_overlap": true,
+   "add_eos_token_type": "Every Block",
+   "save_steps_under_loss": 1.8,
+   "add_bos_token": true,
+   "training_projection": "all",
+   "sliding_window": false,
+   "warmup_ratio": 0,
+   "grad_accumulation": 128
+ }
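
These field names match text-generation-webui's LoRA training tab (a likely origin for this run, given `raw_text_file` and `precize_slicing_overlap`). Two of the values determine the effective optimizer batch; a short arithmetic sketch, with all numbers taken from this file and the interpretation assumed from standard gradient-accumulation semantics:

```python
# Derived quantities from training_parameters.json.
micro_batch_size = 1
grad_accumulation = 128
cutoff_len = 256

effective_batch = micro_batch_size * grad_accumulation  # 128 sequences per optimizer step
max_tokens_per_step = effective_batch * cutoff_len      # up to 32768 tokens per step
print(effective_batch, max_tokens_per_step)
```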
training_prompt.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "template_type": "raw_text"
+ }