practicaldreamer committed on
Commit 530a9a0
1 Parent(s): 6a44f40
adapter_config.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "base_model_name_or_path": "Neko-Institute-of-Science/LLaMA-30B-HF",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
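The config above describes a rank-64 LoRA (alpha 128, dropout 0.05) on the q_proj and v_proj attention projections of LLaMA-30B, saved with inference mode enabled. A minimal loading sketch using the PEFT API, assuming fp16 on GPU; "path/to/adapter" is a placeholder for the directory containing adapter_config.json and adapter_model.bin:

```python
# Minimal sketch: load this LoRA adapter on top of the base model with PEFT.
# "path/to/adapter" is a placeholder for this repo's root directory.
import torch
from transformers import LlamaForCausalLM
from peft import PeftModel

base = LlamaForCausalLM.from_pretrained(
    "Neko-Institute-of-Science/LLaMA-30B-HF",
    torch_dtype=torch.float16,  # fp16 is an assumption; the run itself trained in 8-bit
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "path/to/adapter")
model.eval()  # matches "inference_mode": true in the config
```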
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48eca6e0a8ca2ec993e3bc7396f9fe1c06fa72915006f31e72592cf3b81f16ad
+ size 409031373
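As with all binaries in this commit, adapter_model.bin is committed as a Git LFS pointer: the three lines above record the spec version, the SHA-256 object id, and the byte size of the real file. A small sketch (the helper and file names are illustrative, not part of this repo) that verifies a downloaded object against such a pointer:

```python
# Sketch: check a downloaded file against its Git LFS pointer.
# The file names here are illustrative placeholders.
import hashlib

def parse_pointer(text: str) -> dict:
    # Each pointer line is "key value": version, oid, size.
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

pointer = parse_pointer(open("adapter_model.bin.pointer").read())
expected_oid = pointer["oid"].removeprefix("sha256:")
expected_size = int(pointer["size"])

data = open("adapter_model.bin", "rb").read()  # ~409 MB; fine to read at once here
assert len(data) == expected_size, "size mismatch"
assert hashlib.sha256(data).hexdigest() == expected_oid, "hash mismatch"
```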
checkpoint-36/adapter_config.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "base_model_name_or_path": "Neko-Institute-of-Science/LLaMA-30B-HF",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
checkpoint-36/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48eca6e0a8ca2ec993e3bc7396f9fe1c06fa72915006f31e72592cf3b81f16ad
+ size 409031373
checkpoint-36/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4e68ef518f7c73a60e165ddc32b1e55569a2678dc88a9eba216124114ffc3f5
+ size 205153925
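These sizes are consistent with the LoRA geometry: assuming LLaMA-30B's 60 layers and hidden size 6656 (properties of the base model, not stated in this diff), rank-64 adapters on q_proj and v_proj give about 102M trainable parameters, which is roughly 409 MB in fp32 and roughly 204 MB for the two 1-byte states of the 8-bit AdamW optimizer used in the run. A back-of-the-envelope check:

```python
# Sketch: rough size check for adapter_model.bin and optimizer.pt.
# Layer count and hidden size are assumed LLaMA-30B values, not from this repo.
n_layers, hidden, r = 60, 6656, 64
per_module = 2 * r * hidden              # lora_A (r x h) + lora_B (h x r)
lora_params = per_module * 2 * n_layers  # q_proj and v_proj in every layer

print(lora_params)      # 102236160
print(lora_params * 4)  # 408944640 bytes, close to adapter_model.bin's 409031373
print(lora_params * 2)  # 204472320 bytes, close to optimizer.pt's 205153925
                        # (adamw_bnb_8bit keeps two 1-byte states per parameter)
```

The small remainders over the predicted sizes are plausibly serialization metadata.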
checkpoint-36/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47253551939bd748a3719d1c09bdc491a07c56dbaef6f75e6b7464039329022c
+ size 14575
checkpoint-36/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b280ae8d5a93531d2378b82c4312f067d6f81420714361b4c45009a9f2adfca1
+ size 627
checkpoint-36/trainer_state.json ADDED
@@ -0,0 +1,304 @@
+ {
+   "best_metric": 1.093778133392334,
+   "best_model_checkpoint": "output_dir/checkpoint-36",
+   "epoch": 1.083725305738476,
+   "global_step": 36,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.03,
+       "learning_rate": 6.000000000000001e-07,
+       "loss": 1.1282,
+       "step": 1
+     },
+     {
+       "epoch": 0.06,
+       "learning_rate": 1.2000000000000002e-06,
+       "loss": 1.1856,
+       "step": 2
+     },
+     {
+       "epoch": 0.09,
+       "learning_rate": 1.8e-06,
+       "loss": 1.1748,
+       "step": 3
+     },
+     {
+       "epoch": 0.12,
+       "learning_rate": 2.4000000000000003e-06,
+       "loss": 1.1748,
+       "step": 4
+     },
+     {
+       "epoch": 0.12,
+       "eval_loss": 1.1107146739959717,
+       "eval_runtime": 9.0527,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 4
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 3e-06,
+       "loss": 1.1506,
+       "step": 5
+     },
+     {
+       "epoch": 0.18,
+       "learning_rate": 3.6e-06,
+       "loss": 1.1282,
+       "step": 6
+     },
+     {
+       "epoch": 0.21,
+       "learning_rate": 4.2000000000000004e-06,
+       "loss": 1.1833,
+       "step": 7
+     },
+     {
+       "epoch": 0.24,
+       "learning_rate": 4.800000000000001e-06,
+       "loss": 1.1521,
+       "step": 8
+     },
+     {
+       "epoch": 0.24,
+       "eval_loss": 1.1096446514129639,
+       "eval_runtime": 9.0523,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 8
+     },
+     {
+       "epoch": 0.27,
+       "learning_rate": 5.4e-06,
+       "loss": 1.1579,
+       "step": 9
+     },
+     {
+       "epoch": 0.3,
+       "learning_rate": 6e-06,
+       "loss": 1.1198,
+       "step": 10
+     },
+     {
+       "epoch": 0.33,
+       "learning_rate": 6.6e-06,
+       "loss": 1.2155,
+       "step": 11
+     },
+     {
+       "epoch": 0.36,
+       "learning_rate": 7.2e-06,
+       "loss": 1.1188,
+       "step": 12
+     },
+     {
+       "epoch": 0.36,
+       "eval_loss": 1.1087640523910522,
+       "eval_runtime": 9.0422,
+       "eval_samples_per_second": 1.217,
+       "eval_steps_per_second": 0.221,
+       "step": 12
+     },
+     {
+       "epoch": 0.39,
+       "learning_rate": 7.8e-06,
+       "loss": 1.1327,
+       "step": 13
+     },
+     {
+       "epoch": 0.42,
+       "learning_rate": 8.400000000000001e-06,
+       "loss": 1.1311,
+       "step": 14
+     },
+     {
+       "epoch": 0.45,
+       "learning_rate": 9e-06,
+       "loss": 1.1268,
+       "step": 15
+     },
+     {
+       "epoch": 0.48,
+       "learning_rate": 9.600000000000001e-06,
+       "loss": 1.1316,
+       "step": 16
+     },
+     {
+       "epoch": 0.48,
+       "eval_loss": 1.1073920726776123,
+       "eval_runtime": 9.0556,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 16
+     },
+     {
+       "epoch": 0.51,
+       "learning_rate": 1.02e-05,
+       "loss": 1.1142,
+       "step": 17
+     },
+     {
+       "epoch": 0.54,
+       "learning_rate": 1.08e-05,
+       "loss": 1.1369,
+       "step": 18
+     },
+     {
+       "epoch": 0.57,
+       "learning_rate": 1.1400000000000001e-05,
+       "loss": 1.139,
+       "step": 19
+     },
+     {
+       "epoch": 0.6,
+       "learning_rate": 1.2e-05,
+       "loss": 1.1231,
+       "step": 20
+     },
+     {
+       "epoch": 0.6,
+       "eval_loss": 1.1051356792449951,
+       "eval_runtime": 9.0525,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 20
+     },
+     {
+       "epoch": 0.63,
+       "learning_rate": 1.26e-05,
+       "loss": 1.1243,
+       "step": 21
+     },
+     {
+       "epoch": 0.66,
+       "learning_rate": 1.32e-05,
+       "loss": 1.1161,
+       "step": 22
+     },
+     {
+       "epoch": 0.69,
+       "learning_rate": 1.3800000000000002e-05,
+       "loss": 1.1153,
+       "step": 23
+     },
+     {
+       "epoch": 0.72,
+       "learning_rate": 1.44e-05,
+       "loss": 1.1217,
+       "step": 24
+     },
+     {
+       "epoch": 0.72,
+       "eval_loss": 1.1019339561462402,
+       "eval_runtime": 9.0337,
+       "eval_samples_per_second": 1.218,
+       "eval_steps_per_second": 0.221,
+       "step": 24
+     },
+     {
+       "epoch": 0.75,
+       "learning_rate": 1.5e-05,
+       "loss": 1.1115,
+       "step": 25
+     },
+     {
+       "epoch": 0.78,
+       "learning_rate": 1.56e-05,
+       "loss": 1.1215,
+       "step": 26
+     },
+     {
+       "epoch": 0.81,
+       "learning_rate": 1.62e-05,
+       "loss": 1.1057,
+       "step": 27
+     },
+     {
+       "epoch": 0.84,
+       "learning_rate": 1.6800000000000002e-05,
+       "loss": 1.1184,
+       "step": 28
+     },
+     {
+       "epoch": 0.84,
+       "eval_loss": 1.0975638628005981,
+       "eval_runtime": 9.0363,
+       "eval_samples_per_second": 1.217,
+       "eval_steps_per_second": 0.221,
+       "step": 28
+     },
+     {
+       "epoch": 0.87,
+       "learning_rate": 1.74e-05,
+       "loss": 1.117,
+       "step": 29
+     },
+     {
+       "epoch": 0.9,
+       "learning_rate": 1.8e-05,
+       "loss": 1.1143,
+       "step": 30
+     },
+     {
+       "epoch": 0.93,
+       "learning_rate": 1.86e-05,
+       "loss": 1.1195,
+       "step": 31
+     },
+     {
+       "epoch": 0.96,
+       "learning_rate": 1.9200000000000003e-05,
+       "loss": 1.1062,
+       "step": 32
+     },
+     {
+       "epoch": 0.96,
+       "eval_loss": 1.0937966108322144,
+       "eval_runtime": 9.0438,
+       "eval_samples_per_second": 1.216,
+       "eval_steps_per_second": 0.221,
+       "step": 32
+     },
+     {
+       "epoch": 0.99,
+       "learning_rate": 1.98e-05,
+       "loss": 1.1314,
+       "step": 33
+     },
+     {
+       "epoch": 1.02,
+       "learning_rate": 2.04e-05,
+       "loss": 1.1049,
+       "step": 34
+     },
+     {
+       "epoch": 1.05,
+       "learning_rate": 2.1e-05,
+       "loss": 1.1067,
+       "step": 35
+     },
+     {
+       "epoch": 1.08,
+       "learning_rate": 2.16e-05,
+       "loss": 1.1027,
+       "step": 36
+     },
+     {
+       "epoch": 1.08,
+       "eval_loss": 1.093778133392334,
+       "eval_runtime": 9.0523,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 36
+     }
+   ],
+   "max_steps": 99,
+   "num_train_epochs": 3,
+   "total_flos": 1.8344233538578022e+18,
+   "trial_name": null,
+   "trial_params": null
+ }
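The logged learning rates are still in the warmup ramp: with the configured peak of 3e-5 over 50 warmup steps (see documentation/hyperparameters.yml below), the rate grows by 6e-7 per step, matching step 1 at 6e-7 through step 36 at 2.16e-5, and the best checkpoint is the last eval at step 36. A quick consistency check:

```python
# Sketch: the logged learning rates match linear warmup to 3e-5 over 50 steps.
peak_lr, warmup_steps = 3e-5, 50
for step, logged in [(1, 6.000000000000001e-07), (20, 1.2e-05), (36, 2.16e-05)]:
    expected = peak_lr * step / warmup_steps  # 6e-7 per step
    assert abs(expected - logged) < 1e-12, (step, expected, logged)
```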
checkpoint-36/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5b21bf743ecaa5f9de425c65a9bb817e2a0607a8a178b081ea0acfe15e633fff
+ size 3963
documentation/hyperparameters.yml ADDED
@@ -0,0 +1,69 @@
+ base_model: Neko-Institute-of-Science/LLaMA-30B-HF
+ base_model_config: Neko-Institute-of-Science/LLaMA-30B-HF
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ load_in_8bit: true
+ load_4bit:
+ datasets:
+   - path: practicaldreamer/RPGPT_PublicDomain-ShareGPT
+     data_files: RPGPT_PublicDomain_v3-sharegpt.json
+     type: sharegpt
+ dataset_prepared_path: data/last_run_prepared
+ val_set_size: 0.0025
+ adapter: lora
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len:
+ lora_r: 64
+ lora_alpha: 128
+ lora_dropout: 0.05
+ lora_target_modules:
+   - q_proj
+   - v_proj
+   # - k_proj
+   # - o_proj
+ lora_fan_in_fan_out: false
+ wandb_project:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model: checkpoint
+ output_dir: output_dir
+ batch_size: 128
+ micro_batch_size: 4
+ eval_batch_size: 1
+ num_epochs: 1
+ warmup_steps: 50
+ logging_steps:
+ learning_rate: 0.00003
+ optimizer: adamw_bnb_8bit
+ torchdistx_path:
+ lr_scheduler: cosine
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ tf32: true
+ gradient_checkpointing: true
+ early_stopping_patience: 3
+ resume_from_checkpoint:
+ auto_resume_from_checkpoints:
+ local_rank:
+ xformers_attention: true
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ save_steps: 4
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ fsdp_transformer_layer_cls_to_wrap:
+ fsdp_min_num_params: 2000
+ fsdp_backward_prefetch:
+   - backward_pre
+ limit_all_gathers: false
+ special_tokens:
+   pad_token: "[PAD]"
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
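Two derived numbers tie this config to the trainer state above: batch_size 128 with micro_batch_size 4 implies 32 gradient-accumulation micro-steps per optimizer update (assuming a single GPU, which the single-process flags in trainer_state.json suggest), and trainer_state.json's max_steps 99 over num_train_epochs 3 gives 33 updates per epoch, matching the logged epoch increment of about 0.03 per step:

```python
# Sketch: batch and step arithmetic implied by the config and trainer state.
batch_size, micro_batch_size = 128, 4
grad_accum = batch_size // micro_batch_size   # 32 micro-batches per update

max_steps, num_train_epochs = 99, 3           # from checkpoint-36/trainer_state.json
steps_per_epoch = max_steps / num_train_epochs
print(grad_accum, steps_per_epoch, 1 / steps_per_epoch)  # 32 33.0 0.0303...
```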
documentation/preprocessed_sample.txt ADDED
The diff for this file is too large to render. See raw diff
 
documentation/requirements.txt ADDED
@@ -0,0 +1,92 @@
+ accelerate @ git+https://github.com/huggingface/accelerate.git@24ae624d96866e3f993a13fc341ea0dcb68b1470
+ aiohttp==3.8.4
+ aiosignal==1.3.1
+ alpaca-lora-4bit @ git+https://github.com/winglian/alpaca_lora_4bit.git@1b4a376ea816eb2417404b4d1ac27fa16471588a
+ appdirs==1.4.4
+ async-timeout==4.0.2
+ attrdict==2.0.1
+ attrs==23.1.0
+ -e git+https://github.com/winglian/axolotl@a10a8265efde4ec61037560e3b8e2e31dab984af#egg=axolotl
+ bitsandbytes==0.37.2
+ black==23.3.0
+ certifi==2022.12.7
+ charset-normalizer==3.1.0
+ click==8.1.3
+ cmake==3.26.3
+ colorama==0.4.6
+ datasets==2.12.0
+ deepspeed==0.9.4
+ dill==0.3.6
+ docker-pycreds==0.4.0
+ einops==0.6.1
+ filelock==3.12.0
+ fire==0.5.0
+ flash-attn==1.0.4
+ frozenlist==1.3.3
+ fsspec==2023.4.0
+ gitdb==4.0.10
+ GitPython==3.1.31
+ hjson==3.1.0
+ huggingface-hub==0.14.1
+ idna==3.4
+ Jinja2==3.1.2
+ lit==16.0.2
+ MarkupSafe==2.1.2
+ mpmath==1.3.0
+ multidict==6.0.4
+ multiprocess==0.70.14
+ mypy-extensions==1.0.0
+ networkx==3.1
+ ninja==1.11.1
+ numpy==1.24.3
+ nvidia-cublas-cu11==11.10.3.66
+ nvidia-cuda-cupti-cu11==11.7.101
+ nvidia-cuda-nvrtc-cu11==11.7.99
+ nvidia-cuda-runtime-cu11==11.7.99
+ nvidia-cudnn-cu11==8.5.0.96
+ nvidia-cufft-cu11==10.9.0.58
+ nvidia-curand-cu11==10.2.10.91
+ nvidia-cusolver-cu11==11.4.0.1
+ nvidia-cusparse-cu11==11.7.4.91
+ nvidia-nccl-cu11==2.14.3
+ nvidia-nvtx-cu11==11.7.91
+ packaging==23.1
+ pandas==2.0.1
+ pathspec==0.11.1
+ pathtools==0.1.2
+ peft @ git+https://github.com/huggingface/peft.git@70af02a2bca5a63921790036b2c9430edf4037e2
+ platformdirs==3.5.0
+ protobuf==4.22.4
+ psutil==5.9.5
+ py-cpuinfo==9.0.0
+ pyarrow==12.0.0
+ pydantic==1.10.7
+ pyre-extensions==0.0.29
+ python-dateutil==2.8.2
+ pytz==2023.3
+ PyYAML==6.0
+ regex==2023.5.5
+ requests==2.30.0
+ responses==0.18.0
+ safetensors==0.3.1
+ sentencepiece==0.1.99
+ sentry-sdk==1.21.1
+ setproctitle==1.3.2
+ six==1.16.0
+ smmap==5.0.0
+ sympy==1.11.1
+ termcolor==2.3.0
+ tokenizers==0.13.3
+ tomli==2.0.1
+ torch==2.0.0
+ tqdm==4.65.0
+ transformers @ git+https://github.com/huggingface/transformers.git@799df10aef3abfe6158c83daf0a9eacf8f6f0a1f
+ triton==2.0.0
+ typing-inspect==0.8.0
+ typing_extensions==4.5.0
+ tzdata==2023.3
+ urllib3==2.0.2
+ wandb==0.15.4
+ xformers==0.0.19
+ xxhash==3.2.0
+ yarl==1.9.2
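Several entries are pinned to exact git commits (accelerate, alpaca-lora-4bit, axolotl, peft, transformers), so the environment should be reproduced from this file rather than by upgrading packages individually. A small sketch that spot-checks a few installed versions against the pins:

```python
# Sketch: spot-check a few installed versions against the pins above.
from importlib.metadata import version

pins = {"torch": "2.0.0", "bitsandbytes": "0.37.2", "xformers": "0.0.19"}
for name, pinned in pins.items():
    installed = version(name)
    flag = "ok" if installed == pinned else f"MISMATCH (pinned {pinned})"
    print(f"{name}=={installed}  {flag}")
```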
documentation/wandb.info ADDED
@@ -0,0 +1 @@
+ https://wandb.ai/practicaldreamer/rpgpt/runs/d4gsi8vy