ayuan0324 committed on
Commit
3866dd5
1 Parent(s): 2222c6a

Upload 43 files

Browse files
Files changed (43) hide show
  1. config.json +24 -0
  2. finetune.py +283 -0
  3. generation_config.json +7 -0
  4. pytorch_model-00001-of-00039.bin +3 -0
  5. pytorch_model-00002-of-00039.bin +3 -0
  6. pytorch_model-00003-of-00039.bin +3 -0
  7. pytorch_model-00004-of-00039.bin +3 -0
  8. pytorch_model-00005-of-00039.bin +3 -0
  9. pytorch_model-00006-of-00039.bin +3 -0
  10. pytorch_model-00007-of-00039.bin +3 -0
  11. pytorch_model-00008-of-00039.bin +3 -0
  12. pytorch_model-00009-of-00039.bin +3 -0
  13. pytorch_model-00010-of-00039.bin +3 -0
  14. pytorch_model-00011-of-00039.bin +3 -0
  15. pytorch_model-00012-of-00039.bin +3 -0
  16. pytorch_model-00013-of-00039.bin +3 -0
  17. pytorch_model-00014-of-00039.bin +3 -0
  18. pytorch_model-00015-of-00039.bin +3 -0
  19. pytorch_model-00016-of-00039.bin +3 -0
  20. pytorch_model-00017-of-00039.bin +3 -0
  21. pytorch_model-00018-of-00039.bin +3 -0
  22. pytorch_model-00019-of-00039.bin +3 -0
  23. pytorch_model-00020-of-00039.bin +3 -0
  24. pytorch_model-00021-of-00039.bin +3 -0
  25. pytorch_model-00022-of-00039.bin +3 -0
  26. pytorch_model-00023-of-00039.bin +3 -0
  27. pytorch_model-00024-of-00039.bin +3 -0
  28. pytorch_model-00025-of-00039.bin +3 -0
  29. pytorch_model-00026-of-00039.bin +3 -0
  30. pytorch_model-00027-of-00039.bin +3 -0
  31. pytorch_model-00028-of-00039.bin +3 -0
  32. pytorch_model-00029-of-00039.bin +3 -0
  33. pytorch_model-00030-of-00039.bin +3 -0
  34. pytorch_model-00031-of-00039.bin +3 -0
  35. pytorch_model-00032-of-00039.bin +3 -0
  36. pytorch_model-00033-of-00039.bin +3 -0
  37. pytorch_model-00034-of-00039.bin +3 -0
  38. pytorch_model-00035-of-00039.bin +3 -0
  39. pytorch_model-00036-of-00039.bin +3 -0
  40. pytorch_model-00037-of-00039.bin +3 -0
  41. pytorch_model-00038-of-00039.bin +3 -0
  42. pytorch_model-00039-of-00039.bin +3 -0
  43. pytorch_model.bin.index.json +330 -0
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "huggyllama/llama-7b",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 4096,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 11008,
12
+ "max_position_embeddings": 2048,
13
+ "max_sequence_length": 2048,
14
+ "model_type": "llama",
15
+ "num_attention_heads": 32,
16
+ "num_hidden_layers": 32,
17
+ "pad_token_id": 0,
18
+ "rms_norm_eps": 1e-06,
19
+ "tie_word_embeddings": false,
20
+ "torch_dtype": "float16",
21
+ "transformers_version": "4.28.0",
22
+ "use_cache": true,
23
+ "vocab_size": 32000
24
+ }
finetune.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from typing import List
4
+
5
+ import fire
6
+ import torch
7
+ import transformers
8
+ from datasets import load_dataset
9
+
10
+ """
11
+ Unused imports:
12
+ import torch.nn as nn
13
+ import bitsandbytes as bnb
14
+ """
15
+
16
+ from peft import (
17
+ LoraConfig,
18
+ get_peft_model,
19
+ get_peft_model_state_dict,
20
+ prepare_model_for_int8_training,
21
+ set_peft_model_state_dict,
22
+ )
23
+ from transformers import LlamaForCausalLM, LlamaTokenizer
24
+
25
+ from utils.prompter import Prompter
26
+
27
+
28
def train(
    # model/data params
    base_model: str = "./hf_ckpt",  # the only required argument
    data_path: str = "ayuan0324/ocean_only",
    output_dir: str = "./lora-alpaca",
    # training hyperparams
    batch_size: int = 128,
    micro_batch_size: int = 4,
    num_epochs: int = 3,
    learning_rate: float = 1e-4,
    cutoff_len: int = 512,
    val_set_size: int = 2000,
    # lora hyperparams
    lora_r: int = 8,
    lora_alpha: int = 16,
    lora_dropout: float = 0.05,
    # NOTE: list default is shared between calls; it is only read here, never mutated.
    lora_target_modules: List[str] = [
        "q_proj",
        "v_proj",
    ],
    # llm hyperparams
    train_on_inputs: bool = True,  # if False, masks out inputs in loss
    add_eos_token: bool = False,
    group_by_length: bool = False,  # faster, but produces an odd training loss curve
    # wandb params
    wandb_project: str = "",
    wandb_run_name: str = "",
    wandb_watch: str = "",  # options: false | gradients | all
    wandb_log_model: str = "",  # options: false | true
    resume_from_checkpoint: str = None,  # either training checkpoint or final adapter
    prompt_template_name: str = "alpaca",  # The prompt template to use, will default to alpaca.
):
    """Fine-tune a LLaMA causal LM with LoRA adapters and save the result.

    The base model is loaded in 8-bit, wrapped with a LoRA config built from
    the ``lora_*`` arguments, trained with ``transformers.Trainer`` on prompts
    rendered by ``Prompter``, and finally written to ``output_dir`` via
    ``model.save_pretrained``.

    Args:
        base_model: Path or hub id passed to ``from_pretrained`` for both the
            model and the tokenizer. Must be non-empty.
        data_path: A ``.json``/``.jsonl`` file (loaded with the ``json``
            builder) or any other identifier accepted by ``load_dataset``.
            Each record is read through its ``instruction``, ``input`` and
            ``output`` keys.
        output_dir: Where checkpoints and the final model are written.
        batch_size: Target effective batch size; together with
            ``micro_batch_size`` (and ``WORLD_SIZE`` under DDP) it determines
            the number of gradient accumulation steps.
        micro_batch_size: Per-device train batch size.
        num_epochs: Number of training epochs.
        learning_rate: Learning rate passed to ``TrainingArguments``.
        cutoff_len: Maximum token length; longer prompts are truncated.
        val_set_size: Size of the held-out split taken from ``data["train"]``.
            ``0`` (or less) disables evaluation.
        lora_r, lora_alpha, lora_dropout, lora_target_modules: Forwarded to
            ``LoraConfig``.
        train_on_inputs: When False, label positions covering the user prompt
            are set to ``-100``.
        add_eos_token: Only used when ``train_on_inputs`` is False: controls
            whether the user-prompt tokenization appends EOS and whether the
            masked prefix length is reduced by one.
        group_by_length: Forwarded to ``TrainingArguments``.
        wandb_project, wandb_watch, wandb_log_model: When non-empty, exported
            as ``WANDB_PROJECT`` / ``WANDB_WATCH`` / ``WANDB_LOG_MODEL``.
        wandb_run_name: Used as ``run_name`` when wandb is enabled.
        resume_from_checkpoint: Directory containing either
            ``pytorch_model.bin`` or ``adapter_model.bin`` to restore from.
        prompt_template_name: Template name given to ``Prompter``.

    Raises:
        AssertionError: If ``base_model`` is empty.
    """
    if int(os.environ.get("LOCAL_RANK", 0)) == 0:
        # Only the first local process echoes the configuration.
        print(
            f"Training Alpaca-LoRA model with params:\n"
            f"base_model: {base_model}\n"
            f"data_path: {data_path}\n"
            f"output_dir: {output_dir}\n"
            f"batch_size: {batch_size}\n"
            f"micro_batch_size: {micro_batch_size}\n"
            f"num_epochs: {num_epochs}\n"
            f"learning_rate: {learning_rate}\n"
            f"cutoff_len: {cutoff_len}\n"
            f"val_set_size: {val_set_size}\n"
            f"lora_r: {lora_r}\n"
            f"lora_alpha: {lora_alpha}\n"
            f"lora_dropout: {lora_dropout}\n"
            f"lora_target_modules: {lora_target_modules}\n"
            f"train_on_inputs: {train_on_inputs}\n"
            f"add_eos_token: {add_eos_token}\n"
            f"group_by_length: {group_by_length}\n"
            f"wandb_project: {wandb_project}\n"
            f"wandb_run_name: {wandb_run_name}\n"
            f"wandb_watch: {wandb_watch}\n"
            f"wandb_log_model: {wandb_log_model}\n"
            f"resume_from_checkpoint: {resume_from_checkpoint or False}\n"
            f"prompt template: {prompt_template_name}\n"
        )
    assert (
        base_model
    ), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"
    # Clamp to 1: the floor division yields 0 when batch_size < micro_batch_size,
    # which is not a usable accumulation step count.
    gradient_accumulation_steps = max(1, batch_size // micro_batch_size)

    prompter = Prompter(prompt_template_name)

    device_map = "auto"
    world_size = int(os.environ.get("WORLD_SIZE", 1))
    ddp = world_size != 1
    if ddp:
        # One process per device: pin the whole model to this rank's device.
        device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
        # Same clamp as above; dividing by world_size can also floor to 0.
        gradient_accumulation_steps = max(
            1, gradient_accumulation_steps // world_size
        )

    # Check if parameter passed or if set within environ
    use_wandb = bool(wandb_project) or bool(os.environ.get("WANDB_PROJECT"))
    # Only overwrite environ if wandb param passed
    if wandb_project:
        os.environ["WANDB_PROJECT"] = wandb_project
    if wandb_watch:
        os.environ["WANDB_WATCH"] = wandb_watch
    if wandb_log_model:
        os.environ["WANDB_LOG_MODEL"] = wandb_log_model

    model = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=True,
        torch_dtype=torch.float16,
        device_map=device_map,
    )

    tokenizer = LlamaTokenizer.from_pretrained(base_model)

    tokenizer.pad_token_id = (
        0  # unk. we want this to be different from the eos token
    )
    tokenizer.padding_side = "left"  # Allow batched inference

    def tokenize(prompt, add_eos_token=True):
        """Tokenize ``prompt`` (truncated to ``cutoff_len``) and copy ids to labels.

        EOS is appended only if requested, not already present, and the
        sequence is still shorter than ``cutoff_len``.
        """
        # there's probably a way to do this with the tokenizer settings
        # but again, gotta move fast
        result = tokenizer(
            prompt,
            truncation=True,
            max_length=cutoff_len,
            padding=False,
            return_tensors=None,
        )
        if (
            result["input_ids"][-1] != tokenizer.eos_token_id
            and len(result["input_ids"]) < cutoff_len
            and add_eos_token
        ):
            result["input_ids"].append(tokenizer.eos_token_id)
            result["attention_mask"].append(1)

        result["labels"] = result["input_ids"].copy()

        return result

    def generate_and_tokenize_prompt(data_point):
        """Render one dataset record into a tokenized training example."""
        full_prompt = prompter.generate_prompt(
            data_point["instruction"],
            data_point["input"],
            data_point["output"],
        )
        tokenized_full_prompt = tokenize(full_prompt)
        if not train_on_inputs:
            # Tokenize the prompt without the answer to learn how many leading
            # label positions belong to the user and must be masked.
            user_prompt = prompter.generate_prompt(
                data_point["instruction"], data_point["input"]
            )
            tokenized_user_prompt = tokenize(
                user_prompt, add_eos_token=add_eos_token
            )
            user_prompt_len = len(tokenized_user_prompt["input_ids"])

            if add_eos_token:
                user_prompt_len -= 1

            # -100 is used here as the "ignore" label value for masked tokens.
            tokenized_full_prompt["labels"] = [
                -100
            ] * user_prompt_len + tokenized_full_prompt["labels"][
                user_prompt_len:
            ]  # could be sped up, probably
        return tokenized_full_prompt

    model = prepare_model_for_int8_training(model)

    config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        target_modules=lora_target_modules,
        lora_dropout=lora_dropout,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, config)

    if data_path.endswith((".json", ".jsonl")):
        data = load_dataset("json", data_files=data_path)
    else:
        data = load_dataset(data_path)

    if resume_from_checkpoint:
        # Check the available weights and load them
        checkpoint_name = os.path.join(
            resume_from_checkpoint, "pytorch_model.bin"
        )  # Full checkpoint
        if not os.path.exists(checkpoint_name):
            checkpoint_name = os.path.join(
                resume_from_checkpoint, "adapter_model.bin"
            )  # only LoRA model - LoRA config above has to fit
            resume_from_checkpoint = (
                False  # So the trainer won't try loading its state
            )
        # The two files above have a different name depending on how they were saved, but are actually the same.
        if os.path.exists(checkpoint_name):
            print(f"Restarting from {checkpoint_name}")
            # Load onto CPU so every rank does not materialize the tensors on
            # the device they were saved from.
            # NOTE: torch.load unpickles the file; only load trusted checkpoints.
            adapters_weights = torch.load(checkpoint_name, map_location="cpu")
            set_peft_model_state_dict(model, adapters_weights)
        else:
            print(f"Checkpoint {checkpoint_name} not found")

    model.print_trainable_parameters()  # Be more transparent about the % of trainable params.

    run_eval = val_set_size > 0
    if run_eval:
        train_val = data["train"].train_test_split(
            test_size=val_set_size, shuffle=True, seed=42
        )
        train_data = (
            train_val["train"].shuffle().map(generate_and_tokenize_prompt)
        )
        val_data = (
            train_val["test"].shuffle().map(generate_and_tokenize_prompt)
        )
    else:
        train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
        val_data = None

    if not ddp and torch.cuda.device_count() > 1:
        # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
        model.is_parallelizable = True
        model.model_parallel = True

    trainer = transformers.Trainer(
        model=model,
        train_dataset=train_data,
        eval_dataset=val_data,
        args=transformers.TrainingArguments(
            per_device_train_batch_size=micro_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=100,
            num_train_epochs=num_epochs,
            learning_rate=learning_rate,
            fp16=True,
            logging_steps=10,
            optim="adamw_torch",
            evaluation_strategy="steps" if run_eval else "no",
            save_strategy="steps",
            eval_steps=200 if run_eval else None,
            save_steps=200,
            output_dir=output_dir,
            save_total_limit=3,
            load_best_model_at_end=run_eval,
            ddp_find_unused_parameters=False if ddp else None,
            group_by_length=group_by_length,
            # NOTE(review): None may fall back to the library default set of
            # reporters rather than disabling reporting - confirm intent.
            report_to="wandb" if use_wandb else None,
            run_name=wandb_run_name if use_wandb else None,
        ),
        data_collator=transformers.DataCollatorForSeq2Seq(
            tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
        ),
    )
    model.config.use_cache = False

    # Rebind state_dict on this instance so it yields only what
    # get_peft_model_state_dict extracts from the full state dict.
    old_state_dict = model.state_dict
    model.state_dict = (
        lambda self, *_, **__: get_peft_model_state_dict(
            self, old_state_dict()
        )
    ).__get__(model, type(model))

    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)

    trainer.train(resume_from_checkpoint=resume_from_checkpoint)

    model.save_pretrained(output_dir)

    print(
        "\n If there's a warning about missing keys above, please disregard :)"
    )
280
+
281
+
282
# Script entry point: python-fire exposes train()'s keyword arguments as CLI flags.
if __name__ == "__main__":
    fire.Fire(train)
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.28.0"
7
+ }
pytorch_model-00001-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90fc3335deafc1d0cd82699db4fafbfa22e5fc5c52696e05ad1930e6446c11fa
3
+ size 396364479
pytorch_model-00002-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96ce010ee58182b60924d585564c8b7d8bfb5e84f0f1fafc0176b36b8f49e5c0
3
+ size 371215393
pytorch_model-00003-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e329af0cfdee73b8dbee049a3825a60305360629a80ca20dfd5ffbc1480235
3
+ size 371215986
pytorch_model-00004-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce8a1e871b4c94010a63b9deb286e5ff7b02b0b3d83c3221ced303c66bf0d8b6
3
+ size 371215986
pytorch_model-00005-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac9fff71fd60f865e2ca9572ea35fc9312c138a751db424a26b4f4f044504d2e
3
+ size 371215986
pytorch_model-00006-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc56fa3fcf2556087de8deb1c6ef2f2e9740cc2c5005b8644d7167d4543883dc
3
+ size 314575888
pytorch_model-00007-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c7486ef5ab3126ebce1f16eff9e50357409fe0e87a54bcd705bbbd47fbf508f
3
+ size 314592882
pytorch_model-00008-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93fead1f4c2dbd9566dfd20fce2cc19620238a6dbace3d7595e6f61354e0920
3
+ size 314592882
pytorch_model-00009-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b25592d4d48d2cc5249b8df5602d95c090cf7269d5b829d330f7efccaaaf33d
3
+ size 371215393
pytorch_model-00010-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a5b8de0bb5870624f744740cb6407a461fb46c2171ee68e96de631c43d21712
3
+ size 371215986
pytorch_model-00011-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:776350ef96be9db69e0a3965ef154c47c49c812ff782d2d3e869746d3bfae493
3
+ size 371215986
pytorch_model-00012-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e6c471713784f1d67472d3c3e6adf0c4f54b074e787272ab630b07855f8b01
3
+ size 371215986
pytorch_model-00013-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d31049bc510f3a4dd1b99722ce9616083fde95dac580c500bb45f4fab6aeb5
3
+ size 314575888
pytorch_model-00014-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cda02e4999cbb38ed962e2b6c886bc4a9d9e1f02bcc6eb250b46253d52e4179
3
+ size 314592882
pytorch_model-00015-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2f69ed87a5538da3ae130996cdea24fda0d44bf9f156f9aaec26ee8e595ad7
3
+ size 314592882
pytorch_model-00016-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f429e7148088e3300073e08d5c4604d5b1fb08f50979aeaa8a08aadfaada2b
3
+ size 371215393
pytorch_model-00017-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84d5406ba562b112c54ade3f7d23b9dd4f3e9b6b0b14381e91eb5d44d9fa579
3
+ size 371215986
pytorch_model-00018-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e64c30f73093d649aeb9128d6172dec4c5a943bb53bdb0b5cfbd6002b3d368
3
+ size 371215986
pytorch_model-00019-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5cce86ebb65f3de5ae89c5ebc240d77edf64aae65db519d46b34a1212e076d9
3
+ size 371215986
pytorch_model-00020-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eb9082ff795c22ecb5f47833525e7a6f9b6cbcaa95d7d6d0c5dad89544aaa61
3
+ size 314575888
pytorch_model-00021-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f2dace08ad6da9a5a2a8492e6e43e3534420a296b4d222f003fb3a19b4f601f
3
+ size 314592882
pytorch_model-00022-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73716429a131afa2787a0f23d5ca68265279cf407bdd3965da989d55d86cba8e
3
+ size 314592882
pytorch_model-00023-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e18ed7f91b6f34e1d17ac67f1deda6893f340c9b8de18d34895f95df39c964ae
3
+ size 371215393
pytorch_model-00024-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f7a0695f9766fe5a501238b769c09a7ae65fdf9927c3709a2af258972d90873
3
+ size 371215986
pytorch_model-00025-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724cde5b763d3296daf126f1995fed529291cff8db7b21e19df71ffa7088480f
3
+ size 371215986
pytorch_model-00026-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6cb5e18deac94f237ff77d4df32da3d910198a4a5e1e6dade3cd1992abade7d
3
+ size 371215986
pytorch_model-00027-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef4761a5d3fec5ffa3833a222c24f984c0e594e82f9fcb547270e007cc18186
3
+ size 314575888
pytorch_model-00028-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45816163d157fd97f91a92ff2d35475a26d054624fce208df76f4143dfbf46a5
3
+ size 314592882
pytorch_model-00029-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb6d11c146f4e5d71bd553d86fba93799cf3bd9a4ed7f73b095719b33b16df2c
3
+ size 314592882
pytorch_model-00030-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e7bd467e6f8955591475bc755017a5da85c0f5990dce959921cfb95e0e781b
3
+ size 371215393
pytorch_model-00031-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c396a1a4c0c475beced1220877157fe9eaace0f9837ff806cc716032baa41b73
3
+ size 371215986
pytorch_model-00032-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79970ec11ba37086fe77fc441c44c5c7355958d720d5264f8c10ca4e31f801e2
3
+ size 371215986
pytorch_model-00033-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1774033715bee46e83b1df9cbdf735eb863acbfaa8366506f5bde2284826ad2
3
+ size 371215986
pytorch_model-00034-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d79092fd6b36c131b49d1af7c7a11148a3d2e5e751a188008cfa2156f1ad28d
3
+ size 314575888
pytorch_model-00035-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce14b273fc3e7b48133a2dc19a67aeb4b5e833ae112ac48d71caae66ab7c34b9
3
+ size 314592882
pytorch_model-00036-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fbea1247f48991cc3e930843a3c568db3cd8416dcb52044c91200da80d123f
3
+ size 314592882
pytorch_model-00037-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fc157263f2901696a39292c3bde890cded5696b3becbad0a7b89539bc1c4efc
3
+ size 371215393
pytorch_model-00038-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38d4728a76a0b2f6c1db125d74cb8150fb240602588cd1e52d37de63d886808f
3
+ size 304114977
pytorch_model-00039-of-00039.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3350fb71c2bc1e88d77d9685716fd2759e879137814a4120d98265a43f405919
3
+ size 262144938
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 13476839424
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "pytorch_model-00039-of-00039.bin",
7
+ "model.embed_tokens.weight": "pytorch_model-00001-of-00039.bin",
8
+ "model.layers.0.input_layernorm.weight": "pytorch_model-00002-of-00039.bin",
9
+ "model.layers.0.mlp.down_proj.weight": "pytorch_model-00002-of-00039.bin",
10
+ "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00002-of-00039.bin",
11
+ "model.layers.0.mlp.up_proj.weight": "pytorch_model-00002-of-00039.bin",
12
+ "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00002-of-00039.bin",
13
+ "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00039.bin",
14
+ "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00039.bin",
15
+ "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00039.bin",
16
+ "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00039.bin",
17
+ "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00039.bin",
18
+ "model.layers.1.input_layernorm.weight": "pytorch_model-00003-of-00039.bin",
19
+ "model.layers.1.mlp.down_proj.weight": "pytorch_model-00003-of-00039.bin",
20
+ "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00003-of-00039.bin",
21
+ "model.layers.1.mlp.up_proj.weight": "pytorch_model-00003-of-00039.bin",
22
+ "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00003-of-00039.bin",
23
+ "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00002-of-00039.bin",
24
+ "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00003-of-00039.bin",
25
+ "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00002-of-00039.bin",
26
+ "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00039.bin",
27
+ "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00002-of-00039.bin",
28
+ "model.layers.10.input_layernorm.weight": "pytorch_model-00014-of-00039.bin",
29
+ "model.layers.10.mlp.down_proj.weight": "pytorch_model-00013-of-00039.bin",
30
+ "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00013-of-00039.bin",
31
+ "model.layers.10.mlp.up_proj.weight": "pytorch_model-00014-of-00039.bin",
32
+ "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00014-of-00039.bin",
33
+ "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00013-of-00039.bin",
34
+ "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00013-of-00039.bin",
35
+ "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00013-of-00039.bin",
36
+ "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00039.bin",
37
+ "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00013-of-00039.bin",
38
+ "model.layers.11.input_layernorm.weight": "pytorch_model-00015-of-00039.bin",
39
+ "model.layers.11.mlp.down_proj.weight": "pytorch_model-00015-of-00039.bin",
40
+ "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00014-of-00039.bin",
41
+ "model.layers.11.mlp.up_proj.weight": "pytorch_model-00015-of-00039.bin",
42
+ "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00015-of-00039.bin",
43
+ "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00014-of-00039.bin",
44
+ "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00014-of-00039.bin",
45
+ "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00014-of-00039.bin",
46
+ "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00039.bin",
47
+ "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00014-of-00039.bin",
48
+ "model.layers.12.input_layernorm.weight": "pytorch_model-00016-of-00039.bin",
49
+ "model.layers.12.mlp.down_proj.weight": "pytorch_model-00016-of-00039.bin",
50
+ "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00016-of-00039.bin",
51
+ "model.layers.12.mlp.up_proj.weight": "pytorch_model-00016-of-00039.bin",
52
+ "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00016-of-00039.bin",
53
+ "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00015-of-00039.bin",
54
+ "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00015-of-00039.bin",
55
+ "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00015-of-00039.bin",
56
+ "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00039.bin",
57
+ "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00015-of-00039.bin",
58
+ "model.layers.13.input_layernorm.weight": "pytorch_model-00017-of-00039.bin",
59
+ "model.layers.13.mlp.down_proj.weight": "pytorch_model-00017-of-00039.bin",
60
+ "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00017-of-00039.bin",
61
+ "model.layers.13.mlp.up_proj.weight": "pytorch_model-00017-of-00039.bin",
62
+ "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00017-of-00039.bin",
63
+ "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00016-of-00039.bin",
64
+ "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00017-of-00039.bin",
65
+ "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00016-of-00039.bin",
66
+ "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00039.bin",
67
+ "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00016-of-00039.bin",
68
+ "model.layers.14.input_layernorm.weight": "pytorch_model-00018-of-00039.bin",
69
+ "model.layers.14.mlp.down_proj.weight": "pytorch_model-00018-of-00039.bin",
70
+ "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00018-of-00039.bin",
71
+ "model.layers.14.mlp.up_proj.weight": "pytorch_model-00018-of-00039.bin",
72
+ "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00018-of-00039.bin",
73
+ "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00017-of-00039.bin",
74
+ "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00018-of-00039.bin",
75
+ "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00017-of-00039.bin",
76
+ "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00018-of-00039.bin",
77
+ "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00018-of-00039.bin",
78
+ "model.layers.15.input_layernorm.weight": "pytorch_model-00019-of-00039.bin",
79
+ "model.layers.15.mlp.down_proj.weight": "pytorch_model-00019-of-00039.bin",
80
+ "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00019-of-00039.bin",
81
+ "model.layers.15.mlp.up_proj.weight": "pytorch_model-00019-of-00039.bin",
82
+ "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00019-of-00039.bin",
83
+ "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00019-of-00039.bin",
84
+ "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00019-of-00039.bin",
85
+ "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00018-of-00039.bin",
86
+ "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00039.bin",
87
+ "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00019-of-00039.bin",
88
+ "model.layers.16.input_layernorm.weight": "pytorch_model-00021-of-00039.bin",
89
+ "model.layers.16.mlp.down_proj.weight": "pytorch_model-00020-of-00039.bin",
90
+ "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00020-of-00039.bin",
91
+ "model.layers.16.mlp.up_proj.weight": "pytorch_model-00021-of-00039.bin",
92
+ "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00021-of-00039.bin",
93
+ "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00020-of-00039.bin",
94
+ "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00020-of-00039.bin",
95
+ "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00020-of-00039.bin",
96
+ "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00039.bin",
97
+ "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00020-of-00039.bin",
98
+ "model.layers.17.input_layernorm.weight": "pytorch_model-00022-of-00039.bin",
99
+ "model.layers.17.mlp.down_proj.weight": "pytorch_model-00022-of-00039.bin",
100
+ "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00021-of-00039.bin",
101
+ "model.layers.17.mlp.up_proj.weight": "pytorch_model-00022-of-00039.bin",
102
+ "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00022-of-00039.bin",
103
+ "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00021-of-00039.bin",
104
+ "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00021-of-00039.bin",
105
+ "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00021-of-00039.bin",
106
+ "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00039.bin",
107
+ "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00021-of-00039.bin",
108
+ "model.layers.18.input_layernorm.weight": "pytorch_model-00023-of-00039.bin",
109
+ "model.layers.18.mlp.down_proj.weight": "pytorch_model-00023-of-00039.bin",
110
+ "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00023-of-00039.bin",
111
+ "model.layers.18.mlp.up_proj.weight": "pytorch_model-00023-of-00039.bin",
112
+ "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00023-of-00039.bin",
113
+ "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00022-of-00039.bin",
114
+ "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00022-of-00039.bin",
115
+ "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00022-of-00039.bin",
116
+ "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00039.bin",
117
+ "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00022-of-00039.bin",
118
+ "model.layers.19.input_layernorm.weight": "pytorch_model-00024-of-00039.bin",
119
+ "model.layers.19.mlp.down_proj.weight": "pytorch_model-00024-of-00039.bin",
120
+ "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00024-of-00039.bin",
121
+ "model.layers.19.mlp.up_proj.weight": "pytorch_model-00024-of-00039.bin",
122
+ "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00024-of-00039.bin",
123
+ "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00023-of-00039.bin",
124
+ "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00024-of-00039.bin",
125
+ "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00023-of-00039.bin",
126
+ "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00039.bin",
127
+ "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00023-of-00039.bin",
128
+ "model.layers.2.input_layernorm.weight": "pytorch_model-00004-of-00039.bin",
129
+ "model.layers.2.mlp.down_proj.weight": "pytorch_model-00004-of-00039.bin",
130
+ "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00004-of-00039.bin",
131
+ "model.layers.2.mlp.up_proj.weight": "pytorch_model-00004-of-00039.bin",
132
+ "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00004-of-00039.bin",
133
+ "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00003-of-00039.bin",
134
+ "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00004-of-00039.bin",
135
+ "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00003-of-00039.bin",
136
+ "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00039.bin",
137
+ "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00004-of-00039.bin",
138
+ "model.layers.20.input_layernorm.weight": "pytorch_model-00025-of-00039.bin",
139
+ "model.layers.20.mlp.down_proj.weight": "pytorch_model-00025-of-00039.bin",
140
+ "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00025-of-00039.bin",
141
+ "model.layers.20.mlp.up_proj.weight": "pytorch_model-00025-of-00039.bin",
142
+ "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00025-of-00039.bin",
143
+ "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00024-of-00039.bin",
144
+ "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00025-of-00039.bin",
145
+ "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00024-of-00039.bin",
146
+ "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00039.bin",
147
+ "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00025-of-00039.bin",
148
+ "model.layers.21.input_layernorm.weight": "pytorch_model-00026-of-00039.bin",
149
+ "model.layers.21.mlp.down_proj.weight": "pytorch_model-00026-of-00039.bin",
150
+ "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00026-of-00039.bin",
151
+ "model.layers.21.mlp.up_proj.weight": "pytorch_model-00026-of-00039.bin",
152
+ "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00026-of-00039.bin",
153
+ "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00026-of-00039.bin",
154
+ "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00026-of-00039.bin",
155
+ "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00025-of-00039.bin",
156
+ "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00039.bin",
157
+ "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00026-of-00039.bin",
158
+ "model.layers.22.input_layernorm.weight": "pytorch_model-00028-of-00039.bin",
159
+ "model.layers.22.mlp.down_proj.weight": "pytorch_model-00027-of-00039.bin",
160
+ "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00027-of-00039.bin",
161
+ "model.layers.22.mlp.up_proj.weight": "pytorch_model-00028-of-00039.bin",
162
+ "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00028-of-00039.bin",
163
+ "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00027-of-00039.bin",
164
+ "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00027-of-00039.bin",
165
+ "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00027-of-00039.bin",
166
+ "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00039.bin",
167
+ "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00027-of-00039.bin",
168
+ "model.layers.23.input_layernorm.weight": "pytorch_model-00029-of-00039.bin",
169
+ "model.layers.23.mlp.down_proj.weight": "pytorch_model-00029-of-00039.bin",
170
+ "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00028-of-00039.bin",
171
+ "model.layers.23.mlp.up_proj.weight": "pytorch_model-00029-of-00039.bin",
172
+ "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00029-of-00039.bin",
173
+ "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00028-of-00039.bin",
174
+ "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00028-of-00039.bin",
175
+ "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00028-of-00039.bin",
176
+ "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00028-of-00039.bin",
177
+ "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00028-of-00039.bin",
178
+ "model.layers.24.input_layernorm.weight": "pytorch_model-00030-of-00039.bin",
179
+ "model.layers.24.mlp.down_proj.weight": "pytorch_model-00030-of-00039.bin",
180
+ "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00030-of-00039.bin",
181
+ "model.layers.24.mlp.up_proj.weight": "pytorch_model-00030-of-00039.bin",
182
+ "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00030-of-00039.bin",
183
+ "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00029-of-00039.bin",
184
+ "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00029-of-00039.bin",
185
+ "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00029-of-00039.bin",
186
+ "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00029-of-00039.bin",
187
+ "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00029-of-00039.bin",
188
+ "model.layers.25.input_layernorm.weight": "pytorch_model-00031-of-00039.bin",
189
+ "model.layers.25.mlp.down_proj.weight": "pytorch_model-00031-of-00039.bin",
190
+ "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00031-of-00039.bin",
191
+ "model.layers.25.mlp.up_proj.weight": "pytorch_model-00031-of-00039.bin",
192
+ "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00031-of-00039.bin",
193
+ "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00030-of-00039.bin",
194
+ "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00031-of-00039.bin",
195
+ "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00030-of-00039.bin",
196
+ "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00031-of-00039.bin",
197
+ "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00030-of-00039.bin",
198
+ "model.layers.26.input_layernorm.weight": "pytorch_model-00032-of-00039.bin",
199
+ "model.layers.26.mlp.down_proj.weight": "pytorch_model-00032-of-00039.bin",
200
+ "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00032-of-00039.bin",
201
+ "model.layers.26.mlp.up_proj.weight": "pytorch_model-00032-of-00039.bin",
202
+ "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00032-of-00039.bin",
203
+ "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00031-of-00039.bin",
204
+ "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00032-of-00039.bin",
205
+ "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00031-of-00039.bin",
206
+ "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00032-of-00039.bin",
207
+ "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00032-of-00039.bin",
208
+ "model.layers.27.input_layernorm.weight": "pytorch_model-00033-of-00039.bin",
209
+ "model.layers.27.mlp.down_proj.weight": "pytorch_model-00033-of-00039.bin",
210
+ "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00033-of-00039.bin",
211
+ "model.layers.27.mlp.up_proj.weight": "pytorch_model-00033-of-00039.bin",
212
+ "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00033-of-00039.bin",
213
+ "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00033-of-00039.bin",
214
+ "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00033-of-00039.bin",
215
+ "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00032-of-00039.bin",
216
+ "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00033-of-00039.bin",
217
+ "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00033-of-00039.bin",
218
+ "model.layers.28.input_layernorm.weight": "pytorch_model-00035-of-00039.bin",
219
+ "model.layers.28.mlp.down_proj.weight": "pytorch_model-00034-of-00039.bin",
220
+ "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00034-of-00039.bin",
221
+ "model.layers.28.mlp.up_proj.weight": "pytorch_model-00035-of-00039.bin",
222
+ "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00035-of-00039.bin",
223
+ "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00034-of-00039.bin",
224
+ "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00034-of-00039.bin",
225
+ "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00034-of-00039.bin",
226
+ "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00034-of-00039.bin",
227
+ "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00034-of-00039.bin",
228
+ "model.layers.29.input_layernorm.weight": "pytorch_model-00036-of-00039.bin",
229
+ "model.layers.29.mlp.down_proj.weight": "pytorch_model-00036-of-00039.bin",
230
+ "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00035-of-00039.bin",
231
+ "model.layers.29.mlp.up_proj.weight": "pytorch_model-00036-of-00039.bin",
232
+ "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00036-of-00039.bin",
233
+ "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00035-of-00039.bin",
234
+ "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00035-of-00039.bin",
235
+ "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00035-of-00039.bin",
236
+ "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00035-of-00039.bin",
237
+ "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00035-of-00039.bin",
238
+ "model.layers.3.input_layernorm.weight": "pytorch_model-00005-of-00039.bin",
239
+ "model.layers.3.mlp.down_proj.weight": "pytorch_model-00005-of-00039.bin",
240
+ "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00005-of-00039.bin",
241
+ "model.layers.3.mlp.up_proj.weight": "pytorch_model-00005-of-00039.bin",
242
+ "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00005-of-00039.bin",
243
+ "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00005-of-00039.bin",
244
+ "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00005-of-00039.bin",
245
+ "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00004-of-00039.bin",
246
+ "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00039.bin",
247
+ "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00005-of-00039.bin",
248
+ "model.layers.30.input_layernorm.weight": "pytorch_model-00037-of-00039.bin",
249
+ "model.layers.30.mlp.down_proj.weight": "pytorch_model-00037-of-00039.bin",
250
+ "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00037-of-00039.bin",
251
+ "model.layers.30.mlp.up_proj.weight": "pytorch_model-00037-of-00039.bin",
252
+ "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00037-of-00039.bin",
253
+ "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00036-of-00039.bin",
254
+ "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00036-of-00039.bin",
255
+ "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00036-of-00039.bin",
256
+ "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00036-of-00039.bin",
257
+ "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00036-of-00039.bin",
258
+ "model.layers.31.input_layernorm.weight": "pytorch_model-00038-of-00039.bin",
259
+ "model.layers.31.mlp.down_proj.weight": "pytorch_model-00038-of-00039.bin",
260
+ "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00038-of-00039.bin",
261
+ "model.layers.31.mlp.up_proj.weight": "pytorch_model-00038-of-00039.bin",
262
+ "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00038-of-00039.bin",
263
+ "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00037-of-00039.bin",
264
+ "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00038-of-00039.bin",
265
+ "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00037-of-00039.bin",
266
+ "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00038-of-00039.bin",
267
+ "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00037-of-00039.bin",
268
+ "model.layers.4.input_layernorm.weight": "pytorch_model-00007-of-00039.bin",
269
+ "model.layers.4.mlp.down_proj.weight": "pytorch_model-00006-of-00039.bin",
270
+ "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00006-of-00039.bin",
271
+ "model.layers.4.mlp.up_proj.weight": "pytorch_model-00007-of-00039.bin",
272
+ "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00007-of-00039.bin",
273
+ "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00006-of-00039.bin",
274
+ "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00006-of-00039.bin",
275
+ "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00006-of-00039.bin",
276
+ "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00039.bin",
277
+ "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00006-of-00039.bin",
278
+ "model.layers.5.input_layernorm.weight": "pytorch_model-00008-of-00039.bin",
279
+ "model.layers.5.mlp.down_proj.weight": "pytorch_model-00008-of-00039.bin",
280
+ "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00007-of-00039.bin",
281
+ "model.layers.5.mlp.up_proj.weight": "pytorch_model-00008-of-00039.bin",
282
+ "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00008-of-00039.bin",
283
+ "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00007-of-00039.bin",
284
+ "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00007-of-00039.bin",
285
+ "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00007-of-00039.bin",
286
+ "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00039.bin",
287
+ "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00007-of-00039.bin",
288
+ "model.layers.6.input_layernorm.weight": "pytorch_model-00009-of-00039.bin",
289
+ "model.layers.6.mlp.down_proj.weight": "pytorch_model-00009-of-00039.bin",
290
+ "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00009-of-00039.bin",
291
+ "model.layers.6.mlp.up_proj.weight": "pytorch_model-00009-of-00039.bin",
292
+ "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00009-of-00039.bin",
293
+ "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00008-of-00039.bin",
294
+ "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00008-of-00039.bin",
295
+ "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00008-of-00039.bin",
296
+ "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00039.bin",
297
+ "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00008-of-00039.bin",
298
+ "model.layers.7.input_layernorm.weight": "pytorch_model-00010-of-00039.bin",
299
+ "model.layers.7.mlp.down_proj.weight": "pytorch_model-00010-of-00039.bin",
300
+ "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00010-of-00039.bin",
301
+ "model.layers.7.mlp.up_proj.weight": "pytorch_model-00010-of-00039.bin",
302
+ "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00010-of-00039.bin",
303
+ "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00009-of-00039.bin",
304
+ "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00010-of-00039.bin",
305
+ "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00009-of-00039.bin",
306
+ "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00039.bin",
307
+ "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00009-of-00039.bin",
308
+ "model.layers.8.input_layernorm.weight": "pytorch_model-00011-of-00039.bin",
309
+ "model.layers.8.mlp.down_proj.weight": "pytorch_model-00011-of-00039.bin",
310
+ "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00011-of-00039.bin",
311
+ "model.layers.8.mlp.up_proj.weight": "pytorch_model-00011-of-00039.bin",
312
+ "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00011-of-00039.bin",
313
+ "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00010-of-00039.bin",
314
+ "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00011-of-00039.bin",
315
+ "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00010-of-00039.bin",
316
+ "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00039.bin",
317
+ "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00011-of-00039.bin",
318
+ "model.layers.9.input_layernorm.weight": "pytorch_model-00012-of-00039.bin",
319
+ "model.layers.9.mlp.down_proj.weight": "pytorch_model-00012-of-00039.bin",
320
+ "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00012-of-00039.bin",
321
+ "model.layers.9.mlp.up_proj.weight": "pytorch_model-00012-of-00039.bin",
322
+ "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00012-of-00039.bin",
323
+ "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00012-of-00039.bin",
324
+ "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00012-of-00039.bin",
325
+ "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00011-of-00039.bin",
326
+ "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00039.bin",
327
+ "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00012-of-00039.bin",
328
+ "model.norm.weight": "pytorch_model-00038-of-00039.bin"
329
+ }
330
+ }