chunfeng994 committed on
Commit 600996f
1 Parent(s): 620411d

Upload folder using huggingface_hub

Files changed (8)
  1. cal_flops.py +31 -0
  2. cal_lr.py +76 -0
  3. cal_ppl.py +116 -0
  4. length_cdf.py +51 -0
  5. llama_pro.py +115 -0
  6. llamafy_baichuan2.py +92 -0
  7. llamafy_qwen.py +144 -0
  8. loftq_init.py +82 -0
cal_flops.py ADDED
@@ -0,0 +1,31 @@
+ # coding=utf-8
+ # Calculates the FLOPs of pre-trained models.
+ # Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
+ # Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/
+
+ import fire
+ import torch
+ from deepspeed.accelerator import get_accelerator  # type: ignore
+ from deepspeed.profiling.flops_profiler import get_model_profile  # type: ignore
+
+ from llmtuner.chat import ChatModel
+
+
+ def calculate_flops(
+     model_name_or_path: str,
+     batch_size: int = 1,
+     seq_length: int = 256,
+     flash_attn: str = "auto",
+ ):
+     with get_accelerator().device(0):
+         chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
+         fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)
+         input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
+         flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True)
+         print("FLOPs:", flops)
+         print("MACs:", macs)
+         print("Params:", params)
+
+
+ if __name__ == "__main__":
+     fire.Fire(calculate_flops)
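
For reference, the same measurement can be reproduced on any Hugging Face causal LM without the llmtuner wrapper. Below is a minimal sketch, assuming DeepSpeed and a CUDA device are available; "gpt2" is only a placeholder model name.

import torch
from deepspeed.accelerator import get_accelerator
from deepspeed.profiling.flops_profiler import get_model_profile
from transformers import AutoModelForCausalLM

with get_accelerator().device(0):
    model = AutoModelForCausalLM.from_pretrained("gpt2").cuda()  # placeholder model
    fake_input = torch.ones((1, 512), dtype=torch.long, device=model.device)
    flops, macs, params = get_model_profile(
        model, kwargs={"input_ids": fake_input, "labels": fake_input.clone()}, print_profile=False, detailed=False
    )
    print("FLOPs:", flops, "MACs:", macs, "Params:", params)
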
cal_lr.py ADDED
@@ -0,0 +1,76 @@
+ # coding=utf-8
+ # Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
+ # Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
+ # Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py
+
+ import math
+ from typing import Literal
+
+ import fire
+ import torch
+ from torch.utils.data import DataLoader
+ from tqdm import tqdm
+ from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
+
+ from llmtuner.data import get_dataset
+ from llmtuner.extras.constants import IGNORE_INDEX
+ from llmtuner.hparams import get_train_args
+ from llmtuner.model import load_tokenizer
+
+
+ BASE_LR = 3e-4  # 1.5e-4 for 30B-70B models
+ BASE_BS = 4_000_000  # from the LLaMA paper
+
+
+ def calculate_lr(
+     model_name_or_path: str,
+     batch_size: int,  # total batch size, namely (batch size * gradient accumulation * world size)
+     stage: Literal["pt", "sft"] = "sft",
+     dataset: str = "alpaca_en",
+     dataset_dir: str = "data",
+     template: str = "default",
+     cutoff_len: int = 1024,  # i.e. maximum input length during training
+     is_mistral: bool = False,  # Mistral models use a smaller learning rate
+ ):
+     model_args, data_args, training_args, _, _ = get_train_args(
+         dict(
+             stage=stage,
+             model_name_or_path=model_name_or_path,
+             dataset=dataset,
+             dataset_dir=dataset_dir,
+             template=template,
+             cutoff_len=cutoff_len,
+             output_dir="dummy_dir",
+             overwrite_cache=True,
+         )
+     )
+     tokenizer_module = load_tokenizer(model_args)
+     tokenizer = tokenizer_module["tokenizer"]
+     trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
+     if stage == "pt":
+         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+     elif stage == "sft":
+         data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
+     else:
+         raise NotImplementedError
+
+     dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
+     valid_tokens, total_tokens = 0, 0
+     for batch in tqdm(dataloader):
+         valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
+         total_tokens += torch.numel(batch["labels"])
+
+     batch_max_len = cutoff_len * batch_size  # max tokens in a batch
+     valid_ratio = valid_tokens / total_tokens
+     batch_valid_len = batch_max_len * valid_ratio
+     lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)  # lr ~ sqrt(batch_size)
+     lr = lr / 6.0 if is_mistral else lr
+     print(
+         "Optimal learning rate is {:.2e} for valid ratio {:.2f}% and effective batch size {:.2f}".format(
+             lr, valid_ratio * 100, batch_valid_len
+         )
+     )
+
+
+ if __name__ == "__main__":
+     fire.Fire(calculate_lr)
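
The scaling rule in the script is lr = BASE_LR * sqrt(batch_valid_len / BASE_BS), i.e. the learning rate grows with the square root of the effective token batch size. A worked sketch with hypothetical numbers (cutoff_len=1024, total batch size 16, 60% valid tokens), not an output of the script:

import math

BASE_LR, BASE_BS = 3e-4, 4_000_000
batch_valid_len = 1024 * 16 * 0.60                  # ~9830 valid tokens per optimization step (hypothetical)
lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)
print("{:.2e}".format(lr))                          # ~1.49e-05
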
cal_ppl.py ADDED
@@ -0,0 +1,116 @@
+ # coding=utf-8
+ # Calculates the perplexity (ppl) of pre-trained models on a given dataset.
+ # Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json
+
+ import json
+ from dataclasses import dataclass
+ from typing import Any, Dict, Literal, Optional, Sequence
+
+ import fire
+ import torch
+ from torch.utils.data import DataLoader
+ from tqdm import tqdm
+ from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
+
+ from llmtuner.data import get_dataset
+ from llmtuner.extras.constants import IGNORE_INDEX
+ from llmtuner.hparams import get_train_args
+ from llmtuner.model import load_model, load_tokenizer
+
+
+ @dataclass
+ class PairwiseDataCollatorWithPadding(DataCollatorForSeq2Seq):
+     r"""
+     Data collator for pairwise data.
+     """
+
+     train_on_prompt: bool = False
+
+     def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
+         r"""
+         Pads batched data to the longest sequence in the batch.
+
+         Only the chosen sequences are used here; prompt tokens are masked with
+         IGNORE_INDEX unless `train_on_prompt` is enabled.
+         """
+         chosen_features = []
+         for feature in features:
+             prompt_len, answer_len = len(feature["prompt_ids"]), len(feature["chosen_ids"])
+             input_ids = feature["prompt_ids"] + feature["chosen_ids"]
+             attention_mask = [1] * (prompt_len + answer_len)
+             labels = input_ids if self.train_on_prompt else [IGNORE_INDEX] * prompt_len + feature["chosen_ids"]
+             chosen_features.append({"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels})
+
+         return super().__call__(chosen_features)
+
+
+ def cal_ppl(
+     model_name_or_path: str,
+     save_name: str,
+     batch_size: int = 4,
+     stage: Literal["pt", "sft", "rm"] = "sft",
+     dataset: str = "alpaca_en",
+     dataset_dir: str = "data",
+     template: str = "default",
+     cutoff_len: int = 1024,
+     max_samples: Optional[int] = None,
+     train_on_prompt: bool = False,
+ ):
+     model_args, data_args, training_args, finetuning_args, _ = get_train_args(
+         dict(
+             stage=stage,
+             model_name_or_path=model_name_or_path,
+             dataset=dataset,
+             dataset_dir=dataset_dir,
+             template=template,
+             cutoff_len=cutoff_len,
+             max_samples=max_samples,
+             train_on_prompt=train_on_prompt,
+             output_dir="dummy_dir",
+             overwrite_cache=True,
+         )
+     )
+     tokenizer_module = load_tokenizer(model_args)
+     tokenizer = tokenizer_module["tokenizer"]
+     trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
+     model = load_model(tokenizer, model_args, finetuning_args, is_trainable=False)
+     if stage == "pt":
+         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+     elif stage == "sft":
+         data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
+     elif stage == "rm":
+         data_collator = PairwiseDataCollatorWithPadding(
+             tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX, train_on_prompt=train_on_prompt
+         )
+     else:
+         raise NotImplementedError
+
+     dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
+     criterion = torch.nn.CrossEntropyLoss(reduction="none")
+     total_ppl = 0
+     perplexities = []
+     batch: Dict[str, "torch.Tensor"]
+     with torch.no_grad():
+         for batch in tqdm(dataloader):
+             batch = batch.to(model.device)
+             outputs = model(**batch)
+             shift_logits: "torch.Tensor" = outputs["logits"][..., :-1, :]
+             shift_labels: "torch.Tensor" = batch["labels"][..., 1:]
+             loss_mask = shift_labels != IGNORE_INDEX
+             flatten_logits = shift_logits.contiguous().view(shift_labels.size(0) * shift_labels.size(1), -1)
+             flatten_labels = shift_labels.contiguous().view(-1)
+             token_logps: "torch.Tensor" = criterion(flatten_logits, flatten_labels)
+             token_logps = token_logps.contiguous().view(shift_logits.size(0), -1)
+             sentence_logps = (token_logps * loss_mask).sum(-1) / loss_mask.sum(-1)
+             total_ppl += sentence_logps.exp().sum().item()
+             perplexities.extend(sentence_logps.exp().tolist())
+
+     with open(save_name, "w", encoding="utf-8") as f:
+         json.dump(perplexities, f, indent=2)
+
+     print("Average perplexity is {:.2f}".format(total_ppl / len(perplexities)))
+     print("Perplexities have been saved at {}.".format(save_name))
+
+
+ if __name__ == "__main__":
+     fire.Fire(cal_ppl)
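
The masked per-sequence perplexity computation above can be exercised on random tensors. A minimal sketch, with an arbitrary vocabulary size of 32 standing in for real model outputs:

import torch

IGNORE_INDEX = -100
logits = torch.randn(2, 6, 32)                      # (batch, seq_len, vocab), random stand-in for model logits
labels = torch.randint(0, 32, (2, 6))
labels[:, :3] = IGNORE_INDEX                        # e.g. masked prompt tokens

shift_logits, shift_labels = logits[:, :-1, :], labels[:, 1:]
loss_mask = shift_labels != IGNORE_INDEX
criterion = torch.nn.CrossEntropyLoss(reduction="none")
token_loss = criterion(shift_logits.reshape(-1, 32), shift_labels.reshape(-1)).view(2, -1)
sentence_ppl = ((token_loss * loss_mask).sum(-1) / loss_mask.sum(-1)).exp()
print(sentence_ppl)                                 # one perplexity value per sequence
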
length_cdf.py ADDED
@@ -0,0 +1,51 @@
+ # coding=utf-8
+ # Calculates the distribution of the input lengths in the dataset.
+ # Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default
+
+ from collections import defaultdict
+
+ import fire
+ from tqdm import tqdm
+
+ from llmtuner.data import get_dataset
+ from llmtuner.hparams import get_train_args
+ from llmtuner.model import load_tokenizer
+
+
+ def length_cdf(
+     model_name_or_path: str,
+     dataset: str = "alpaca_en",
+     dataset_dir: str = "data",
+     template: str = "default",
+     interval: int = 1000,
+ ):
+     model_args, data_args, training_args, _, _ = get_train_args(
+         dict(
+             stage="sft",
+             model_name_or_path=model_name_or_path,
+             dataset=dataset,
+             dataset_dir=dataset_dir,
+             template=template,
+             cutoff_len=1_000_000,
+             output_dir="dummy_dir",
+             overwrite_cache=True,
+         )
+     )
+     tokenizer_module = load_tokenizer(model_args)
+     trainset = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)
+     total_num = len(trainset)
+     length_dict = defaultdict(int)
+     for sample in tqdm(trainset["input_ids"]):
+         length_dict[len(sample) // interval * interval] += 1
+
+     length_tuples = list(length_dict.items())
+     length_tuples.sort()
+     count_accu, prob_accu = 0, 0
+     for length, count in length_tuples:
+         count_accu += count
+         prob_accu += count / total_num * 100
+         print("{:d} ({:.2f}%) samples have length < {}.".format(count_accu, prob_accu, length + interval))
+
+
+ if __name__ == "__main__":
+     fire.Fire(length_cdf)
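
The bucketing logic is independent of the dataset loader. A tiny sketch on a hypothetical list of tokenized lengths, with interval=1000:

from collections import defaultdict

lengths = [120, 850, 1024, 1900, 2300, 4096]        # hypothetical tokenized sample lengths
interval, length_dict = 1000, defaultdict(int)
for n in lengths:
    length_dict[n // interval * interval] += 1      # bucket by flooring to a multiple of interval

count_accu, prob_accu = 0, 0
for bucket, count in sorted(length_dict.items()):
    count_accu += count
    prob_accu += count / len(lengths) * 100
    print("{:d} ({:.2f}%) samples have length < {}.".format(count_accu, prob_accu, bucket + interval))
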
llama_pro.py ADDED
@@ -0,0 +1,115 @@
+ # coding=utf-8
+ # Performs block expansion for LLaMA, Mistral, or Qwen1.5 models.
+ # Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
+ # Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
+
+ import json
+ import os
+ from collections import OrderedDict
+ from typing import TYPE_CHECKING, Optional
+
+ import fire
+ import torch
+ from safetensors.torch import save_file
+ from tqdm import tqdm
+ from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+ from transformers.modeling_utils import (
+     SAFE_WEIGHTS_INDEX_NAME,
+     SAFE_WEIGHTS_NAME,
+     WEIGHTS_INDEX_NAME,
+     WEIGHTS_NAME,
+     shard_checkpoint,
+ )
+
+
+ if TYPE_CHECKING:
+     from transformers import PretrainedConfig, PreTrainedModel
+
+
+ def change_name(name: str, old_index: int, new_index: int) -> str:
+     return name.replace(".{:d}.".format(old_index), ".{:d}.".format(new_index))
+
+
+ def block_expansion(
+     model_name_or_path: str,
+     output_dir: str,
+     num_expand: int,
+     shard_size: Optional[str] = "2GB",
+     save_safetensors: Optional[bool] = False,
+ ):
+     config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)
+     num_layers = getattr(config, "num_hidden_layers")
+     setattr(config, "num_hidden_layers", num_layers + num_expand)
+     config.save_pretrained(output_dir)
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+     tokenizer.save_pretrained(output_dir)
+
+     config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)  # load the original one
+     if save_safetensors:
+         setattr(config, "tie_word_embeddings", False)  # safetensors does not allow shared weights
+
+     model: "PreTrainedModel" = AutoModelForCausalLM.from_pretrained(
+         model_name_or_path,
+         config=config,
+         torch_dtype="auto",
+         trust_remote_code=True,
+         low_cpu_mem_usage=True,
+     )
+     state_dict = model.state_dict()
+
+     if num_layers % num_expand != 0:
+         raise ValueError("`num_layers` {} should be divisible by `num_expand` {}.".format(num_layers, num_expand))
+
+     split = num_layers // num_expand
+     layer_cnt = 0
+     output_state_dict = OrderedDict()
+     for i in range(num_layers):
+         for key, value in state_dict.items():
+             if ".{:d}.".format(i) in key:
+                 output_state_dict[change_name(key, i, layer_cnt)] = value
+
+         print("Add layer {} copied from layer {}".format(layer_cnt, i))
+         layer_cnt += 1
+         if (i + 1) % split == 0:
+             for key, value in state_dict.items():
+                 if ".{:d}.".format(i) in key:
+                     if "down_proj" in key or "o_proj" in key:
+                         output_state_dict[change_name(key, i, layer_cnt)] = torch.zeros_like(value)
+                     else:
+                         output_state_dict[change_name(key, i, layer_cnt)] = torch.clone(value)
+
+             print("Add layer {} expanded from layer {}".format(layer_cnt, i))
+             layer_cnt += 1
+
+     for key, value in state_dict.items():
+         if key not in output_state_dict:
+             output_state_dict[key] = value
+
+     weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
+     shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name)
+
+     for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
+         if save_safetensors:
+             save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
+         else:
+             torch.save(shard, os.path.join(output_dir, shard_file))
+
+     if index is None:
+         print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
+     else:
+         index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
+         with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
+             json.dump(index, f, indent=2, sort_keys=True)
+         print("Model weights saved in {}".format(output_dir))
+
+     print("Fine-tune this model with:")
+     print("    --model_name_or_path {} \\".format(output_dir))
+     print("    --finetuning_type freeze \\")
+     print("    --name_module_trainable all \\")
+     print("    --num_layer_trainable {} \\".format(num_expand))
+     print("    --use_llama_pro")
+
+
+ if __name__ == "__main__":
+     fire.Fire(block_expansion)
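
The interleaving performed by the loop above is easier to see as a dry run. A sketch assuming a hypothetical 32-layer model expanded by 8 blocks (split = 32 // 8 = 4), which yields 40 layers in total:

num_layers, num_expand = 32, 8                      # hypothetical sizes
split, layer_cnt, mapping = num_layers // num_expand, 0, []
for i in range(num_layers):
    mapping.append("new layer {} = copy of layer {}".format(layer_cnt, i))
    layer_cnt += 1
    if (i + 1) % split == 0:                        # after every `split` layers, insert a zero-initialized copy
        mapping.append("new layer {} = expansion of layer {} (o_proj/down_proj zeroed)".format(layer_cnt, i))
        layer_cnt += 1

print(layer_cnt)                                    # 40
print(mapping[3:5])                                 # layer 3 is copied, then duplicated as the zero-initialized layer 4
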
llamafy_baichuan2.py ADDED
@@ -0,0 +1,92 @@
+ # coding=utf-8
+ # Converts the Baichuan2-7B model to the same format as LLaMA2-7B.
+ # Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
+ # Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py
+ # Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
+
+ import json
+ import os
+ from collections import OrderedDict
+ from typing import Any, Dict, Optional
+
+ import fire
+ import torch
+ from safetensors.torch import save_file
+ from tqdm import tqdm
+ from transformers.modeling_utils import (
+     SAFE_WEIGHTS_INDEX_NAME,
+     SAFE_WEIGHTS_NAME,
+     WEIGHTS_INDEX_NAME,
+     WEIGHTS_NAME,
+     shard_checkpoint,
+ )
+
+
+ CONFIG_NAME = "config.json"
+
+
+ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
+     baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+     for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
+         if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
+             shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
+             baichuan2_state_dict.update(shard_weight)
+
+     llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+     for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
+         if "W_pack" in key:
+             proj_size = value.size(0) // 3
+             llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
+             llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
+             llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
+         elif "lm_head" in key:
+             llama2_state_dict[key] = torch.nn.functional.normalize(value)
+         else:
+             llama2_state_dict[key] = value
+
+     weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
+     shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
+
+     for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
+         if save_safetensors:
+             save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
+         else:
+             torch.save(shard, os.path.join(output_dir, shard_file))
+
+     if index is None:
+         print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
+     else:
+         index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
+         with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
+             json.dump(index, f, indent=2, sort_keys=True)
+         print("Model weights saved in {}".format(output_dir))
+
+
+ def save_config(input_dir: str, output_dir: str):
+     with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
+         llama2_config_dict: Dict[str, Any] = json.load(f)
+
+     llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
+     llama2_config_dict.pop("auto_map", None)
+     llama2_config_dict.pop("tokenizer_class", None)
+     llama2_config_dict["model_type"] = "llama"
+
+     with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
+         json.dump(llama2_config_dict, f, indent=2)
+     print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))
+
+
+ def llamafy_baichuan2(
+     input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+ ):
+     try:
+         os.makedirs(output_dir, exist_ok=False)
+     except Exception as e:
+         raise RuntimeError("Output dir {} already exists.".format(output_dir)) from e
+
+     save_weight(input_dir, output_dir, shard_size, save_safetensors)
+     save_config(input_dir, output_dir)
+
+
+ if __name__ == "__main__":
+     fire.Fire(llamafy_baichuan2)
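
The W_pack handling above simply cuts Baichuan2's packed QKV projection into three equal slices. A minimal sketch on a random tensor with a hypothetical hidden size of 8:

import torch

hidden = 8                                          # hypothetical hidden size
w_pack = torch.randn(3 * hidden, hidden)            # packed q/k/v projection, as in Baichuan2's W_pack
proj_size = w_pack.size(0) // 3
q_proj, k_proj, v_proj = w_pack[:proj_size], w_pack[proj_size : 2 * proj_size], w_pack[2 * proj_size :]
assert q_proj.shape == k_proj.shape == v_proj.shape == (hidden, hidden)
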
llamafy_qwen.py ADDED
@@ -0,0 +1,144 @@
+ # coding=utf-8
+ # Converts the Qwen models to the same format as LLaMA2.
+ # Usage: python llamafy_qwen.py --input_dir input --output_dir output
+ # Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
+
+ import json
+ import os
+ from collections import OrderedDict
+ from typing import Any, Dict, Optional
+
+ import fire
+ import torch
+ from safetensors import safe_open
+ from safetensors.torch import save_file
+ from tqdm import tqdm
+ from transformers.modeling_utils import (
+     SAFE_WEIGHTS_INDEX_NAME,
+     SAFE_WEIGHTS_NAME,
+     WEIGHTS_INDEX_NAME,
+     WEIGHTS_NAME,
+     shard_checkpoint,
+ )
+ from transformers.utils import check_min_version
+
+
+ try:
+     check_min_version("4.34.0")
+ except Exception:
+     raise ValueError("Please upgrade `transformers` to 4.34.0")
+
+
+ CONFIG_NAME = "config.json"
+
+
+ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
+     qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+     for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
+         if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
+             with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
+                 for key in f.keys():
+                     qwen_state_dict[key] = f.get_tensor(key)
+
+     llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+     torch_dtype = None
+     for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
+         if torch_dtype is None:
+             torch_dtype = value.dtype
+         if "wte" in key:
+             llama2_state_dict["model.embed_tokens.weight"] = value
+         elif "ln_f" in key:
+             llama2_state_dict["model.norm.weight"] = value
+         else:
+             key = key.replace("transformer.h", "model.layers")
+             if "attn.c_attn" in key:
+                 proj_size = value.size(0) // 3
+                 llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
+                 llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[
+                     proj_size : 2 * proj_size, ...
+                 ]
+                 llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
+             elif "attn.c_proj" in key:
+                 llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
+                 llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
+                     value[:, 0]
+                 ).squeeze()
+             elif "ln_1" in key:
+                 llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value
+             elif "ln_2" in key:
+                 llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
+             elif "mlp.w1" in key:
+                 llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
+             elif "mlp.w2" in key:
+                 llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
+             elif "mlp.c_proj" in key:
+                 llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
+             elif "lm_head" in key:
+                 llama2_state_dict[key] = value
+             else:
+                 raise KeyError("Unable to process key {}".format(key))
+
+     weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
+     shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
+
+     for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
+         if save_safetensors:
+             save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
+         else:
+             torch.save(shard, os.path.join(output_dir, shard_file))
+
+     if index is None:
+         print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
+     else:
+         index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
+         with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
+             json.dump(index, f, indent=2, sort_keys=True)
+         print("Model weights saved in {}".format(output_dir))
+
+     return str(torch_dtype).replace("torch.", "")
+
+
+ def save_config(input_dir: str, output_dir: str, torch_dtype: str):
+     with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
+         qwen_config_dict: Dict[str, Any] = json.load(f)
+
+     llama2_config_dict: Dict[str, Any] = OrderedDict()
+     llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
+     llama2_config_dict["hidden_act"] = "silu"
+     llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
+     llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"]
+     llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"] // 2
+     llama2_config_dict["max_position_embeddings"] = qwen_config_dict["max_position_embeddings"]
+     llama2_config_dict["model_type"] = "llama"
+     llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"]
+     llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"]
+     llama2_config_dict["num_key_value_heads"] = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]
+     llama2_config_dict["pretraining_tp"] = 1
+     llama2_config_dict["rms_norm_eps"] = qwen_config_dict["layer_norm_epsilon"]
+     llama2_config_dict["rope_scaling"] = None
+     llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"]
+     llama2_config_dict["torch_dtype"] = torch_dtype
+     llama2_config_dict["transformers_version"] = "4.34.0"
+     llama2_config_dict["use_cache"] = True
+     llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"]
+     llama2_config_dict["attention_bias"] = True
+
+     with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
+         json.dump(llama2_config_dict, f, indent=2)
+     print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))
+
+
+ def llamafy_qwen(
+     input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+ ):
+     try:
+         os.makedirs(output_dir, exist_ok=False)
+     except Exception as e:
+         raise RuntimeError("Output dir {} already exists.".format(output_dir)) from e
+
+     torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors)
+     save_config(input_dir, output_dir, torch_dtype)
+
+
+ if __name__ == "__main__":
+     fire.Fire(llamafy_qwen)
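
Most of the conversion above is plain key renaming; the exceptions are the attn.c_attn split and the zero-filled o_proj bias. A sketch of the rename rules applied to a couple of example Qwen parameter names (names only, no tensors):

rules = [
    ("transformer.h", "model.layers"),
    ("attn.c_proj", "self_attn.o_proj"),
    ("ln_1", "input_layernorm"),
    ("ln_2", "post_attention_layernorm"),
    ("mlp.w1", "mlp.up_proj"),
    ("mlp.w2", "mlp.gate_proj"),
    ("mlp.c_proj", "mlp.down_proj"),
]

def rename(key: str) -> str:
    for old, new in rules:
        key = key.replace(old, new)
    return key

print(rename("transformer.h.0.ln_1.weight"))        # model.layers.0.input_layernorm.weight
print(rename("transformer.h.0.mlp.w2.weight"))      # model.layers.0.mlp.gate_proj.weight
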
loftq_init.py ADDED
@@ -0,0 +1,82 @@
+ # coding=utf-8
+ # Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
+ # Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
+ # Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py
+
+ import os
+ from typing import TYPE_CHECKING, Optional
+
+ import fire
+ import torch
+ import torch.nn as nn
+ from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+ if TYPE_CHECKING:
+     from transformers import PreTrainedModel
+
+
+ class Shell(nn.Module):
+     def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
+         super().__init__()
+         self.weight = nn.Parameter(weight, requires_grad=False)
+         if bias is not None:
+             self.bias = nn.Parameter(bias, requires_grad=False)
+
+
+ def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
+     for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
+         parent_name = ".".join(name.split(".")[:-1])
+         child_name = name.split(".")[-1]
+         parent_module = model.get_submodule(parent_name)
+         child_module = getattr(parent_module, child_name)
+         base_layer = getattr(child_module, "base_layer")
+         weight = getattr(base_layer, "weight", None)
+         bias = getattr(base_layer, "bias", None)
+         setattr(parent_module, child_name, Shell(weight, bias))
+
+     print("Model unwrapped.")
+
+
+ def quantize_loftq(
+     model_name_or_path: str,
+     save_dir: str,
+     loftq_bits: Optional[int] = 4,
+     loftq_iter: Optional[int] = 1,
+     lora_alpha: Optional[int] = None,
+     lora_rank: Optional[int] = 16,
+     lora_target: Optional[str] = "q_proj,v_proj",
+     save_safetensors: Optional[bool] = False,
+ ):
+     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
+     model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
+     loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter)
+     lora_config = LoraConfig(
+         task_type=TaskType.CAUSAL_LM,
+         inference_mode=True,
+         r=lora_rank,
+         lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
+         lora_dropout=0.1,
+         target_modules=[name.strip() for name in lora_target.split(",")],
+         init_lora_weights="loftq",
+         loftq_config=loftq_config,
+     )
+
+     # Init LoftQ model
+     lora_model = get_peft_model(model, lora_config)
+     base_model: "PreTrainedModel" = lora_model.get_base_model()
+
+     # Save LoftQ model
+     setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
+     setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
+     lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors)
+
+     # Save base model
+     unwrap_model(base_model)
+     base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
+     tokenizer.save_pretrained(save_dir)
+
+
+ if __name__ == "__main__":
+     fire.Fire(quantize_loftq)
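
The script writes the LoftQ-initialized base model to save_dir and the LoRA adapters to save_dir/adapters. A hedged usage sketch for loading them back for fine-tuning, assuming the output directory was named "loftq_out" and that bitsandbytes is available for 4-bit loading:

from peft import PeftModel
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

base = AutoModelForCausalLM.from_pretrained(
    "loftq_out",                                    # hypothetical save_dir used with the script above
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
model = PeftModel.from_pretrained(base, "loftq_out/adapters", is_trainable=True)
model.print_trainable_parameters()
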