chunfeng994 committed
Commit 600996f
1 Parent(s): 620411d
Upload folder using huggingface_hub
Browse files
- cal_flops.py +31 -0
- cal_lr.py +76 -0
- cal_ppl.py +116 -0
- length_cdf.py +51 -0
- llama_pro.py +115 -0
- llamafy_baichuan2.py +92 -0
- llamafy_qwen.py +144 -0
- loftq_init.py +82 -0
cal_flops.py
ADDED
@@ -0,0 +1,31 @@
# coding=utf-8
# Calculates the flops of pre-trained models.
# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/

import fire
import torch
from deepspeed.accelerator import get_accelerator  # type: ignore
from deepspeed.profiling.flops_profiler import get_model_profile  # type: ignore

from llmtuner.chat import ChatModel


def calculate_flops(
    model_name_or_path: str,
    batch_size: int = 1,
    seq_length: int = 256,
    flash_attn: str = "auto",
):
    with get_accelerator().device(0):
        chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
        fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device)
        input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
        flops, macs, params = get_model_profile(chat_model.model, kwargs=input_dict, print_profile=True, detailed=True)
        print("FLOPs:", flops)
        print("MACs:", macs)
        print("Params:", params)


if __name__ == "__main__":
    fire.Fire(calculate_flops)
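
A rough sanity check for the profiler output (an illustrative sketch, not part of the committed file): for a dense decoder-only model, one forward pass costs on the order of 2 * params * tokens FLOPs, so the profiled value should land in the same ballpark.

# Rule-of-thumb estimate of forward-pass FLOPs for a dense decoder-only model.
# The factor 2 counts one multiply and one add per parameter per token; attention terms are ignored.
def approx_forward_flops(num_params: float, batch_size: int = 1, seq_length: int = 512) -> float:
    return 2.0 * num_params * batch_size * seq_length

print(approx_forward_flops(7e9))  # ~7.2e12 FLOPs for a 7B-parameter model at batch_size=1, seq_length=512
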
cal_lr.py
ADDED
@@ -0,0 +1,76 @@
# coding=utf-8
# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py

import math
from typing import Literal

import fire
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq

from llmtuner.data import get_dataset
from llmtuner.extras.constants import IGNORE_INDEX
from llmtuner.hparams import get_train_args
from llmtuner.model import load_tokenizer


BASE_LR = 3e-4  # 1.5e-4 for 30B-70B models
BASE_BS = 4_000_000  # from llama paper


def calculate_lr(
    model_name_or_path: str,
    batch_size: int,  # total batch size, namely (batch size * gradient accumulation * world size)
    stage: Literal["pt", "sft"] = "sft",
    dataset: str = "alpaca_en",
    dataset_dir: str = "data",
    template: str = "default",
    cutoff_len: int = 1024,  # i.e. maximum input length during training
    is_mistral: bool = False,  # mistral model uses a smaller learning rate
):
    model_args, data_args, training_args, _, _ = get_train_args(
        dict(
            stage=stage,
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=cutoff_len,
            output_dir="dummy_dir",
            overwrite_cache=True,
        )
    )
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
    if stage == "pt":
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    elif stage == "sft":
        data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
    else:
        raise NotImplementedError

    dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
    valid_tokens, total_tokens = 0, 0
    for batch in tqdm(dataloader):
        valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
        total_tokens += torch.numel(batch["labels"])

    batch_max_len = cutoff_len * batch_size  # max tokens in a batch
    valid_ratio = valid_tokens / total_tokens
    batch_valid_len = batch_max_len * valid_ratio
    lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)  # lr ~ sqrt(batch_size)
    lr = lr / 6.0 if is_mistral else lr
    print(
        "Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format(
            lr, valid_ratio * 100, batch_valid_len
        )
    )


if __name__ == "__main__":
    fire.Fire(calculate_lr)
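
To make the square-root scaling rule concrete, here is a small worked example with hypothetical numbers (cutoff_len=1024, batch_size=16, 60% valid tokens); it only reproduces the final arithmetic of the script.

import math

BASE_LR, BASE_BS = 3e-4, 4_000_000
batch_valid_len = 1024 * 16 * 0.6            # ~9830 effective tokens per optimizer step
lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)
print("{:.2e}".format(lr))                   # 1.49e-05
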
cal_ppl.py
ADDED
@@ -0,0 +1,116 @@
# coding=utf-8
# Calculates the perplexity of pre-trained models on the dataset.
# Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json

import json
from dataclasses import dataclass
from typing import Any, Dict, Literal, Optional, Sequence

import fire
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq

from llmtuner.data import get_dataset
from llmtuner.extras.constants import IGNORE_INDEX
from llmtuner.hparams import get_train_args
from llmtuner.model import load_model, load_tokenizer


@dataclass
class PairwiseDataCollatorWithPadding(DataCollatorForSeq2Seq):
    r"""
    Data collator for pairwise data.
    """

    train_on_prompt: bool = False

    def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        r"""
        Pads batched data to the longest sequence in the batch.

        We generate 2 * n examples where the first n examples represent chosen examples and
        the last n examples represent rejected examples.
        """
        chosen_features = []
        for feature in features:
            prompt_len, answer_len = len(feature["prompt_ids"]), len(feature["chosen_ids"])
            input_ids = feature["prompt_ids"] + feature["chosen_ids"]
            attention_mask = [1] * (prompt_len + answer_len)
            labels = input_ids if self.train_on_prompt else [IGNORE_INDEX] * prompt_len + feature["chosen_ids"]
            chosen_features.append({"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels})

        return super().__call__(chosen_features)


def cal_ppl(
    model_name_or_path: str,
    save_name: str,
    batch_size: int = 4,
    stage: Literal["pt", "sft", "rm"] = "sft",
    dataset: str = "alpaca_en",
    dataset_dir: str = "data",
    template: str = "default",
    cutoff_len: int = 1024,
    max_samples: Optional[int] = None,
    train_on_prompt: bool = False,
):
    model_args, data_args, training_args, finetuning_args, _ = get_train_args(
        dict(
            stage=stage,
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=cutoff_len,
            max_samples=max_samples,
            train_on_prompt=train_on_prompt,
            output_dir="dummy_dir",
            overwrite_cache=True,
        )
    )
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
    model = load_model(tokenizer, model_args, finetuning_args, is_trainable=False)
    if stage == "pt":
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    elif stage == "sft":
        data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
    elif stage == "rm":
        data_collator = PairwiseDataCollatorWithPadding(
            tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX, train_on_prompt=train_on_prompt
        )
    else:
        raise NotImplementedError

    dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
    criterion = torch.nn.CrossEntropyLoss(reduction="none")
    total_ppl = 0
    perplexities = []
    batch: Dict[str, "torch.Tensor"]
    with torch.no_grad():
        for batch in tqdm(dataloader):
            batch = batch.to(model.device)
            outputs = model(**batch)
            shift_logits: "torch.Tensor" = outputs["logits"][..., :-1, :]
            shift_labels: "torch.Tensor" = batch["labels"][..., 1:]
            loss_mask = shift_labels != IGNORE_INDEX
            flatten_logits = shift_logits.contiguous().view(shift_labels.size(0) * shift_labels.size(1), -1)
            flatten_labels = shift_labels.contiguous().view(-1)
            token_logps: "torch.Tensor" = criterion(flatten_logits, flatten_labels)
            token_logps = token_logps.contiguous().view(shift_logits.size(0), -1)
            sentence_logps = (token_logps * loss_mask).sum(-1) / loss_mask.sum(-1)
            total_ppl += sentence_logps.exp().sum().item()
            perplexities.extend(sentence_logps.exp().tolist())

    with open(save_name, "w", encoding="utf-8") as f:
        json.dump(perplexities, f, indent=2)

    print("Average perplexity is {:.2f}".format(total_ppl / len(perplexities)))
    print("Perplexities have been saved at {}.".format(save_name))


if __name__ == "__main__":
    fire.Fire(cal_ppl)
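
A standalone toy check of the masked averaging used above (hypothetical logits and labels): token losses at IGNORE_INDEX positions are zeroed by the mask, the remaining losses are averaged per sequence, and the exponential gives the sentence perplexity.

import torch

IGNORE_INDEX = -100
criterion = torch.nn.CrossEntropyLoss(reduction="none")
logits = torch.randn(1, 4, 8)                          # (batch, seq_len, vocab)
labels = torch.tensor([[IGNORE_INDEX, 2, 5, 7]])       # first position is masked out
loss_mask = labels != IGNORE_INDEX
token_logps = criterion(logits.view(-1, 8), labels.view(-1)).view(1, -1)
sentence_ppl = ((token_logps * loss_mask).sum(-1) / loss_mask.sum(-1)).exp()
print(sentence_ppl)                                    # one perplexity value per sequence
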
length_cdf.py
ADDED
@@ -0,0 +1,51 @@
# coding=utf-8
# Calculates the distribution of the input lengths in the dataset.
# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default

from collections import defaultdict

import fire
from tqdm import tqdm

from llmtuner.data import get_dataset
from llmtuner.hparams import get_train_args
from llmtuner.model import load_tokenizer


def length_cdf(
    model_name_or_path: str,
    dataset: str = "alpaca_en",
    dataset_dir: str = "data",
    template: str = "default",
    interval: int = 1000,
):
    model_args, data_args, training_args, _, _ = get_train_args(
        dict(
            stage="sft",
            model_name_or_path=model_name_or_path,
            dataset=dataset,
            dataset_dir=dataset_dir,
            template=template,
            cutoff_len=1_000_000,
            output_dir="dummy_dir",
            overwrite_cache=True,
        )
    )
    tokenizer_module = load_tokenizer(model_args)
    trainset = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)
    total_num = len(trainset)
    length_dict = defaultdict(int)
    for sample in tqdm(trainset["input_ids"]):
        length_dict[len(sample) // interval * interval] += 1

    length_tuples = list(length_dict.items())
    length_tuples.sort()
    count_accu, prob_accu = 0, 0
    for length, count in length_tuples:
        count_accu += count
        prob_accu += count / total_num * 100
        print("{:d} ({:.2f}%) samples have length < {}.".format(count_accu, prob_accu, length + interval))


if __name__ == "__main__":
    fire.Fire(length_cdf)
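
A minimal illustration of the bucketing logic above, using hypothetical sample lengths: each length is floored to a multiple of `interval`, and the script then reports the cumulative share per bucket.

from collections import defaultdict

interval = 1000
length_dict = defaultdict(int)
for length in [120, 850, 1024, 2300]:
    length_dict[length // interval * interval] += 1
print(dict(length_dict))  # {0: 2, 1000: 1, 2000: 1}
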
llama_pro.py
ADDED
@@ -0,0 +1,115 @@
# coding=utf-8
# Performs block expansion for LLaMA, Mistral or Qwen1.5 models.
# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py

import json
import os
from collections import OrderedDict
from typing import TYPE_CHECKING, Optional

import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)


if TYPE_CHECKING:
    from transformers import PretrainedConfig, PreTrainedModel


def change_name(name: str, old_index: int, new_index: int) -> str:
    return name.replace(".{:d}.".format(old_index), ".{:d}.".format(new_index))


def block_expansion(
    model_name_or_path: str,
    output_dir: str,
    num_expand: int,
    shard_size: Optional[str] = "2GB",
    save_safetensors: Optional[bool] = False,
):
    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)
    num_layers = getattr(config, "num_hidden_layers")
    setattr(config, "num_hidden_layers", num_layers + num_expand)
    config.save_pretrained(output_dir)

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    tokenizer.save_pretrained(output_dir)

    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)  # load the original one
    if save_safetensors:
        setattr(config, "tie_word_embeddings", False)  # safetensors does not allow shared weights

    model: "PreTrainedModel" = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        config=config,
        torch_dtype="auto",
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    state_dict = model.state_dict()

    if num_layers % num_expand != 0:
        raise ValueError("`num_layers` {} should be divisible by `num_expand` {}.".format(num_layers, num_expand))

    split = num_layers // num_expand
    layer_cnt = 0
    output_state_dict = OrderedDict()
    for i in range(num_layers):
        for key, value in state_dict.items():
            if ".{:d}.".format(i) in key:
                output_state_dict[change_name(key, i, layer_cnt)] = value

        print("Add layer {} copied from layer {}".format(layer_cnt, i))
        layer_cnt += 1
        if (i + 1) % split == 0:
            for key, value in state_dict.items():
                if ".{:d}.".format(i) in key:
                    if "down_proj" in key or "o_proj" in key:
                        output_state_dict[change_name(key, i, layer_cnt)] = torch.zeros_like(value)
                    else:
                        output_state_dict[change_name(key, i, layer_cnt)] = torch.clone(value)

            print("Add layer {} expanded from layer {}".format(layer_cnt, i))
            layer_cnt += 1

    for key, value in state_dict.items():
        if key not in output_state_dict:
            output_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name)

    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))

    print("Fine-tune this model with:")
    print("    --model_name_or_path {} \\".format(output_dir))
    print("    --finetuning_type freeze \\")
    print("    --name_module_trainable all \\")
    print("    --num_layer_trainable {} \\".format(num_expand))
    print("    --use_llama_pro")


if __name__ == "__main__":
    fire.Fire(block_expansion)
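
To see how the interleaving plays out, a small index-mapping sketch with hypothetical sizes (a 32-layer model expanded by 8 blocks); it only mirrors the loop structure above, not the weight copying.

num_layers, num_expand = 32, 8
split = num_layers // num_expand              # an expanded block is appended after every `split` layers
layer_cnt, new_to_old = 0, []
for i in range(num_layers):
    new_to_old.append((layer_cnt, i, "copied"))
    layer_cnt += 1
    if (i + 1) % split == 0:
        new_to_old.append((layer_cnt, i, "expanded"))   # o_proj / down_proj zero-initialized
        layer_cnt += 1
print(layer_cnt)        # 40 layers in the expanded model
print(new_to_old[4])    # (4, 3, 'expanded'): new layer 4 is the near-identity copy of original layer 3
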
llamafy_baichuan2.py
ADDED
@@ -0,0 +1,92 @@
# coding=utf-8
# Converts the Baichuan2-7B model into the same format as LLaMA2-7B.
# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py
# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied

import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional

import fire
import torch
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
    baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
            shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
            baichuan2_state_dict.update(shard_weight)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
        if "W_pack" in key:
            proj_size = value.size(0) // 3
            llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
            llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
            llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
        elif "lm_head" in key:
            llama2_state_dict[key] = torch.nn.functional.normalize(value)
        else:
            llama2_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)

    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, WEIGHTS_NAME)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))


def save_config(input_dir: str, output_dir: str):
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        llama2_config_dict: Dict[str, Any] = json.load(f)

    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict.pop("auto_map", None)
    llama2_config_dict.pop("tokenizer_class", None)
    llama2_config_dict["model_type"] = "llama"

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


def llamafy_baichuan2(
    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        raise RuntimeError("Output dir already exists: {}".format(e))

    save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir)


if __name__ == "__main__":
    fire.Fire(llamafy_baichuan2)
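
A toy illustration of the W_pack split above, using a hypothetical hidden size of 4: the packed QKV projection is sliced row-wise into three equal q/k/v blocks.

import torch

hidden = 4
w_pack = torch.arange(3 * hidden * hidden, dtype=torch.float32).view(3 * hidden, hidden)
proj_size = w_pack.size(0) // 3
q_proj = w_pack[:proj_size, :]
k_proj = w_pack[proj_size : 2 * proj_size, :]
v_proj = w_pack[2 * proj_size :, :]
print(q_proj.shape, k_proj.shape, v_proj.shape)  # three (4, 4) matrices
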
llamafy_qwen.py
ADDED
@@ -0,0 +1,144 @@
# coding=utf-8
# Converts the Qwen models into the same format as LLaMA2.
# Usage: python llamafy_qwen.py --input_dir input --output_dir output
# Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied

import json
import os
from collections import OrderedDict
from typing import Any, Dict, Optional

import fire
import torch
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)
from transformers.utils import check_min_version


try:
    check_min_version("4.34.0")
except Exception:
    raise ValueError("Please upgrade `transformers` to 4.34.0")


CONFIG_NAME = "config.json"


def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
    qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
            with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
                for key in f.keys():
                    qwen_state_dict[key] = f.get_tensor(key)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    torch_dtype = None
    for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
        if torch_dtype is None:
            torch_dtype = value.dtype
        if "wte" in key:
            llama2_state_dict["model.embed_tokens.weight"] = value
        elif "ln_f" in key:
            llama2_state_dict["model.norm.weight"] = value
        else:
            key = key.replace("transformer.h", "model.layers")
            if "attn.c_attn" in key:
                proj_size = value.size(0) // 3
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[
                    proj_size : 2 * proj_size, ...
                ]
                llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
            elif "attn.c_proj" in key:
                llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
                llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
                    value[:, 0]
                ).squeeze()
            elif "ln_1" in key:
                llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value
            elif "ln_2" in key:
                llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
            elif "mlp.w1" in key:
                llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
            elif "mlp.w2" in key:
                llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
            elif "mlp.c_proj" in key:
                llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
            elif "lm_head" in key:
                llama2_state_dict[key] = value
            else:
                raise KeyError("Unable to process key {}".format(key))

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)

    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))

    return str(torch_dtype).replace("torch.", "")


def save_config(input_dir: str, output_dir: str, torch_dtype: str):
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        qwen_config_dict: Dict[str, Any] = json.load(f)

    llama2_config_dict: Dict[str, Any] = OrderedDict()
    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict["hidden_act"] = "silu"
    llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
    llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"]
    llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"] // 2
    llama2_config_dict["max_position_embeddings"] = qwen_config_dict["max_position_embeddings"]
    llama2_config_dict["model_type"] = "llama"
    llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"]
    llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"]
    llama2_config_dict["num_key_value_heads"] = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]
    llama2_config_dict["pretraining_tp"] = 1
    llama2_config_dict["rms_norm_eps"] = qwen_config_dict["layer_norm_epsilon"]
    llama2_config_dict["rope_scaling"] = None
    llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"]
    llama2_config_dict["torch_dtype"] = torch_dtype
    llama2_config_dict["transformers_version"] = "4.34.0"
    llama2_config_dict["use_cache"] = True
    llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"]
    llama2_config_dict["attention_bias"] = True

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)
    print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME)))


def llamafy_qwen(
    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
):
    try:
        os.makedirs(output_dir, exist_ok=False)
    except Exception as e:
        raise RuntimeError("Output dir already exists: {}".format(e))

    torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors)
    save_config(input_dir, output_dir, torch_dtype)


if __name__ == "__main__":
    fire.Fire(llamafy_qwen)
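
A small check of the num_key_value_heads mapping above, with hypothetical Qwen-14B-like config values: hidden_size // kv_channels recovers the number of attention heads, i.e. plain multi-head attention rather than grouped-query attention.

qwen_config_dict = {"hidden_size": 5120, "kv_channels": 128, "num_attention_heads": 40}  # hypothetical values
num_key_value_heads = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]
print(num_key_value_heads)                                              # 40
print(num_key_value_heads == qwen_config_dict["num_attention_heads"])   # True
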
loftq_init.py
ADDED
@@ -0,0 +1,82 @@
# coding=utf-8
# Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
# Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
# Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py

import os
from typing import TYPE_CHECKING, Optional

import fire
import torch
import torch.nn as nn
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer


if TYPE_CHECKING:
    from transformers import PreTrainedModel


class Shell(nn.Module):
    def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
        super().__init__()
        self.weight = nn.Parameter(weight, requires_grad=False)
        if bias is not None:
            self.bias = nn.Parameter(bias, requires_grad=False)


def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
    for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}:
        parent_name = ".".join(name.split(".")[:-1])
        child_name = name.split(".")[-1]
        parent_module = model.get_submodule(parent_name)
        child_module = getattr(parent_module, child_name)
        base_layer = getattr(child_module, "base_layer")
        weight = getattr(base_layer, "weight", None)
        bias = getattr(base_layer, "bias", None)
        setattr(parent_module, child_name, Shell(weight, bias))

    print("Model unwrapped.")


def quantize_loftq(
    model_name_or_path: str,
    save_dir: str,
    loftq_bits: Optional[int] = 4,
    loftq_iter: Optional[int] = 1,
    lora_alpha: Optional[int] = None,
    lora_rank: Optional[int] = 16,
    lora_target: Optional[str] = "q_proj,v_proj",
    save_safetensors: Optional[bool] = False,
):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
    loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter)
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=True,
        r=lora_rank,
        lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
        lora_dropout=0.1,
        target_modules=[name.strip() for name in lora_target.split(",")],
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )

    # Init LoftQ model
    lora_model = get_peft_model(model, lora_config)
    base_model: "PreTrainedModel" = lora_model.get_base_model()

    # Save LoftQ model
    setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir)
    setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True)
    lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors)

    # Save base model
    unwrap_model(base_model)
    base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
    tokenizer.save_pretrained(save_dir)


if __name__ == "__main__":
    fire.Fire(quantize_loftq)
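
A minimal sketch of reloading what the script saves, assuming the layout above (base weights and tokenizer in save_dir, LoftQ-initialized LoRA adapters in save_dir/adapters); the path is hypothetical.

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

save_dir = "path_to_loftq_output"  # hypothetical output dir produced by loftq_init.py
tokenizer = AutoTokenizer.from_pretrained(save_dir)
base_model = AutoModelForCausalLM.from_pretrained(save_dir, torch_dtype="auto")
model = PeftModel.from_pretrained(base_model, save_dir + "/adapters", is_trainable=True)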