laizeqiang committed
Commit 1d63199
1 Parent(s): 70444e2
iGPT/models/husky.py CHANGED
@@ -66,13 +66,10 @@ def load_model(
 ):
     kwargs = {"torch_dtype": torch.float16}
 
-    if not os.path.exists(model_path[1]):
-        apply_delta(model_path[0], model_path[1], model_path[2])
-
     tokenizer = AutoTokenizer.from_pretrained(
-        model_path[1], use_fast=False)
+        model_path, use_fast=False)
     model = Blip2LlaMAForConditionalGeneration.from_pretrained(
-        model_path[1], low_cpu_mem_usage=True, **kwargs
+        model_path, low_cpu_mem_usage=True, **kwargs
     )
 
     if load_8bit:
@@ -337,12 +334,41 @@ class Chat:
         else:
             self.conv = get_default_conv_template(self.model_path).copy()
 
+
+def download_if_not_exists(base_path, delta_path, new_path):
+    if os.path.exists(new_path):
+        return
+
+    if not os.path.exists(base_path):
+        # download if not exists
+        os.system('bash third-party/llama_download.sh')
+
+    output_dir = os.path.join(os.path.dirname(base_path), 'llama_7B_hf')
+
+    if not os.path.exists(output_dir):
+        # convert to hf format if not exists
+        from .husky_src.convert_llama_weights_to_hf import write_model, write_tokenizer
+        write_model(
+            model_path=output_dir,
+            input_base_path=os.path.join(base_path, '7B'),
+            model_size="7B",
+        )
+        spm_path = os.path.join(base_path, "tokenizer.model")
+        write_tokenizer(output_dir, spm_path)
+
+    apply_delta(output_dir, new_path, delta_path)
+
+
 class HuskyVQA:
     def __init__(
         self,
         device
     ):
-        model_path=["model_zoo/llama-7b-hf", "model_zoo/husky-7b-v0_01", 'model_zoo/husky-7b-delta-v0_01']
+        model_path = 'model_zoo/husky-7b-v0_01'
+        download_if_not_exists(base_path="model_zoo/llama",
+                               delta_path="model_zoo/husky-7b-delta-v0_01",
+                               new_path=model_path)
+
         load_8bit=True
         max_new_tokens=512
         self.chat = Chat(
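For context on why the new helper converts the raw LLaMA checkpoint before calling `apply_delta`: the Husky checkpoint is shipped as a delta (`model_zoo/husky-7b-delta-v0_01`) that is combined with a Hugging Face-format base model, so that base must exist first. Below is a minimal sketch of the usual delta-weight pattern, assuming a Vicuna-style scheme; it is illustrative only, not the `apply_delta` implementation in `husky_src`.

```python
# Illustrative sketch of a typical apply_delta(base, target, delta) flow.
# The real function in iGPT/models/husky_src may differ in detail.
import torch
from transformers import AutoModelForCausalLM

def apply_delta_sketch(base_path, target_path, delta_path):
    base = AutoModelForCausalLM.from_pretrained(base_path, torch_dtype=torch.float16)
    delta = AutoModelForCausalLM.from_pretrained(delta_path, torch_dtype=torch.float16)

    # In the usual scheme the delta stores (fine-tuned - base) weights, so adding the
    # base back reconstructs the fine-tuned model without redistributing LLaMA itself.
    for name, param in delta.state_dict().items():
        if name in base.state_dict():
            param.data += base.state_dict()[name]

    delta.save_pretrained(target_path)
```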
iGPT/models/husky_src/convert_llama_weights_to_hf.py ADDED
@@ -0,0 +1,281 @@
+# Copyright 2022 EleutherAI and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py
+
+import argparse
+import gc
+import json
+import math
+import os
+import shutil
+import warnings
+
+import torch
+
+from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer
+
+
+try:
+    from transformers import LlamaTokenizerFast
+except ImportError as e:
+    warnings.warn(e)
+    warnings.warn(
+        "The converted tokenizer will be the `slow` tokenizer. To use the fast, update your `tokenizers` library and re-run the tokenizer conversion"
+    )
+    LlamaTokenizerFast = None
+
+"""
+Sample usage:
+
+```
+python src/transformers/models/llama/convert_llama_weights_to_hf.py \
+    --input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path
+```
+
+Thereafter, models can be loaded via:
+
+```py
+from transformers import LlamaForCausalLM, LlamaTokenizer
+
+model = LlamaForCausalLM.from_pretrained("/output/path")
+tokenizer = LlamaTokenizer.from_pretrained("/output/path")
+```
+
+Important note: you need to be able to host the whole model in RAM to execute this script (even if the biggest versions
+come in several checkpoints they each contain a part of each weight of the model, so we need to load them all in RAM).
+"""
+
+INTERMEDIATE_SIZE_MAP = {
+    "7B": 11008,
+    "13B": 13824,
+    "30B": 17920,
+    "65B": 22016,
+}
+NUM_SHARDS = {
+    "7B": 1,
+    "13B": 2,
+    "30B": 4,
+    "65B": 8,
+}
+
+
+def compute_intermediate_size(n):
+    return int(math.ceil(n * 8 / 3) + 255) // 256 * 256
+
+
+def read_json(path):
+    with open(path, "r") as f:
+        return json.load(f)
+
+
+def write_json(text, path):
+    with open(path, "w") as f:
+        json.dump(text, f)
+
+
+def write_model(model_path, input_base_path, model_size):
+    os.makedirs(model_path, exist_ok=True)
+    tmp_model_path = os.path.join(model_path, "tmp")
+    os.makedirs(tmp_model_path, exist_ok=True)
+
+    params = read_json(os.path.join(input_base_path, "params.json"))
+    num_shards = NUM_SHARDS[model_size]
+    n_layers = params["n_layers"]
+    n_heads = params["n_heads"]
+    n_heads_per_shard = n_heads // num_shards
+    dim = params["dim"]
+    dims_per_head = dim // n_heads
+    base = 10000.0
+    inv_freq = 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
+
+    # permute for sliced rotary
+    def permute(w):
+        return w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2).reshape(dim, dim)
+
+    print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
+    # Load weights
+    if model_size == "7B":
+        # Not sharded
+        # (The sharded implementation would also work, but this is simpler.)
+        loaded = torch.load(os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu")
+    else:
+        # Sharded
+        loaded = [
+            torch.load(os.path.join(input_base_path, f"consolidated.{i:02d}.pth"), map_location="cpu")
+            for i in range(num_shards)
+        ]
+    param_count = 0
+    index_dict = {"weight_map": {}}
+    for layer_i in range(n_layers):
+        filename = f"pytorch_model-{layer_i + 1}-of-{n_layers + 1}.bin"
+        if model_size == "7B":
+            # Unsharded
+            state_dict = {
+                f"model.layers.{layer_i}.self_attn.q_proj.weight": permute(
+                    loaded[f"layers.{layer_i}.attention.wq.weight"]
+                ),
+                f"model.layers.{layer_i}.self_attn.k_proj.weight": permute(
+                    loaded[f"layers.{layer_i}.attention.wk.weight"]
+                ),
+                f"model.layers.{layer_i}.self_attn.v_proj.weight": loaded[f"layers.{layer_i}.attention.wv.weight"],
+                f"model.layers.{layer_i}.self_attn.o_proj.weight": loaded[f"layers.{layer_i}.attention.wo.weight"],
+                f"model.layers.{layer_i}.mlp.gate_proj.weight": loaded[f"layers.{layer_i}.feed_forward.w1.weight"],
+                f"model.layers.{layer_i}.mlp.down_proj.weight": loaded[f"layers.{layer_i}.feed_forward.w2.weight"],
+                f"model.layers.{layer_i}.mlp.up_proj.weight": loaded[f"layers.{layer_i}.feed_forward.w3.weight"],
+                f"model.layers.{layer_i}.input_layernorm.weight": loaded[f"layers.{layer_i}.attention_norm.weight"],
+                f"model.layers.{layer_i}.post_attention_layernorm.weight": loaded[f"layers.{layer_i}.ffn_norm.weight"],
+            }
+        else:
+            # Sharded
+            # Note that in the 13B checkpoint, not cloning the two following weights will result in the checkpoint
+            # becoming 37GB instead of 26GB for some reason.
+            state_dict = {
+                f"model.layers.{layer_i}.input_layernorm.weight": loaded[0][
+                    f"layers.{layer_i}.attention_norm.weight"
+                ].clone(),
+                f"model.layers.{layer_i}.post_attention_layernorm.weight": loaded[0][
+                    f"layers.{layer_i}.ffn_norm.weight"
+                ].clone(),
+            }
+            state_dict[f"model.layers.{layer_i}.self_attn.q_proj.weight"] = permute(
+                torch.cat(
+                    [
+                        loaded[i][f"layers.{layer_i}.attention.wq.weight"].view(n_heads_per_shard, dims_per_head, dim)
+                        for i in range(num_shards)
+                    ],
+                    dim=0,
+                ).reshape(dim, dim)
+            )
+            state_dict[f"model.layers.{layer_i}.self_attn.k_proj.weight"] = permute(
+                torch.cat(
+                    [
+                        loaded[i][f"layers.{layer_i}.attention.wk.weight"].view(n_heads_per_shard, dims_per_head, dim)
+                        for i in range(num_shards)
+                    ],
+                    dim=0,
+                ).reshape(dim, dim)
+            )
+            state_dict[f"model.layers.{layer_i}.self_attn.v_proj.weight"] = torch.cat(
+                [
+                    loaded[i][f"layers.{layer_i}.attention.wv.weight"].view(n_heads_per_shard, dims_per_head, dim)
+                    for i in range(num_shards)
+                ],
+                dim=0,
+            ).reshape(dim, dim)
+
+            state_dict[f"model.layers.{layer_i}.self_attn.o_proj.weight"] = torch.cat(
+                [loaded[i][f"layers.{layer_i}.attention.wo.weight"] for i in range(num_shards)], dim=1
+            )
+            state_dict[f"model.layers.{layer_i}.mlp.gate_proj.weight"] = torch.cat(
+                [loaded[i][f"layers.{layer_i}.feed_forward.w1.weight"] for i in range(num_shards)], dim=0
+            )
+            state_dict[f"model.layers.{layer_i}.mlp.down_proj.weight"] = torch.cat(
+                [loaded[i][f"layers.{layer_i}.feed_forward.w2.weight"] for i in range(num_shards)], dim=1
+            )
+            state_dict[f"model.layers.{layer_i}.mlp.up_proj.weight"] = torch.cat(
+                [loaded[i][f"layers.{layer_i}.feed_forward.w3.weight"] for i in range(num_shards)], dim=0
+            )
+
+        state_dict[f"model.layers.{layer_i}.self_attn.rotary_emb.inv_freq"] = inv_freq
+        for k, v in state_dict.items():
+            index_dict["weight_map"][k] = filename
+            param_count += v.numel()
+        torch.save(state_dict, os.path.join(tmp_model_path, filename))
+
+    filename = f"pytorch_model-{n_layers + 1}-of-{n_layers + 1}.bin"
+    if model_size == "7B":
+        # Unsharded
+        state_dict = {
+            "model.embed_tokens.weight": loaded["tok_embeddings.weight"],
+            "model.norm.weight": loaded["norm.weight"],
+            "lm_head.weight": loaded["output.weight"],
+        }
+    else:
+        state_dict = {
+            "model.norm.weight": loaded[0]["norm.weight"],
+            "model.embed_tokens.weight": torch.cat(
+                [loaded[i]["tok_embeddings.weight"] for i in range(num_shards)], dim=1
+            ),
+            "lm_head.weight": torch.cat([loaded[i]["output.weight"] for i in range(num_shards)], dim=0),
+        }
+
+    for k, v in state_dict.items():
+        index_dict["weight_map"][k] = filename
+        param_count += v.numel()
+    torch.save(state_dict, os.path.join(tmp_model_path, filename))
+
+    # Write configs
+    index_dict["metadata"] = {"total_size": param_count * 2}
+    write_json(index_dict, os.path.join(tmp_model_path, "pytorch_model.bin.index.json"))
+
+    config = LlamaConfig(
+        hidden_size=dim,
+        intermediate_size=compute_intermediate_size(dim),
+        num_attention_heads=params["n_heads"],
+        num_hidden_layers=params["n_layers"],
+        rms_norm_eps=params["norm_eps"],
+    )
+    config.save_pretrained(tmp_model_path)
+
+    # Make space so we can load the model properly now.
+    del state_dict
+    del loaded
+    gc.collect()
+
+    print("Loading the checkpoint in a Llama model.")
+    model = LlamaForCausalLM.from_pretrained(tmp_model_path, torch_dtype=torch.float16, low_cpu_mem_usage=True)
+    # Avoid saving this as part of the config.
+    del model.config._name_or_path
+
+    print("Saving in the Transformers format.")
+    model.save_pretrained(model_path)
+    shutil.rmtree(tmp_model_path)
+
+
+def write_tokenizer(tokenizer_path, input_tokenizer_path):
+    # Initialize the tokenizer based on the `spm` model
+    tokenizer_class = LlamaTokenizer if LlamaTokenizerFast is None else LlamaTokenizerFast
+    print(f"Saving a {tokenizer_class.__name__} to {tokenizer_path}.")
+    tokenizer = tokenizer_class(input_tokenizer_path)
+    tokenizer.save_pretrained(tokenizer_path)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--input_dir",
+        help="Location of LLaMA weights, which contains tokenizer.model and model folders",
+    )
+    parser.add_argument(
+        "--model_size",
+        choices=["7B", "13B", "30B", "65B", "tokenizer_only"],
+    )
+    parser.add_argument(
+        "--output_dir",
+        help="Location to write HF model and tokenizer",
+    )
+    args = parser.parse_args()
+    if args.model_size != "tokenizer_only":
+        write_model(
+            model_path=args.output_dir,
+            input_base_path=os.path.join(args.input_dir, args.model_size),
+            model_size=args.model_size,
+        )
+    spm_path = os.path.join(args.input_dir, "tokenizer.model")
+    write_tokenizer(args.output_dir, spm_path)
+
+
+if __name__ == "__main__":
+    main()
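As a quick arithmetic check, the rounding in `compute_intermediate_size` reproduces the values listed in `INTERMEDIATE_SIZE_MAP` when given the LLaMA hidden sizes, so the converted 7B config (dim 4096) ends up with `intermediate_size=11008`:

```python
import math

def compute_intermediate_size(n):
    # same formula as in the script above
    return int(math.ceil(n * 8 / 3) + 255) // 256 * 256

# LLaMA hidden sizes -> intermediate sizes in INTERMEDIATE_SIZE_MAP
assert compute_intermediate_size(4096) == 11008  # 7B
assert compute_intermediate_size(5120) == 13824  # 13B
assert compute_intermediate_size(6656) == 17920  # 30B
assert compute_intermediate_size(8192) == 22016  # 65B
```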
iGPT/models/inpainting.py CHANGED
@@ -8,6 +8,7 @@ from .utils import gen_new_name, prompts
 import torch
 from omegaconf import OmegaConf
 import numpy as np
+import wget
 from .inpainting_src.ldm_inpainting.ldm.models.diffusion.ddim import DDIMSampler
 from .inpainting_src.ldm_inpainting.ldm.util import instantiate_from_config
 from .utils import cal_dilate_factor, dilate_mask
@@ -35,16 +36,22 @@ def make_batch(image, mask, device):
 
 class LDMInpainting:
     def __init__(self, device):
-        ckpt_path = 'model_zoo/ldm_inpainting_big.ckpt'
+        self.model_checkpoint_path = 'model_zoo/ldm_inpainting_big.ckpt'
         config = './iGPT/models/inpainting_src/ldm_inpainting/config.yaml'
         self.ddim_steps = 50
         self.device = device
         config = OmegaConf.load(config)
         model = instantiate_from_config(config.model)
-        model.load_state_dict(torch.load(ckpt_path)["state_dict"], strict=False)
+        self.download_parameters()
+        model.load_state_dict(torch.load(self.model_checkpoint_path)["state_dict"], strict=False)
         self.model = model.to(device=device)
         self.sampler = DDIMSampler(model)
 
+    def download_parameters(self):
+        url = 'https://heibox.uni-heidelberg.de/f/4d9ac7ea40c64582b7c9/?dl=1'
+        if not os.path.exists(self.model_checkpoint_path):
+            wget.download(url, out=self.model_checkpoint_path)
+
     @prompts(name="Remove the Masked Object",
              description="useful when you want to remove an object by masking the region in the image. "
                          "like: remove masked object or inpaint the masked region.. "
iGPT/models/utils.py CHANGED
@@ -38,11 +38,12 @@ def gen_new_name(orginal_name, suffix="update", ext="png"):
     name_split = os.path.splitext(filename)[0].split('_')
     this_new_uuid = str(uuid.uuid4())[:3]
     timestamp = int(math.modf(time.time())[0] * 1000)
-    if len(name_split) == 1:
-        prev_file_name = name_split[0]
-    else:
-        # assert len(name_split) == 3
-        prev_file_name = name_split[1]
+    prev_file_name = name_split[0]
+    # if len(name_split) == 1:
+    #     prev_file_name = name_split[0]
+    # else:
+    #     # assert len(name_split) == 3
+    #     prev_file_name = name_split[0]
     if len(suffix.strip()) == 0:
         new_file_name = f'{this_new_uuid}{timestamp:03d}_{prev_file_name}.{ext}'
     else:
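With this change, the prefix carried into the new file name is always the first underscore-separated token of the old name. A standalone sketch of the naming logic as it now behaves; the surrounding `filename` handling and the suffixed format string are assumptions from context rather than lines shown in this hunk:

```python
import math
import os
import time
import uuid

def sketch_new_name(original_name, suffix="update", ext="png"):
    # mirrors the gen_new_name lines visible above; the basename step and the
    # suffixed format string are illustrative assumptions
    filename = os.path.basename(original_name)
    name_split = os.path.splitext(filename)[0].split('_')
    this_new_uuid = str(uuid.uuid4())[:3]
    timestamp = int(math.modf(time.time())[0] * 1000)
    prev_file_name = name_split[0]  # always the first token after this commit
    if len(suffix.strip()) == 0:
        return f'{this_new_uuid}{timestamp:03d}_{prev_file_name}.{ext}'
    return f'{this_new_uuid}{timestamp:03d}_{prev_file_name}_{suffix}.{ext}'

# e.g. 'a1b2c3_image_inpaint.png' -> something like 'f4e912_a1b2c3_update.png'
```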
requirements.txt CHANGED
@@ -23,4 +23,5 @@ kornia==0.5.0
 sentencepiece==0.1.98
 accelerate==0.18.0
 timm==0.6.13
-git+https://github.com/facebookresearch/segment-anything.git
+git+https://github.com/facebookresearch/segment-anything.git
+wget
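The `wget` package added here backs the new `LDMInpainting.download_parameters` method above. The same download-if-missing pattern is easy to reuse for other checkpoints; a minimal sketch, where the helper name and the directory-creation step are illustrative additions rather than part of this commit:

```python
import os
import wget

def fetch_checkpoint(url: str, out_path: str) -> str:
    """Download url to out_path only if the file is not already present."""
    if not os.path.exists(out_path):
        target_dir = os.path.dirname(out_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        wget.download(url, out=out_path)
    return out_path

# e.g. the LDM inpainting weights used above:
# fetch_checkpoint('https://heibox.uni-heidelberg.de/f/4d9ac7ea40c64582b7c9/?dl=1',
#                  'model_zoo/ldm_inpainting_big.ckpt')
```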
third-party/llama_download.sh ADDED
@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# This software may be used and distributed according to the terms of the GNU General Public License version 3.
+
+PRESIGNED_URL=""                  # replace with presigned url from email
+MODEL_SIZE="7B"                   # edit this list with the model sizes you wish to download
+TARGET_FOLDER="model_zoo/llama"   # where all files should end up
+
+declare -A N_SHARD_DICT
+
+N_SHARD_DICT["7B"]="0"
+N_SHARD_DICT["13B"]="1"
+N_SHARD_DICT["30B"]="3"
+N_SHARD_DICT["65B"]="7"
+
+echo "Downloading tokenizer"
+wget ${PRESIGNED_URL/'*'/"tokenizer.model"} -O ${TARGET_FOLDER}"/tokenizer.model"
+wget ${PRESIGNED_URL/'*'/"tokenizer_checklist.chk"} -O ${TARGET_FOLDER}"/tokenizer_checklist.chk"
+
+(cd ${TARGET_FOLDER} && md5sum -c tokenizer_checklist.chk)
+
+for i in ${MODEL_SIZE//,/ }
+do
+    echo "Downloading ${i}"
+    mkdir -p ${TARGET_FOLDER}"/${i}"
+    for s in $(seq -f "0%g" 0 ${N_SHARD_DICT[$i]})
+    do
+        wget ${PRESIGNED_URL/'*'/"${i}/consolidated.${s}.pth"} -O ${TARGET_FOLDER}"/${i}/consolidated.${s}.pth"
+    done
+    wget ${PRESIGNED_URL/'*'/"${i}/params.json"} -O ${TARGET_FOLDER}"/${i}/params.json"
+    wget ${PRESIGNED_URL/'*'/"${i}/checklist.chk"} -O ${TARGET_FOLDER}"/${i}/checklist.chk"
+    echo "Checking checksums"
+    (cd ${TARGET_FOLDER}"/${i}" && md5sum -c checklist.chk)
+done
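The layout this script produces under TARGET_FOLDER is what `download_if_not_exists` in `iGPT/models/husky.py` expects before converting the weights and applying the delta. A small check, with a helper name of our own, that the 7B download landed where the conversion step will look:

```python
import os

def llama_7b_download_complete(base_path="model_zoo/llama"):
    """Return True if the files read by the HF conversion step are in place."""
    expected = [
        os.path.join(base_path, "tokenizer.model"),
        os.path.join(base_path, "7B", "params.json"),
        os.path.join(base_path, "7B", "consolidated.00.pth"),
    ]
    return all(os.path.exists(p) for p in expected)
```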