dylanebert (HF staff) committed
Commit 6b83d25
1 Parent(s): a283418
README.md CHANGED
@@ -3,10 +3,28 @@ license: openrail
  pipeline_tag: image-to-3d
  ---

+ # Overview
+
  This is a duplicate of [ashawkey/imagedream-ipmv-diffusers](https://huggingface.co/ashawkey/imagedream-ipmv-diffusers).

  It is hosted here for the purpose of persistence and reproducibility for the ML for 3D course.

+ ### Usage
+
+ This project can be used from other projects as follows.
+
+ ```
+ import torch
+ from diffusers import DiffusionPipeline
+
+ pipeline = DiffusionPipeline.from_pretrained(
+     "ashawkey/mvdream-sd2.1-diffusers",
+     custom_pipeline="dylanebert/multi_view_diffusion",
+     torch_dtype=torch.float16,
+     trust_remote_code=True,
+ )
+ ```
+
  Original model card below.

  ---
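For reference, here is a minimal inference sketch that continues from the snippet added above. It assumes the custom pipeline accepts the same keyword arguments as the test call in the conversion script deleted below (`prompt`, `negative_prompt`, `output_type`, `guidance_scale`, `num_inference_steps`); the actual signature and defaults of `dylanebert/multi_view_diffusion` may differ.

```python
import torch
from diffusers import DiffusionPipeline

# Load the custom multi-view pipeline, as in the README snippet above.
pipeline = DiffusionPipeline.from_pretrained(
    "ashawkey/mvdream-sd2.1-diffusers",
    custom_pipeline="dylanebert/multi_view_diffusion",
    torch_dtype=torch.float16,
    trust_remote_code=True,
).to("cuda")

# Generate multi-view images from a text prompt; these keyword arguments mirror
# the test call in the removed conversion script and are assumptions, not a spec.
images = pipeline(
    prompt="Head of Hatsune Miku",
    negative_prompt="painting, bad quality, flat",
    output_type="pil",
    guidance_scale=7.5,
    num_inference_steps=50,
)

# Save each generated view to disk.
for i, image in enumerate(images):
    image.save(f"view_{i}.png")
```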
convert_mvdream_to_diffusers.py DELETED
@@ -1,597 +0,0 @@
- # Modified from https://github.com/huggingface/diffusers/blob/bc691231360a4cbc7d19a58742ebb8ed0f05e027/scripts/convert_original_stable_diffusion_to_diffusers.py
-
- import argparse
- import torch
- import sys
-
- sys.path.insert(0, ".")
-
- from diffusers.models import (
-     AutoencoderKL,
- )
- from omegaconf import OmegaConf
- from diffusers.schedulers import DDIMScheduler
- from diffusers.utils import logging
- from typing import Any
- from accelerate import init_empty_weights
- from accelerate.utils import set_module_tensor_to_device
- from transformers import CLIPTextModel, CLIPTokenizer, CLIPVisionModel, CLIPImageProcessor
-
- from mv_unet import MultiViewUNetModel
- from pipeline import MVDreamPipeline
- import kiui
-
- logger = logging.get_logger(__name__)
-
-
- def assign_to_checkpoint(
-     paths,
-     checkpoint,
-     old_checkpoint,
-     attention_paths_to_split=None,
-     additional_replacements=None,
-     config=None,
- ):
-     """
-     This does the final conversion step: take locally converted weights and apply a global renaming to them. It splits
-     attention layers, and takes into account additional replacements that may arise.
-     Assigns the weights to the new checkpoint.
-     """
-     assert isinstance(
-         paths, list
-     ), "Paths should be a list of dicts containing 'old' and 'new' keys."
-
-     # Splits the attention layers into three variables.
-     if attention_paths_to_split is not None:
-         for path, path_map in attention_paths_to_split.items():
-             old_tensor = old_checkpoint[path]
-             channels = old_tensor.shape[0] // 3
-
-             target_shape = (-1, channels) if len(old_tensor.shape) == 3 else (-1)
-
-             assert config is not None
-             num_heads = old_tensor.shape[0] // config["num_head_channels"] // 3
-
-             old_tensor = old_tensor.reshape(
-                 (num_heads, 3 * channels // num_heads) + old_tensor.shape[1:]
-             )
-             query, key, value = old_tensor.split(channels // num_heads, dim=1)
-
-             checkpoint[path_map["query"]] = query.reshape(target_shape)
-             checkpoint[path_map["key"]] = key.reshape(target_shape)
-             checkpoint[path_map["value"]] = value.reshape(target_shape)
-
-     for path in paths:
-         new_path = path["new"]
-
-         # These have already been assigned
-         if (
-             attention_paths_to_split is not None
-             and new_path in attention_paths_to_split
-         ):
-             continue
-
-         # Global renaming happens here
-         new_path = new_path.replace("middle_block.0", "mid_block.resnets.0")
-         new_path = new_path.replace("middle_block.1", "mid_block.attentions.0")
-         new_path = new_path.replace("middle_block.2", "mid_block.resnets.1")
-
-         if additional_replacements is not None:
-             for replacement in additional_replacements:
-                 new_path = new_path.replace(replacement["old"], replacement["new"])
-
-         # proj_attn.weight has to be converted from conv 1D to linear
-         is_attn_weight = "proj_attn.weight" in new_path or (
-             "attentions" in new_path and "to_" in new_path
-         )
-         shape = old_checkpoint[path["old"]].shape
-         if is_attn_weight and len(shape) == 3:
-             checkpoint[new_path] = old_checkpoint[path["old"]][:, :, 0]
-         elif is_attn_weight and len(shape) == 4:
-             checkpoint[new_path] = old_checkpoint[path["old"]][:, :, 0, 0]
-         else:
-             checkpoint[new_path] = old_checkpoint[path["old"]]
-
-
- def shave_segments(path, n_shave_prefix_segments=1):
-     """
-     Removes segments. Positive values shave the first segments, negative shave the last segments.
-     """
-     if n_shave_prefix_segments >= 0:
-         return ".".join(path.split(".")[n_shave_prefix_segments:])
-     else:
-         return ".".join(path.split(".")[:n_shave_prefix_segments])
-
-
- def create_vae_diffusers_config(original_config, image_size):
-     """
-     Creates a config for the diffusers based on the config of the LDM model.
-     """
-
-
-     if 'imagedream' in original_config.model.target:
-         vae_params = original_config.model.params.vae_config.params.ddconfig
-         _ = original_config.model.params.vae_config.params.embed_dim
-         vae_key = "vae_model."
-     else:
-         vae_params = original_config.model.params.first_stage_config.params.ddconfig
-         _ = original_config.model.params.first_stage_config.params.embed_dim
-         vae_key = "first_stage_model."
-
-     block_out_channels = [vae_params.ch * mult for mult in vae_params.ch_mult]
-     down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
-     up_block_types = ["UpDecoderBlock2D"] * len(block_out_channels)
-
-     config = {
-         "sample_size": image_size,
-         "in_channels": vae_params.in_channels,
-         "out_channels": vae_params.out_ch,
-         "down_block_types": tuple(down_block_types),
-         "up_block_types": tuple(up_block_types),
-         "block_out_channels": tuple(block_out_channels),
-         "latent_channels": vae_params.z_channels,
-         "layers_per_block": vae_params.num_res_blocks,
-     }
-     return config, vae_key
-
-
- def convert_ldm_vae_checkpoint(checkpoint, config, vae_key):
-     # extract state dict for VAE
-     vae_state_dict = {}
-     keys = list(checkpoint.keys())
-     for key in keys:
-         if key.startswith(vae_key):
-             vae_state_dict[key.replace(vae_key, "")] = checkpoint.get(key)
-
-     new_checkpoint = {}
-
-     new_checkpoint["encoder.conv_in.weight"] = vae_state_dict["encoder.conv_in.weight"]
-     new_checkpoint["encoder.conv_in.bias"] = vae_state_dict["encoder.conv_in.bias"]
-     new_checkpoint["encoder.conv_out.weight"] = vae_state_dict[
-         "encoder.conv_out.weight"
-     ]
-     new_checkpoint["encoder.conv_out.bias"] = vae_state_dict["encoder.conv_out.bias"]
-     new_checkpoint["encoder.conv_norm_out.weight"] = vae_state_dict[
-         "encoder.norm_out.weight"
-     ]
-     new_checkpoint["encoder.conv_norm_out.bias"] = vae_state_dict[
-         "encoder.norm_out.bias"
-     ]
-
-     new_checkpoint["decoder.conv_in.weight"] = vae_state_dict["decoder.conv_in.weight"]
-     new_checkpoint["decoder.conv_in.bias"] = vae_state_dict["decoder.conv_in.bias"]
-     new_checkpoint["decoder.conv_out.weight"] = vae_state_dict[
-         "decoder.conv_out.weight"
-     ]
-     new_checkpoint["decoder.conv_out.bias"] = vae_state_dict["decoder.conv_out.bias"]
-     new_checkpoint["decoder.conv_norm_out.weight"] = vae_state_dict[
-         "decoder.norm_out.weight"
-     ]
-     new_checkpoint["decoder.conv_norm_out.bias"] = vae_state_dict[
-         "decoder.norm_out.bias"
-     ]
-
-     new_checkpoint["quant_conv.weight"] = vae_state_dict["quant_conv.weight"]
-     new_checkpoint["quant_conv.bias"] = vae_state_dict["quant_conv.bias"]
-     new_checkpoint["post_quant_conv.weight"] = vae_state_dict["post_quant_conv.weight"]
-     new_checkpoint["post_quant_conv.bias"] = vae_state_dict["post_quant_conv.bias"]
-
-     # Retrieves the keys for the encoder down blocks only
-     num_down_blocks = len(
-         {
-             ".".join(layer.split(".")[:3])
-             for layer in vae_state_dict
-             if "encoder.down" in layer
-         }
-     )
-     down_blocks = {
-         layer_id: [key for key in vae_state_dict if f"down.{layer_id}" in key]
-         for layer_id in range(num_down_blocks)
-     }
-
-     # Retrieves the keys for the decoder up blocks only
-     num_up_blocks = len(
-         {
-             ".".join(layer.split(".")[:3])
-             for layer in vae_state_dict
-             if "decoder.up" in layer
-         }
-     )
-     up_blocks = {
-         layer_id: [key for key in vae_state_dict if f"up.{layer_id}" in key]
-         for layer_id in range(num_up_blocks)
-     }
-
-     for i in range(num_down_blocks):
-         resnets = [
-             key
-             for key in down_blocks[i]
-             if f"down.{i}" in key and f"down.{i}.downsample" not in key
-         ]
-
-         if f"encoder.down.{i}.downsample.conv.weight" in vae_state_dict:
-             new_checkpoint[
-                 f"encoder.down_blocks.{i}.downsamplers.0.conv.weight"
-             ] = vae_state_dict.pop(f"encoder.down.{i}.downsample.conv.weight")
-             new_checkpoint[
-                 f"encoder.down_blocks.{i}.downsamplers.0.conv.bias"
-             ] = vae_state_dict.pop(f"encoder.down.{i}.downsample.conv.bias")
-
-         paths = renew_vae_resnet_paths(resnets)
-         meta_path = {"old": f"down.{i}.block", "new": f"down_blocks.{i}.resnets"}
-         assign_to_checkpoint(
-             paths,
-             new_checkpoint,
-             vae_state_dict,
-             additional_replacements=[meta_path],
-             config=config,
-         )
-
-     mid_resnets = [key for key in vae_state_dict if "encoder.mid.block" in key]
-     num_mid_res_blocks = 2
-     for i in range(1, num_mid_res_blocks + 1):
-         resnets = [key for key in mid_resnets if f"encoder.mid.block_{i}" in key]
-
-         paths = renew_vae_resnet_paths(resnets)
-         meta_path = {"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"}
-         assign_to_checkpoint(
-             paths,
-             new_checkpoint,
-             vae_state_dict,
-             additional_replacements=[meta_path],
-             config=config,
-         )
-
-     mid_attentions = [key for key in vae_state_dict if "encoder.mid.attn" in key]
-     paths = renew_vae_attention_paths(mid_attentions)
-     meta_path = {"old": "mid.attn_1", "new": "mid_block.attentions.0"}
-     assign_to_checkpoint(
-         paths,
-         new_checkpoint,
-         vae_state_dict,
-         additional_replacements=[meta_path],
-         config=config,
-     )
-     conv_attn_to_linear(new_checkpoint)
-
-     for i in range(num_up_blocks):
-         block_id = num_up_blocks - 1 - i
-         resnets = [
-             key
-             for key in up_blocks[block_id]
-             if f"up.{block_id}" in key and f"up.{block_id}.upsample" not in key
-         ]
-
-         if f"decoder.up.{block_id}.upsample.conv.weight" in vae_state_dict:
-             new_checkpoint[
-                 f"decoder.up_blocks.{i}.upsamplers.0.conv.weight"
-             ] = vae_state_dict[f"decoder.up.{block_id}.upsample.conv.weight"]
-             new_checkpoint[
-                 f"decoder.up_blocks.{i}.upsamplers.0.conv.bias"
-             ] = vae_state_dict[f"decoder.up.{block_id}.upsample.conv.bias"]
-
-         paths = renew_vae_resnet_paths(resnets)
-         meta_path = {"old": f"up.{block_id}.block", "new": f"up_blocks.{i}.resnets"}
-         assign_to_checkpoint(
-             paths,
-             new_checkpoint,
-             vae_state_dict,
-             additional_replacements=[meta_path],
-             config=config,
-         )
-
-     mid_resnets = [key for key in vae_state_dict if "decoder.mid.block" in key]
-     num_mid_res_blocks = 2
-     for i in range(1, num_mid_res_blocks + 1):
-         resnets = [key for key in mid_resnets if f"decoder.mid.block_{i}" in key]
-
-         paths = renew_vae_resnet_paths(resnets)
-         meta_path = {"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"}
-         assign_to_checkpoint(
-             paths,
-             new_checkpoint,
-             vae_state_dict,
-             additional_replacements=[meta_path],
-             config=config,
-         )
-
-     mid_attentions = [key for key in vae_state_dict if "decoder.mid.attn" in key]
-     paths = renew_vae_attention_paths(mid_attentions)
-     meta_path = {"old": "mid.attn_1", "new": "mid_block.attentions.0"}
-     assign_to_checkpoint(
-         paths,
-         new_checkpoint,
-         vae_state_dict,
-         additional_replacements=[meta_path],
-         config=config,
-     )
-     conv_attn_to_linear(new_checkpoint)
-     return new_checkpoint
-
-
- def renew_vae_resnet_paths(old_list, n_shave_prefix_segments=0):
-     """
-     Updates paths inside resnets to the new naming scheme (local renaming)
-     """
-     mapping = []
-     for old_item in old_list:
-         new_item = old_item
-
-         new_item = new_item.replace("nin_shortcut", "conv_shortcut")
-         new_item = shave_segments(
-             new_item, n_shave_prefix_segments=n_shave_prefix_segments
-         )
-
-         mapping.append({"old": old_item, "new": new_item})
-
-     return mapping
-
-
- def renew_vae_attention_paths(old_list, n_shave_prefix_segments=0):
-     """
-     Updates paths inside attentions to the new naming scheme (local renaming)
-     """
-     mapping = []
-     for old_item in old_list:
-         new_item = old_item
-
-         new_item = new_item.replace("norm.weight", "group_norm.weight")
-         new_item = new_item.replace("norm.bias", "group_norm.bias")
-
-         new_item = new_item.replace("q.weight", "to_q.weight")
-         new_item = new_item.replace("q.bias", "to_q.bias")
-
-         new_item = new_item.replace("k.weight", "to_k.weight")
-         new_item = new_item.replace("k.bias", "to_k.bias")
-
-         new_item = new_item.replace("v.weight", "to_v.weight")
-         new_item = new_item.replace("v.bias", "to_v.bias")
-
-         new_item = new_item.replace("proj_out.weight", "to_out.0.weight")
-         new_item = new_item.replace("proj_out.bias", "to_out.0.bias")
-
-         new_item = shave_segments(
-             new_item, n_shave_prefix_segments=n_shave_prefix_segments
-         )
-
-         mapping.append({"old": old_item, "new": new_item})
-
-     return mapping
-
-
- def conv_attn_to_linear(checkpoint):
-     keys = list(checkpoint.keys())
-     attn_keys = ["query.weight", "key.weight", "value.weight"]
-     for key in keys:
-         if ".".join(key.split(".")[-2:]) in attn_keys:
-             if checkpoint[key].ndim > 2:
-                 checkpoint[key] = checkpoint[key][:, :, 0, 0]
-         elif "proj_attn.weight" in key:
-             if checkpoint[key].ndim > 2:
-                 checkpoint[key] = checkpoint[key][:, :, 0]
-
-
- def create_unet_config(original_config) -> Any:
-     return OmegaConf.to_container(
-         original_config.model.params.unet_config.params, resolve=True
-     )
-
-
- def convert_from_original_mvdream_ckpt(checkpoint_path, original_config_file, device):
-     checkpoint = torch.load(checkpoint_path, map_location=device)
-     # print(f"Checkpoint: {checkpoint.keys()}")
-     torch.cuda.empty_cache()
-
-     original_config = OmegaConf.load(original_config_file)
-     # print(f"Original Config: {original_config}")
-     prediction_type = "epsilon"
-     image_size = 256
-     num_train_timesteps = (
-         getattr(original_config.model.params, "timesteps", None) or 1000
-     )
-     beta_start = getattr(original_config.model.params, "linear_start", None) or 0.02
-     beta_end = getattr(original_config.model.params, "linear_end", None) or 0.085
-     scheduler = DDIMScheduler(
-         beta_end=beta_end,
-         beta_schedule="scaled_linear",
-         beta_start=beta_start,
-         num_train_timesteps=num_train_timesteps,
-         steps_offset=1,
-         clip_sample=False,
-         set_alpha_to_one=False,
-         prediction_type=prediction_type,
-     )
-     scheduler.register_to_config(clip_sample=False)
-
-     unet_config = create_unet_config(original_config)
-
-     # remove unused configs
-     unet_config.pop('legacy', None)
-     unet_config.pop('use_linear_in_transformer', None)
-     unet_config.pop('use_spatial_transformer', None)
-
-     unet_config.pop('ip_mode', None)
-     unet_config.pop('with_ip', None)
-
-     unet = MultiViewUNetModel(**unet_config)
-     unet.register_to_config(**unet_config)
-     # print(f"Unet State Dict: {unet.state_dict().keys()}")
-     unet.load_state_dict(
-         {
-             key.replace("model.diffusion_model.", ""): value
-             for key, value in checkpoint.items()
-             if key.replace("model.diffusion_model.", "") in unet.state_dict()
-         }
-     )
-     for param_name, param in unet.state_dict().items():
-         set_module_tensor_to_device(unet, param_name, device=device, value=param)
-
-     # Convert the VAE model.
-     vae_config, vae_key = create_vae_diffusers_config(original_config, image_size=image_size)
-     converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config, vae_key)
-
-     if (
-         "model" in original_config
-         and "params" in original_config.model
-         and "scale_factor" in original_config.model.params
-     ):
-         vae_scaling_factor = original_config.model.params.scale_factor
-     else:
-         vae_scaling_factor = 0.18215  # default SD scaling factor
-
-     vae_config["scaling_factor"] = vae_scaling_factor
-
-     with init_empty_weights():
-         vae = AutoencoderKL(**vae_config)
-
-     for param_name, param in converted_vae_checkpoint.items():
-         set_module_tensor_to_device(vae, param_name, device=device, value=param)
-
-     # we only supports SD 2.1 based model
-     tokenizer: CLIPTokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2-1", subfolder="tokenizer")
-     text_encoder: CLIPTextModel = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2-1", subfolder="text_encoder").to(device=device)  # type: ignore
-
-     # imagedream variant
-     if unet.ip_dim > 0:
-         feature_extractor: CLIPImageProcessor = CLIPImageProcessor.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
-         image_encoder: CLIPVisionModel = CLIPVisionModel.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
-     else:
-         feature_extractor = None
-         image_encoder = None
-
-     pipe = MVDreamPipeline(
-         vae=vae,
-         unet=unet,
-         tokenizer=tokenizer,
-         text_encoder=text_encoder,
-         scheduler=scheduler,
-         feature_extractor=feature_extractor,
-         image_encoder=image_encoder,
-     )
-
-     return pipe
-
-
- if __name__ == "__main__":
-     parser = argparse.ArgumentParser()
-
-     parser.add_argument(
-         "--checkpoint_path",
-         default=None,
-         type=str,
-         required=True,
-         help="Path to the checkpoint to convert.",
-     )
-     parser.add_argument(
-         "--original_config_file",
-         default=None,
-         type=str,
-         help="The YAML config file corresponding to the original architecture.",
-     )
-     parser.add_argument(
-         "--to_safetensors",
-         action="store_true",
-         help="Whether to store pipeline in safetensors format or not.",
-     )
-     parser.add_argument(
-         "--half", action="store_true", help="Save weights in half precision."
-     )
-     parser.add_argument(
-         "--test",
-         action="store_true",
-         help="Whether to test inference after convertion.",
-     )
-     parser.add_argument(
-         "--dump_path",
-         default=None,
-         type=str,
-         required=True,
-         help="Path to the output model.",
-     )
-     parser.add_argument(
-         "--device", type=str, help="Device to use (e.g. cpu, cuda:0, cuda:1, etc.)"
-     )
-     args = parser.parse_args()
-
-     args.device = torch.device(
-         args.device
-         if args.device is not None
-         else "cuda"
-         if torch.cuda.is_available()
-         else "cpu"
-     )
-
-     pipe = convert_from_original_mvdream_ckpt(
-         checkpoint_path=args.checkpoint_path,
-         original_config_file=args.original_config_file,
-         device=args.device,
-     )
-
-     if args.half:
-         pipe.to(torch_dtype=torch.float16)
-
-     print(f"Saving pipeline to {args.dump_path}...")
-     pipe.save_pretrained(args.dump_path, safe_serialization=args.to_safetensors)
-
-     if args.test:
-         try:
-             # mvdream
-             if pipe.unet.ip_dim == 0:
-                 print(f"Testing each subcomponent of the pipeline...")
-                 images = pipe(
-                     prompt="Head of Hatsune Miku",
-                     negative_prompt="painting, bad quality, flat",
-                     output_type="pil",
-                     guidance_scale=7.5,
-                     num_inference_steps=50,
-                     device=args.device,
-                 )
-                 for i, image in enumerate(images):
-                     image.save(f"test_image_{i}.png")  # type: ignore
-
-                 print(f"Testing entire pipeline...")
-                 loaded_pipe = MVDreamPipeline.from_pretrained(args.dump_path)  # type: ignore
-                 images = loaded_pipe(
-                     prompt="Head of Hatsune Miku",
-                     negative_prompt="painting, bad quality, flat",
-                     output_type="pil",
-                     guidance_scale=7.5,
-                     num_inference_steps=50,
-                     device=args.device,
-                 )
-                 for i, image in enumerate(images):
-                     image.save(f"test_image_{i}.png")  # type: ignore
-             # imagedream
-             else:
-                 input_image = kiui.read_image('data/anya_rgba.png', mode='float')
-                 print(f"Testing each subcomponent of the pipeline...")
-                 images = pipe(
-                     image=input_image,
-                     prompt="",
-                     negative_prompt="",
-                     output_type="pil",
-                     guidance_scale=5.0,
-                     num_inference_steps=50,
-                     device=args.device,
-                 )
-                 for i, image in enumerate(images):
-                     image.save(f"test_image_{i}.png")  # type: ignore
-
-                 print(f"Testing entire pipeline...")
-                 loaded_pipe = MVDreamPipeline.from_pretrained(args.dump_path)  # type: ignore
-                 images = loaded_pipe(
-                     image=input_image,
-                     prompt="",
-                     negative_prompt="",
-                     output_type="pil",
-                     guidance_scale=5.0,
-                     num_inference_steps=50,
-                     device=args.device,
-                 )
-                 for i, image in enumerate(images):
-                     image.save(f"test_image_{i}.png")  # type: ignore
-
-
-             print("Inference test passed!")
-         except Exception as e:
-             print(f"Failed to test inference: {e}")
requirements.lock.txt DELETED
@@ -1,7 +0,0 @@
- omegaconf == 2.3.0
- diffusers == 0.23.1
- safetensors == 0.4.1
- huggingface_hub == 0.19.4
- transformers == 4.35.2
- accelerate == 0.25.0.dev0
- kiui == 0.2.0
requirements.txt DELETED
@@ -1,9 +0,0 @@
- omegaconf
- diffusers
- safetensors
- huggingface_hub
- transformers
- accelerate
- kiui
- einops
- rich