williamberman committed on
Commit
1f522d4
1 Parent(s): 3f9bc4d

update to diffusers code

Browse files
Files changed (1) hide show
  1. app.py +7 -28
app.py CHANGED
@@ -4,8 +4,8 @@ import uuid
4
  import gradio as gr
5
  from PIL import Image
6
  import torch
7
- from muse import PipelineMuse, MaskGiTUViT, VQGANModel
8
  from compel import Compel, ReturnedEmbeddingsType
 
9
 
10
  # from swin_ir_2 import load_model, preprocesss_image, postprocess_image
11
 
@@ -23,24 +23,12 @@ def save_images(image_array):
23
  return paths
24
 
25
  device = "cuda" if torch.cuda.is_available() else "cpu"
26
- # pipe = PipelineMuse.from_pretrained("openMUSE/muse-laiona6-uvit-clip-220k").to(device)
27
 
28
- pipe = PipelineMuse.from_pretrained(
29
- transformer_path="valhalla/research-run",
30
- text_encoder_path="openMUSE/clip-vit-large-patch14-text-enc",
31
- vae_path="openMUSE/vqgan-f16-8192-laion",
32
  ).to(device)
33
- pipe.transformer = MaskGiTUViT.from_pretrained("valhalla/research-run-finetuned-journeydb", subfolder="ema_model", revision="06bcd6ab6580a2ed3275ddfc17f463b8574457da").to(device)
34
- pipe.vae = VQGANModel.from_pretrained("valhalla/vqgan-finetune-512-2").to(device)
35
- pipe.tokenizer.pad_token_id = 49407
36
-
37
- # sr_model = load_model().to(device)
38
-
39
- if device == "cuda":
40
- pipe.text_encoder.to(torch.float16)
41
- pipe.transformer.to(torch.float16)
42
- pipe.transformer.enable_xformers_memory_efficient_attention()
43
-
44
 
45
  compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, requires_pooled=True, truncate_long_prompts=False)
46
 
@@ -52,22 +40,13 @@ def infer(prompt, negative="", scale=10, progress=gr.Progress(track_tqdm=True)):
52
  conditioning, negative_conditioning = compel.pad_conditioning_tensors_to_same_length([conditioning, negative_conditioning])
53
 
54
  images = pipe(
55
- prompt,
56
- timesteps=16,
57
- negative_text=negative,
58
  prompt_embeds=conditioning,
59
- pooled_embeds=pooled,
60
  negative_prompt_embeds=negative_conditioning,
61
- negative_pooled_embeds=negative_pooled,
62
  guidance_scale=scale,
63
  num_images_per_prompt=4,
64
  temperature=(3, 1),
65
- orig_size=(512, 512),
66
- crop_coords=(0, 0),
67
- aesthetic_score=6,
68
- use_fp16=device == "cuda",
69
- transformer_seq_len=1024,
70
- use_tqdm=True,
71
  )
72
  print("Done Generating!")
73
  print("Num Images:", len(images))
 
4
  import gradio as gr
5
  from PIL import Image
6
  import torch
 
7
  from compel import Compel, ReturnedEmbeddingsType
8
+ from diffusers import DiffusionPipeline
9
 
10
  # from swin_ir_2 import load_model, preprocesss_image, postprocess_image
11
 
 
23
  return paths
24
 
25
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
26
 
27
+ pipe = DiffusionPipeline.from_pretrained(
28
+ "amused/amused-512",
29
+ variant="fp16",
30
+ torch_dtype=torch.float16,
31
  ).to(device)
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, requires_pooled=True, truncate_long_prompts=False)
34
 
 
40
  conditioning, negative_conditioning = compel.pad_conditioning_tensors_to_same_length([conditioning, negative_conditioning])
41
 
42
  images = pipe(
 
 
 
43
  prompt_embeds=conditioning,
44
+ encoder_hidden_states=pooled,
45
  negative_prompt_embeds=negative_conditioning,
46
+ negative_encoder_hidden_states=negative_pooled,
47
  guidance_scale=scale,
48
  num_images_per_prompt=4,
49
  temperature=(3, 1),
 
 
 
 
 
 
50
  )
51
  print("Done Generating!")
52
  print("Num Images:", len(images))