rynmurdock committed on
Commit
05e29c3
1 Parent(s): f2cdd37

using torch.compile over sfast for compatibility; other smol changes
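The change in a nutshell, as a minimal sketch (model id and module names are taken from the diff below; this is illustrative, not the app's exact code): stable-fast compiled the whole diffusers pipeline through its own compiler, shipped as a pinned wheel, while torch.compile is built into PyTorch >= 2.0, so it works wherever PyTorch does and can wrap the hot submodules directly.

import torch
from diffusers import AutoPipelineForText2Image

# Before (removed in this commit): stable-fast's whole-pipeline compiler.
#   from sfast.compilers.diffusion_pipeline_compiler import compile, CompilationConfig
#   pipe = compile(pipe, config=CompilationConfig.Default())

# After: torch.compile on the modules that dominate inference time.
pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipe.unet = torch.compile(pipe.unet)  # compiled lazily on the first forward pass
pipe.vae = torch.compile(pipe.vae)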

Files changed (2):
  1. app.py +25 -23
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,23 +1,22 @@
 DEVICE = 'cuda'
 
-from sfast.compilers.diffusion_pipeline_compiler import (compile,
-                                                         CompilationConfig)
-config = CompilationConfig.Default()
-
 import gradio as gr
 import numpy as np
 from sklearn.svm import LinearSVC
 from sklearn import preprocessing
 import pandas as pd
 
-from diffusers import LCMScheduler, AutoencoderTiny, EulerDiscreteScheduler, UNet2DConditionModel, AutoPipelineForText2Image
+from diffusers import LCMScheduler, AutoencoderTiny, EulerDiscreteScheduler, UNet2DConditionModel, AutoPipelineForText2Image, DiffusionPipeline
 from diffusers.models import ImageProjection
 import torch
 
+torch.set_float32_matmul_precision('high')
+
 import random
 import time
 
-import torch
+# TODO put back
+import spaces
 from urllib.request import urlopen
 
 from PIL import Image
@@ -27,7 +26,6 @@ from io import BytesIO, StringIO
 from transformers import CLIPVisionModelWithProjection
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
-#import spaces
 
 prompt_list = [p for p in list(set(
     pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
@@ -35,6 +33,7 @@ prompt_list = [p for p in list(set(
 start_time = time.time()
 
 ####################### Setup Model
+
 model_id = "stabilityai/stable-diffusion-xl-base-1.0"
 sdxl_lightening = "ByteDance/SDXL-Lightning"
 ckpt = "sdxl_lightning_2step_unet.safetensors"
@@ -46,20 +45,20 @@ pipe = AutoPipelineForText2Image.from_pretrained(model_id, unet=unet, torch_dtyp
 pipe.unet._load_ip_adapter_weights(torch.load(hf_hub_download('h94/IP-Adapter', 'sdxl_models/ip-adapter_sdxl_vit-h.bin')))
 pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl_vit-h.bin")
 pipe.register_modules(image_encoder = image_encoder)
+pipe.set_ip_adapter_scale(0.8)
 
 pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
 pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
+
 pipe.to(device=DEVICE)
-pipe = compile(pipe, config=config)
 
-image = pipe(prompt_embeds=torch.zeros(1, 1, 2048, dtype=torch.float16, device=DEVICE),
-             pooled_prompt_embeds=torch.zeros(1, 1280, dtype=torch.float16, device=DEVICE),
-             ip_adapter_image_embeds=[torch.zeros(1, 1, 1024, dtype=torch.float16, device=DEVICE)],
-             height=1024,
-             width=1024,
-             num_inference_steps=2,
-             guidance_scale=0,
-             ).images[0]
+
+# TODO put back
+@spaces.GPU
+def compile_em():
+    pipe.unet = torch.compile(pipe.unet)
+    pipe.vae = torch.compile(pipe.vae)
+    autoencoder.model.forward = torch.compile(autoencoder.model.forward, backend='inductor', dynamic=True)
 
 
 output_hidden_state = False
@@ -76,7 +75,6 @@ class BottleneckT5Autoencoder:
         self.tokenizer = AutoTokenizer.from_pretrained(model_path, model_max_length=512, torch_dtype=torch.bfloat16)
         self.model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True).to(self.device)
         self.model.eval()
-        # self.model = torch.compile(self.model)
 
 
     def embed(self, text: str) -> torch.FloatTensor:
@@ -88,7 +86,7 @@ class BottleneckT5Autoencoder:
             encode_only=True,
         )
 
-    def generate_from_latent(self, latent: torch.FloatTensor, max_length=512, temperature=1., top_p=.8, length_penalty=10, min_new_tokens=30) -> str:
+    def generate_from_latent(self, latent: torch.FloatTensor, max_length=512, temperature=1., top_p=.8, min_new_tokens=30) -> str:
         dummy_text = '.'
         dummy = self.embed(dummy_text)
         perturb_vector = latent - dummy
@@ -101,7 +99,6 @@ class BottleneckT5Autoencoder:
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
-           length_penalty=length_penalty,
            min_new_tokens=min_new_tokens,
            # num_beams=8,
        )
@@ -109,8 +106,11 @@
 
 autoencoder = BottleneckT5Autoencoder(model_path='thesephist/contra-bottleneck-t5-xl-wikipedia')
 
+compile_em()
 #######################
 
+# TODO put back
+@spaces.GPU
 def generate(prompt, in_embs=None,):
     if prompt != '':
         print(prompt)
@@ -119,12 +119,12 @@ def generate(prompt, in_embs=None,):
     else:
         print('From embeds.')
         in_embs = in_embs / in_embs.abs().max() * .15
-    text = autoencoder.generate_from_latent(in_embs.to('cuda').to(dtype=torch.bfloat16), temperature=.3, top_p=.99, min_new_tokens=5)
+    text = autoencoder.generate_from_latent(in_embs.to('cuda').to(dtype=torch.bfloat16), temperature=.8, top_p=.94, min_new_tokens=5)
     return text, in_embs.to('cpu')
 
 
-
-#@spaces.GPU
+# TODO put back
+@spaces.GPU
 def predict(
     prompt,
     im_emb=None,
@@ -145,6 +145,7 @@ def predict(
             width=1024,
             num_inference_steps=2,
             guidance_scale=0,
+            # timesteps=[800],
         ).images[0]
     else:
         image = pipe(
@@ -154,6 +155,7 @@ def predict(
            width=1024,
            num_inference_steps=2,
            guidance_scale=0,
+           # timesteps=[800],
        ).images[0]
     im_emb, _ = pipe.encode_image(
         image, DEVICE, 1, output_hidden_state
@@ -232,7 +234,7 @@ def next_image(embs, img_embs, ys, calibrate_prompts):
     rng_prompt = random.choice(prompt_list)
     w = 1.4# if len(embs) % 2 == 0 else 0
 
-    prompt= '' if glob_idx % 2 == 0 else rng_prompt
+    prompt= '' if glob_idx % 3 == 0 else rng_prompt
     prompt, _ = generate(prompt, in_embs=im_s)
     print(prompt)
     im_emb = autoencoder.embed(prompt)
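A note on the deleted warm-up: with either compiler, the heavy optimization work happens on the first call, which is why the old code ran a dummy generation right after compile(). After this commit, torch.compile simply traces on the first real request. To pay that cost at startup instead, a throwaway pass like the one removed still works; this is a minimal sketch in which the zero-embedding shapes mirror the deleted code and warmup is a hypothetical helper, not part of the app.

import torch

def warmup(pipe, device='cuda'):
    # Hypothetical helper: one throwaway 2-step generation so torch.compile
    # traces and optimizes pipe.unet / pipe.vae before real traffic arrives.
    pipe(prompt_embeds=torch.zeros(1, 1, 2048, dtype=torch.float16, device=device),
         pooled_prompt_embeds=torch.zeros(1, 1280, dtype=torch.float16, device=device),
         ip_adapter_image_embeds=[torch.zeros(1, 1, 1024, dtype=torch.float16, device=device)],
         height=1024, width=1024, num_inference_steps=2, guidance_scale=0)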
requirements.txt CHANGED
@@ -8,4 +8,3 @@ diffusers
 accelerate
 transformers
 peft
-https://github.com/chengzeyi/stable-fast/releases/download/v1.0.4/stable_fast-1.0.4+torch220cu121-cp310-cp310-manylinux2014_x86_64.whl
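Dropping the wheel also drops a tight environment pin: that stable-fast build only matched torch 2.2.0 + cu121 + CPython 3.10. A quick sanity check for the remaining setup, assuming any PyTorch >= 2.0 is acceptable:

import torch

# torch.compile landed in PyTorch 2.0; older versions lack the attribute.
assert hasattr(torch, 'compile'), 'torch.compile needs PyTorch >= 2.0'
print(torch.__version__)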