rynmurdock committed on
Commit
6f68207
1 Parent(s): 1146833

May be faster; will be different qualitatively; may revert

Browse files
Files changed (1) hide show
  1. app.py +59 -34
app.py CHANGED
@@ -1,4 +1,4 @@
1
- DEVICE = 'cpu'
2
 
3
  import gradio as gr
4
  import numpy as np
@@ -6,11 +6,9 @@ from sklearn.svm import LinearSVC
6
  from sklearn import preprocessing
7
  import pandas as pd
8
 
9
- from diffusers import LCMScheduler, AutoencoderTiny, EulerDiscreteScheduler, UNet2DConditionModel
10
  from diffusers.models import ImageProjection
11
- from patch_sdxl import SDEmb
12
  import torch
13
- import spaces
14
 
15
  import random
16
  import time
@@ -22,8 +20,10 @@ from PIL import Image
22
  import requests
23
  from io import BytesIO, StringIO
24
 
 
25
  from huggingface_hub import hf_hub_download
26
  from safetensors.torch import load_file
 
27
 
28
  prompt_list = [p for p in list(set(
29
  pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
@@ -36,11 +36,17 @@ sdxl_lightening = "ByteDance/SDXL-Lightning"
36
  ckpt = "sdxl_lightning_2step_unet.safetensors"
37
  unet = UNet2DConditionModel.from_config(model_id, subfolder="unet").to("cuda", torch.float16)
38
  unet.load_state_dict(load_file(hf_hub_download(sdxl_lightening, ckpt), device="cuda"))
39
- pipe = SDEmb.from_pretrained(model_id, unet=unet, torch_dtype=torch.float16, variant="fp16").to("cuda")
 
 
 
 
 
 
40
  pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
41
  pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
42
  pipe.to(device='cuda')
43
- pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
44
 
45
  output_hidden_state = False
46
  #######################
@@ -54,14 +60,27 @@ def predict(
54
  """Run a single prediction on the model"""
55
  with torch.no_grad():
56
  if im_emb == None:
57
- im_emb = torch.zeros(1, 1280, dtype=torch.float16, device='cuda')
58
- image = pipe(
59
- prompt=prompt,
60
- ip_adapter_emb=im_emb.to('cuda'),
61
- height=1024,
62
- width=1024,
63
- num_inference_steps=2,
64
- guidance_scale=0,
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  ).images[0]
66
  im_emb, _ = pipe.encode_image(
67
  image, 'cuda', 1, output_hidden_state
@@ -77,8 +96,8 @@ def next_image(embs, ys, calibrate_prompts):
77
 
78
  # handle case where every instance of calibration prompts is 'Neither' or 'Like' or 'Dislike'
79
  if len(calibrate_prompts) == 0 and len(list(set(ys))) <= 1:
80
- embs.append(.01*torch.randn(1, 1280))
81
- embs.append(.01*torch.randn(1, 1280))
82
  ys.append(0)
83
  ys.append(1)
84
 
@@ -92,35 +111,41 @@ def next_image(embs, ys, calibrate_prompts):
92
  return image, embs, ys, calibrate_prompts
93
  else:
94
  print('######### Roaming #########')
95
- # sample only as many negatives as there are positives
96
- indices = range(len(ys))
97
- pos_indices = [i for i in indices if ys[i] == 1]
98
- neg_indices = [i for i in indices if ys[i] == 0]
99
- lower = min(len(pos_indices), len(neg_indices))
100
- neg_indices = random.sample(neg_indices, lower)
101
- pos_indices = random.sample(pos_indices, lower)
102
-
103
- cut_embs = [embs[i] for i in neg_indices] + [embs[i] for i in pos_indices]
104
- cut_ys = [ys[i] for i in neg_indices] + [ys[i] for i in pos_indices]
105
-
106
- feature_embs = torch.stack([e[0].detach().cpu() for e in cut_embs])
 
 
 
 
 
 
107
  scaler = preprocessing.StandardScaler().fit(feature_embs)
108
  feature_embs = scaler.transform(feature_embs)
109
- print(np.array(feature_embs).shape, np.array(ys).shape)
110
 
111
- lin_class = LinearSVC(max_iter=50000, dual='auto', class_weight='balanced').fit(np.array(feature_embs), np.array(cut_ys))
112
  lin_class.coef_ = torch.tensor(lin_class.coef_, dtype=torch.double)
113
  lin_class.coef_ = (lin_class.coef_.flatten() / (lin_class.coef_.flatten().norm())).unsqueeze(0)
114
 
115
-
116
  rng_prompt = random.choice(prompt_list)
117
-
118
  w = 1# if len(embs) % 2 == 0 else 0
119
  im_emb = w * lin_class.coef_.to(device=DEVICE, dtype=torch.float16)
120
- prompt= 'an image' if glob_idx % 2 == 0 else rng_prompt
121
- print(prompt)
122
  image, im_emb = predict(prompt, im_emb)
123
  embs.append(im_emb)
 
 
 
124
  return image, embs, ys, calibrate_prompts
125
 
126
 
 
1
+ DEVICE = 'cuda'
2
 
3
  import gradio as gr
4
  import numpy as np
 
6
  from sklearn import preprocessing
7
  import pandas as pd
8
 
9
+ from diffusers import LCMScheduler, AutoencoderTiny, EulerDiscreteScheduler, UNet2DConditionModel, AutoPipelineForText2Image
10
  from diffusers.models import ImageProjection
 
11
  import torch
 
12
 
13
  import random
14
  import time
 
20
  import requests
21
  from io import BytesIO, StringIO
22
 
23
+ from transformers import CLIPVisionModelWithProjection
24
  from huggingface_hub import hf_hub_download
25
  from safetensors.torch import load_file
26
+ import spaces
27
 
28
  prompt_list = [p for p in list(set(
29
  pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
 
36
  ckpt = "sdxl_lightning_2step_unet.safetensors"
37
  unet = UNet2DConditionModel.from_config(model_id, subfolder="unet").to("cuda", torch.float16)
38
  unet.load_state_dict(load_file(hf_hub_download(sdxl_lightening, ckpt), device="cuda"))
39
+
40
+ image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=torch.float16,).to("cuda")
41
+ pipe = AutoPipelineForText2Image.from_pretrained(model_id, unet=unet, torch_dtype=torch.float16, variant="fp16", image_encoder=image_encoder).to("cuda")
42
+ pipe.unet._load_ip_adapter_weights(torch.load(hf_hub_download('h94/IP-Adapter', 'sdxl_models/ip-adapter_sdxl_vit-h.bin')))
43
+ pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl_vit-h.bin")
44
+ pipe.register_modules(image_encoder = image_encoder)
45
+
46
  pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
47
  pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
48
  pipe.to(device='cuda')
49
+
50
 
51
  output_hidden_state = False
52
  #######################
 
60
  """Run a single prediction on the model"""
61
  with torch.no_grad():
62
  if im_emb == None:
63
+ im_emb = torch.zeros(1, 1024, dtype=torch.float16, device='cuda')
64
+
65
+ im_emb = [im_emb.to('cuda').unsqueeze(0)]
66
+ if prompt == '':
67
+ image = pipe(
68
+ prompt_embeds=torch.zeros(1, 1, 2048, dtype=torch.float16, device='cuda'),
69
+ pooled_prompt_embeds=torch.zeros(1, 1280, dtype=torch.float16, device='cuda'),
70
+ ip_adapter_image_embeds=im_emb,
71
+ height=1024,
72
+ width=1024,
73
+ num_inference_steps=2,
74
+ guidance_scale=0,
75
+ ).images[0]
76
+ else:
77
+ image = pipe(
78
+ prompt=prompt,
79
+ ip_adapter_image_embeds=im_emb,
80
+ height=1024,
81
+ width=1024,
82
+ num_inference_steps=2,
83
+ guidance_scale=0,
84
  ).images[0]
85
  im_emb, _ = pipe.encode_image(
86
  image, 'cuda', 1, output_hidden_state
 
96
 
97
  # handle case where every instance of calibration prompts is 'Neither' or 'Like' or 'Dislike'
98
  if len(calibrate_prompts) == 0 and len(list(set(ys))) <= 1:
99
+ embs.append(.01*torch.randn(1, 1024))
100
+ embs.append(.01*torch.randn(1, 1024))
101
  ys.append(0)
102
  ys.append(1)
103
 
 
111
  return image, embs, ys, calibrate_prompts
112
  else:
113
  print('######### Roaming #########')
114
+ # sample 80% of the rated embeddings (but at least two) for some stochasticity.
115
+ n_to_choose = max(int(len(embs)*.8), 2)
116
+ indices = random.sample(range(len(embs)), n_to_choose)
117
+
118
+ # also add the index of the most recent 0 label and the most recent 1 label
119
+ has_0 = False
120
+ has_1 = False
121
+ for i in reversed(range(len(ys))):
122
+ if ys[i] == 0 and has_0 == False:
123
+ indices.append(i)
124
+ has_0 = True
125
+ elif ys[i] == 1 and has_1 == False:
126
+ indices.append(i)
127
+ has_1 = True
128
+ if has_0 and has_1:
129
+ break
130
+
131
+ feature_embs = np.array(torch.cat([embs[i] for i in indices]).to('cpu'))
132
  scaler = preprocessing.StandardScaler().fit(feature_embs)
133
  feature_embs = scaler.transform(feature_embs)
 
134
 
135
+ lin_class = LinearSVC(max_iter=50000, dual='auto', class_weight='balanced').fit(feature_embs, np.array([ys[i] for i in indices]))
136
  lin_class.coef_ = torch.tensor(lin_class.coef_, dtype=torch.double)
137
  lin_class.coef_ = (lin_class.coef_.flatten() / (lin_class.coef_.flatten().norm())).unsqueeze(0)
138
 
 
139
  rng_prompt = random.choice(prompt_list)
 
140
  w = 1# if len(embs) % 2 == 0 else 0
141
  im_emb = w * lin_class.coef_.to(device=DEVICE, dtype=torch.float16)
142
+ prompt= '' if glob_idx % 2 == 0 else rng_prompt
143
+ print(prompt, len(ys))
144
  image, im_emb = predict(prompt, im_emb)
145
  embs.append(im_emb)
146
+ if len(embs) > 100:
147
+ embs.pop(0)
148
+ ys.pop(0)
149
  return image, embs, ys, calibrate_prompts
150
 
151