Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -15,89 +15,9 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
|
|
15 |
import gradio as gr
|
16 |
import random
|
17 |
|
18 |
-
print("os.environ: ", os.environ)
|
19 |
-
|
20 |
device = "cuda"
|
21 |
model_path = "CompVis/stable-diffusion-v1-4"
|
22 |
|
23 |
-
pipe = StableDiffusionInpaintingPipeline.from_pretrained(
|
24 |
-
model_path,
|
25 |
-
revision="fp16",
|
26 |
-
torch_dtype=torch.float16,
|
27 |
-
use_auth_token=os.environ.get("access_token")).to(device)
|
28 |
-
|
29 |
-
import gdown
|
30 |
-
def download_gdrive_url():
|
31 |
-
url = 'https://drive.google.com/u/0/uc?id=1PPO2MCttsmSqyB-vKh5C7SumwFKuhgyj&export=download'
|
32 |
-
output = 'haarcascade_frontalface_default.xml'
|
33 |
-
gdown.download(url, output, quiet=False)
|
34 |
-
|
35 |
-
from torch import autocast
|
36 |
-
def inpaint(p, init_image, mask_image=None, strength=0.75, guidance_scale=7.5, generator=None, num_samples=1, n_iter=1):
|
37 |
-
all_images = []
|
38 |
-
for _ in range(n_iter):
|
39 |
-
with autocast("cuda"):
|
40 |
-
images = pipe(
|
41 |
-
prompt=[p] * num_samples,
|
42 |
-
init_image=init_image,
|
43 |
-
mask_image=mask_image,
|
44 |
-
strength=strength,
|
45 |
-
guidance_scale=guidance_scale,
|
46 |
-
generator=generator,
|
47 |
-
num_inference_steps=75
|
48 |
-
)["sample"]
|
49 |
-
all_images.extend(images)
|
50 |
-
print(len(all_images))
|
51 |
-
return all_images[0]
|
52 |
-
|
53 |
-
def identify_face(user_image):
|
54 |
-
img = cv2.imread(user_image.name) # read the resized image in cv2
|
55 |
-
print(img.shape)
|
56 |
-
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # convert to grayscale
|
57 |
-
download_gdrive_url() #download the haarcascade face recognition stuff
|
58 |
-
haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
|
59 |
-
faces_rect = haar_cascade.detectMultiScale(gray_img, scaleFactor=1.1, minNeighbors=9)
|
60 |
-
for (x, y, w, h) in faces_rect[:1]:
|
61 |
-
mask = np.zeros(img.shape[:2], dtype="uint8")
|
62 |
-
print(mask.shape)
|
63 |
-
cv2.rectangle(mask, (x, y), (x+w, y+h), 255, -1)
|
64 |
-
print(mask.shape)
|
65 |
-
inverted_image = cv2.bitwise_not(mask)
|
66 |
-
return inverted_image
|
67 |
-
|
68 |
-
def sample_images(init_image, mask_image):
|
69 |
-
p = "4K UHD professional profile picture of a person wearing a suit for work"
|
70 |
-
strength=0.65
|
71 |
-
guidance_scale=10
|
72 |
-
num_samples = 1
|
73 |
-
n_iter = 1
|
74 |
-
|
75 |
-
generator = torch.Generator(device="cuda").manual_seed(random.randint(0, 1000000)) # change the seed to get different results
|
76 |
-
all_images = inpaint(p, init_image, mask_image, strength=strength, guidance_scale=guidance_scale, generator=generator, num_samples=num_samples, n_iter=n_iter)
|
77 |
-
return all_images
|
78 |
-
|
79 |
-
def preprocess_image(image):
|
80 |
-
w, h = image.size
|
81 |
-
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
|
82 |
-
image = image.resize((w, h), resample=PIL.Image.LANCZOS)
|
83 |
-
image = np.array(image).astype(np.float32) / 255.0
|
84 |
-
image = image[None].transpose(0, 3, 1, 2)
|
85 |
-
image = torch.from_numpy(image)
|
86 |
-
return 2.0 * image - 1.0
|
87 |
-
|
88 |
-
def preprocess_mask(mask):
|
89 |
-
mask=mask.convert("L")
|
90 |
-
w, h = mask.size
|
91 |
-
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
|
92 |
-
mask = mask.resize((w//8, h//8), resample=PIL.Image.NEAREST)
|
93 |
-
mask = np.array(mask).astype(np.float32) / 255.0
|
94 |
-
mask = np.tile(mask,(4,1,1))
|
95 |
-
mask = mask[None].transpose(0, 1, 2, 3)#what does this step do?
|
96 |
-
mask = 1 - mask #repaint white, keep black
|
97 |
-
mask = torch.from_numpy(mask)
|
98 |
-
return mask
|
99 |
-
|
100 |
-
|
101 |
class StableDiffusionInpaintingPipeline(DiffusionPipeline):
|
102 |
def __init__(
|
103 |
self,
|
@@ -257,7 +177,84 @@ class StableDiffusionInpaintingPipeline(DiffusionPipeline):
|
|
257 |
image = self.numpy_to_pil(image)
|
258 |
|
259 |
return {"sample": image, "nsfw_content_detected": has_nsfw_concept}
|
260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
# accept an image input
|
262 |
# trigger the set of functions to occur => identify face, generate mask, save the inverted face mask, sample for the inverted images
|
263 |
# output the sampled images
|
|
|
15 |
import gradio as gr
|
16 |
import random
|
17 |
|
|
|
|
|
18 |
device = "cuda"
|
19 |
model_path = "CompVis/stable-diffusion-v1-4"
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
class StableDiffusionInpaintingPipeline(DiffusionPipeline):
|
22 |
def __init__(
|
23 |
self,
|
|
|
177 |
image = self.numpy_to_pil(image)
|
178 |
|
179 |
return {"sample": image, "nsfw_content_detected": has_nsfw_concept}
|
180 |
+
|
181 |
+
# Build the inpainting pipeline once at import time. The "fp16" revision is
# requested together with a float16 torch dtype, and the hub token is read
# from the "access_token" environment variable (None when it is not set).
pipe = StableDiffusionInpaintingPipeline.from_pretrained(
    model_path,
    revision="fp16",
    torch_dtype=torch.float16,
    use_auth_token=os.environ.get("access_token"),
)
# Move the loaded pipeline onto the configured device.
pipe = pipe.to(device)
|
186 |
+
|
187 |
+
import gdown
|
188 |
+
def download_gdrive_url():
    """Make sure the Haar cascade XML is present in the working directory.

    The file is fetched from Google Drive with ``gdown`` and stored as
    ``haarcascade_frontalface_default.xml``. When a non-empty file with that
    name already exists the download is skipped, so repeated calls (one per
    processed image) do not hit the network again.
    """
    import os.path

    url = 'https://drive.google.com/u/0/uc?id=1PPO2MCttsmSqyB-vKh5C7SumwFKuhgyj&export=download'
    output = 'haarcascade_frontalface_default.xml'
    # A zero-byte file is treated as a failed earlier download and re-fetched.
    if os.path.isfile(output) and os.path.getsize(output) > 0:
        return
    gdown.download(url, output, quiet=False)
|
192 |
+
|
193 |
+
from torch import autocast
|
194 |
+
def inpaint(p, init_image, mask_image=None, strength=0.75, guidance_scale=7.5, generator=None, num_samples=1, n_iter=1):
    """Run the module-level inpainting pipeline and return the first image.

    The pipeline is invoked ``n_iter`` times under CUDA autocast, each time
    with ``num_samples`` copies of the prompt ``p`` and 75 inference steps.
    Every ``"sample"`` output is collected, the total count is printed, and
    only the first collected image is returned.
    """
    collected = []
    for _ in range(n_iter):
        call_kwargs = dict(
            prompt=[p] * num_samples,
            init_image=init_image,
            mask_image=mask_image,
            strength=strength,
            guidance_scale=guidance_scale,
            generator=generator,
            num_inference_steps=75,
        )
        with autocast("cuda"):
            result = pipe(**call_kwargs)
        collected += result["sample"]
    print(len(collected))
    return collected[0]
|
210 |
+
|
211 |
+
def identify_face(user_image):
    """Return an inverted rectangular mask for the first face in an image.

    Args:
        user_image: object whose ``.name`` attribute is the path of the image
            file to read (presumably an uploaded temp file -- confirm against
            the caller).

    Returns:
        A ``uint8`` array with the same height and width as the image: 0
        inside the first detected face rectangle and 255 everywhere else.

    Raises:
        ValueError: if the image cannot be read or no face is detected.
        RuntimeError: if the Haar cascade file could not be loaded.
    """
    img = cv2.imread(user_image.name)  # read the image in cv2 (BGR)
    # imread signals failure by returning None instead of raising.
    if img is None:
        raise ValueError(f"could not read an image from {user_image.name!r}")
    print(img.shape)
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale

    download_gdrive_url()  # fetch the haarcascade face-detection model file
    haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    # A missing or corrupt XML yields an empty classifier rather than an error.
    if haar_cascade.empty():
        raise RuntimeError("failed to load haarcascade_frontalface_default.xml")

    faces_rect = haar_cascade.detectMultiScale(gray_img, scaleFactor=1.1, minNeighbors=9)
    # len() works whether the detector returns an empty tuple or an ndarray;
    # plain truthiness would be ambiguous for an ndarray.
    if len(faces_rect) == 0:
        raise ValueError("no face detected in the uploaded image")

    # Only the first detection is used.
    x, y, w, h = faces_rect[0]
    mask = np.zeros(img.shape[:2], dtype="uint8")
    print(mask.shape)
    cv2.rectangle(mask, (x, y), (x + w, y + h), 255, -1)  # filled rectangle
    # Flip the mask so the face is 0 and the surroundings are 255.
    return cv2.bitwise_not(mask)
|
225 |
+
|
226 |
+
def sample_images(init_image, mask_image):
    """Inpaint ``init_image`` with a fixed "professional profile" prompt.

    A CUDA generator is seeded with a random integer in [0, 1000000] so each
    call produces a different result. The value returned is whatever
    ``inpaint`` returns for a single sample and a single iteration.
    """
    seed = random.randint(0, 1000000)  # change the seed to get different results
    rng = torch.Generator(device="cuda").manual_seed(seed)
    return inpaint(
        "4K UHD professional profile picture of a person wearing a suit for work",
        init_image,
        mask_image,
        strength=0.65,
        guidance_scale=10,
        generator=rng,
        num_samples=1,
        n_iter=1,
    )
|
236 |
+
|
237 |
+
def preprocess_image(image):
    """Convert a PIL image into a normalised ``(1, 3, H, W)`` float tensor.

    The image is forced to RGB so that RGBA, grayscale or palette inputs give
    the same three-channel layout, then resized so both sides are multiples
    of 32. Pixel values are scaled from [0, 255] to [-1.0, 1.0].

    Raises:
        ValueError: if either side of the image is smaller than 32 pixels.
    """
    # Without this, an alpha channel produces 4 channels and a single-band
    # image breaks the 4-axis transpose below.
    image = image.convert("RGB")
    w, h = image.size
    w, h = w - w % 32, h - h % 32  # round down to an integer multiple of 32
    if w == 0 or h == 0:
        raise ValueError("image must be at least 32x32 pixels")
    image = image.resize((w, h), resample=PIL.Image.LANCZOS)
    pixels = np.array(image).astype(np.float32) / 255.0
    # Add a leading batch axis and reorder HWC -> CHW.
    pixels = pixels[None].transpose(0, 3, 1, 2)
    return 2.0 * torch.from_numpy(pixels) - 1.0
|
245 |
+
|
246 |
+
def preprocess_mask(mask):
    """Convert a PIL mask into an inverted ``(1, 4, H/8, W/8)`` float tensor.

    The mask is reduced to a single luminance band, its size is rounded down
    to multiples of 32 and then shrunk by a factor of 8 with nearest-neighbour
    sampling. The result is scaled to [0.0, 1.0], repeated across 4 channels
    and inverted.
    """
    mask = mask.convert("L")
    w, h = mask.size
    w, h = w - w % 32, h - h % 32  # round down to an integer multiple of 32
    # NOTE(review): the /8 and the 4 channels presumably match the pipeline's
    # latent resolution and channel count -- confirm against the pipeline.
    mask = mask.resize((w // 8, h // 8), resample=PIL.Image.NEAREST)
    values = np.array(mask).astype(np.float32) / 255.0
    # Stack 4 copies along a new channel axis, then add the batch axis.
    # The former transpose(0, 1, 2, 3) was an identity permutation (a no-op)
    # and has been dropped.
    values = np.tile(values, (4, 1, 1))[None]
    # White input pixels become 0.0 and black become 1.0; per the original
    # author's note, white marks the area to repaint and black the area to keep.
    values = 1 - values
    return torch.from_numpy(values)
|
257 |
+
|
258 |
# accept an image input
|
259 |
# trigger the set of functions to occur => identify face, generate mask, save the inverted face mask, sample for the inverted images
|
260 |
# output the sampled images
|