Upload 2 files
Browse files- cog.yaml +20 -0
- predict.py +250 -0
cog.yaml
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  gpu: true
  cuda: "11.8"
  python_version: "3.11"
  python_packages:
    - "accelerate==0.25.0"
    - "diffusers==0.24.0"
    - "safetensors==0.4.1"
    - "torch==2.0.1"
    - "torchvision==0.15.2"
    - "transformers==4.36.0"

  # pget is invoked by predict.py (download_weights) to fetch weight archives.
  run:
    - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.3.1/pget" && chmod +x /usr/local/bin/pget

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
|
predict.py
ADDED
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Prediction interface for Cog ⚙️
|
2 |
+
# https://github.com/replicate/cog/blob/main/docs/python.md
|
3 |
+
|
4 |
+
from cog import BasePredictor, Input, Path
|
5 |
+
import os
|
6 |
+
import time
|
7 |
+
import torch
|
8 |
+
import shutil
|
9 |
+
import subprocess
|
10 |
+
import numpy as np
|
11 |
+
from typing import List
|
12 |
+
from diffusers.utils import load_image
|
13 |
+
from transformers import CLIPImageProcessor
|
14 |
+
from diffusers import (
|
15 |
+
StableDiffusionXLPipeline,
|
16 |
+
StableDiffusionXLImg2ImgPipeline,
|
17 |
+
StableDiffusionXLInpaintPipeline,
|
18 |
+
DDIMScheduler,
|
19 |
+
DPMSolverMultistepScheduler,
|
20 |
+
EulerAncestralDiscreteScheduler,
|
21 |
+
EulerDiscreteScheduler,
|
22 |
+
HeunDiscreteScheduler,
|
23 |
+
PNDMScheduler,
|
24 |
+
KDPM2AncestralDiscreteScheduler,
|
25 |
+
AutoencoderKL
|
26 |
+
)
|
27 |
+
from diffusers.pipelines.stable_diffusion.safety_checker import (
|
28 |
+
StableDiffusionSafetyChecker,
|
29 |
+
)
|
30 |
+
|
31 |
+
# Model identifier passed to StableDiffusionXLPipeline.from_pretrained.
MODEL_NAME = "dataautogpt3/ProteusV0.4-Lightning"

# Local directories read during Predictor.setup. MODEL_CACHE and SAFETY_CACHE
# are populated by download_weights when missing; FEATURE_EXTRACTOR is only
# read (no download is performed for it in this file).
MODEL_CACHE = "checkpoints"
SAFETY_CACHE = "safety-cache"
FEATURE_EXTRACTOR = "feature-extractor"

# Remote archives fetched with pget when the matching cache directory is absent.
SAFETY_URL = "https://weights.replicate.delivery/default/sdxl/safety-1.0.tar"
MODEL_URL = "https://weights.replicate.delivery/default/dataautogpt3/proteusv0.4-lightning.tar"
|
37 |
+
|
38 |
+
class KarrasDPM:
    """Scheduler factory exposing the same ``from_config`` entry point as the
    scheduler classes stored in ``SCHEDULERS``.

    It builds a ``DPMSolverMultistepScheduler`` with ``use_karras_sigmas=True``.
    """

    @staticmethod
    def from_config(config):
        """Return a DPMSolverMultistepScheduler built from ``config`` with Karras sigmas enabled.

        Declared as a static method so it works whether it is looked up on the
        class or on an instance.
        """
        return DPMSolverMultistepScheduler.from_config(config, use_karras_sigmas=True)
|
41 |
+
|
42 |
+
# Maps the user-facing ``scheduler`` choice to an object providing
# ``from_config(config)``; Predictor.predict looks the choice up here and
# rebuilds the pipeline's scheduler from its current config.
SCHEDULERS = {
    "DDIM": DDIMScheduler,
    "DPMSolverMultistep": DPMSolverMultistepScheduler,
    "HeunDiscrete": HeunDiscreteScheduler,
    "KarrasDPM": KarrasDPM,
    "K_EULER_ANCESTRAL": EulerAncestralDiscreteScheduler,
    "K_EULER": EulerDiscreteScheduler,
    "PNDM": PNDMScheduler,
    # NOTE(review): the label reads like "DPM++ 2M SDE" but the value is the
    # KDPM2 ancestral scheduler -- confirm this pairing is intended.
    "DPM++2MSDE": KDPM2AncestralDiscreteScheduler,
}
|
52 |
+
|
53 |
+
def download_weights(url, dest):
    """Download ``url`` into ``dest`` by running ``pget -x url dest``.

    Logs the source, the destination and the elapsed wall-clock time.
    ``-x`` is presumably pget's "extract archive" flag -- confirm against the
    pget version installed in the image.

    Raises:
        subprocess.CalledProcessError: if pget exits with a non-zero status.
    """
    began = time.time()
    print("downloading url: ", url)
    print("downloading to: ", dest)
    command = ["pget", "-x", url, dest]
    subprocess.check_call(command, close_fds=False)
    elapsed = time.time() - began
    print("downloading took: ", elapsed)
|
59 |
+
|
60 |
+
class Predictor(BasePredictor):
    """Cog predictor exposing txt2img, img2img and inpaint modes of one SDXL model.

    The three pipelines built in ``setup`` share the same model components;
    ``predict`` picks one of them based on which of ``image`` / ``mask`` were
    supplied.
    """

    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        start = time.time()
        print("Loading safety checker...")
        if not os.path.exists(SAFETY_CACHE):
            download_weights(SAFETY_URL, SAFETY_CACHE)
        print("Loading model")
        if not os.path.exists(MODEL_CACHE):
            download_weights(MODEL_URL, MODEL_CACHE)
        self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
            SAFETY_CACHE, torch_dtype=torch.float16
        ).to("cuda")
        self.feature_extractor = CLIPImageProcessor.from_pretrained(FEATURE_EXTRACTOR)
        print("Loading vae")
        vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix",
            torch_dtype=torch.float16,
        )
        print("Loading txt2img pipeline...")
        self.txt2img_pipe = StableDiffusionXLPipeline.from_pretrained(
            MODEL_NAME,
            vae=vae,
            torch_dtype=torch.float16,
            cache_dir=MODEL_CACHE,
        ).to('cuda')

        # The img2img and inpaint pipelines reuse the txt2img pipeline's
        # components instead of loading their own copies.
        shared_components = {
            "vae": self.txt2img_pipe.vae,
            "text_encoder": self.txt2img_pipe.text_encoder,
            "text_encoder_2": self.txt2img_pipe.text_encoder_2,
            "tokenizer": self.txt2img_pipe.tokenizer,
            "tokenizer_2": self.txt2img_pipe.tokenizer_2,
            "unet": self.txt2img_pipe.unet,
            "scheduler": self.txt2img_pipe.scheduler,
        }
        print("Loading SDXL img2img pipeline...")
        self.img2img_pipe = StableDiffusionXLImg2ImgPipeline(**shared_components).to("cuda")
        print("Loading SDXL inpaint pipeline...")
        self.inpaint_pipe = StableDiffusionXLInpaintPipeline(**shared_components).to("cuda")
        print("setup took: ", time.time() - start)

    def load_image(self, path):
        """Copy ``path`` to a fixed temp file and return it as an RGB image.

        The module-level ``load_image`` (imported from diffusers.utils) does
        the actual decoding; this method only shares its name.
        """
        shutil.copyfile(path, "/tmp/image.png")
        return load_image("/tmp/image.png").convert("RGB")

    def run_safety_checker(self, image):
        """Run the safety checker over a list of generated images.

        Args:
            image: iterable of images (each convertible with ``np.array``).

        Returns:
            The ``(images, has_nsfw_concept)`` pair produced by the safety checker.
        """
        safety_checker_input = self.feature_extractor(image, return_tensors="pt").to(
            "cuda"
        )
        np_image = [np.array(val) for val in image]
        image, has_nsfw_concept = self.safety_checker(
            images=np_image,
            clip_input=safety_checker_input.pixel_values.to(torch.float16),
        )
        return image, has_nsfw_concept

    @torch.inference_mode()
    def predict(
        self,
        prompt: str = Input(
            description="Input prompt",
            default="3 fish in a fish tank wearing adorable outfits, best quality, hd"
        ),
        negative_prompt: str = Input(
            description="Negative Input prompt",
            default="nsfw, bad quality, bad anatomy, worst quality, low quality, low resolutions, extra fingers, blur, blurry, ugly, wrongs proportions, watermark, image artifacts, lowres, ugly, jpeg artifacts, deformed, noisy image"
        ),
        image: Path = Input(
            description="Input image for img2img or inpaint mode",
            default=None,
        ),
        mask: Path = Input(
            description="Input mask for inpaint mode. Black areas will be preserved, white areas will be inpainted.",
            default=None,
        ),
        width: int = Input(
            description="Width of output image. Recommended 1024 or 1280",
            default=1024
        ),
        height: int = Input(
            description="Height of output image. Recommended 1024 or 1280",
            default=1024
        ),
        num_outputs: int = Input(
            description="Number of images to output.",
            ge=1,
            le=4,
            default=1,
        ),
        scheduler: str = Input(
            description="scheduler",
            choices=list(SCHEDULERS),
            default="K_EULER_ANCESTRAL",
        ),
        num_inference_steps: int = Input(
            description="Number of denoising steps", ge=1, le=100, default=8
        ),
        guidance_scale: float = Input(
            description="Scale for classifier-free guidance", ge=0, le=10, default=2
        ),
        prompt_strength: float = Input(
            description="Prompt strength when using img2img / inpaint. 1.0 corresponds to full destruction of information in image",
            ge=0.0,
            le=1.0,
            default=0.8,
        ),
        seed: int = Input(
            description="Random seed. Leave blank to randomize the seed", default=None
        ),
        apply_watermark: bool = Input(
            description="Applies a watermark to enable determining if an image is generated in downstream applications. If you have other provisions for generating or deploying images safely, you can use this to disable watermarking.",
            default=True,
        ),
        disable_safety_checker: bool = Input(
            description="Disable safety checker for generated images. This feature is only available through the API. See https://replicate.com/docs/how-does-replicate-work#safety",
            default=False
        )
    ) -> List[Path]:
        """Run a single prediction on the model.

        Mode selection: ``image`` and ``mask`` -> inpaint; ``image`` only ->
        img2img; otherwise txt2img. ``width``/``height`` are forwarded in
        txt2img and inpaint modes only.

        Returns:
            Paths of the saved PNG files, skipping images flagged by the
            safety checker.

        Raises:
            Exception: if every generated image was flagged as NSFW.
        """
        if seed is None:
            seed = int.from_bytes(os.urandom(4), "big")
        print(f"Using seed: {seed}")
        generator = torch.Generator("cuda").manual_seed(seed)

        sdxl_kwargs = {}
        print(f"Prompt: {prompt}")
        if image and mask:
            print("inpainting mode")
            sdxl_kwargs["image"] = self.load_image(image)
            sdxl_kwargs["mask_image"] = self.load_image(mask)
            sdxl_kwargs["strength"] = prompt_strength
            sdxl_kwargs["width"] = width
            sdxl_kwargs["height"] = height
            pipe = self.inpaint_pipe
        elif image:
            print("img2img mode")
            sdxl_kwargs["image"] = self.load_image(image)
            sdxl_kwargs["strength"] = prompt_strength
            pipe = self.img2img_pipe
        else:
            print("txt2img mode")
            sdxl_kwargs["width"] = width
            sdxl_kwargs["height"] = height
            pipe = self.txt2img_pipe

        # Swap the scheduler before touching the watermark so that a failure
        # here cannot leave the pipeline with its watermark disabled.
        pipe.scheduler = SCHEDULERS[scheduler].from_config(pipe.scheduler.config)

        common_args = {
            "prompt": [prompt] * num_outputs,
            "negative_prompt": [negative_prompt] * num_outputs,
            "guidance_scale": guidance_scale,
            "generator": generator,
            "num_inference_steps": num_inference_steps,
        }

        # Toggle the watermark for this prediction only. The pipeline object
        # is reused across predictions, so the original value is restored in
        # ``finally`` even when generation raises.
        if not apply_watermark:
            watermark_cache = pipe.watermark
            pipe.watermark = None
        try:
            output = pipe(**common_args, **sdxl_kwargs)
        finally:
            if not apply_watermark:
                pipe.watermark = watermark_cache

        if disable_safety_checker:
            nsfw_flags = [False] * len(output.images)
        else:
            _, nsfw_flags = self.run_safety_checker(output.images)

        output_paths = []
        for i, (generated, flagged) in enumerate(zip(output.images, nsfw_flags)):
            if flagged:
                print(f"NSFW content detected in image {i}")
                continue
            output_path = f"/tmp/out-{i}.png"
            generated.save(output_path)
            output_paths.append(Path(output_path))

        if not output_paths:
            raise Exception(
                "NSFW content detected. Try running it again, or try a different prompt."
            )

        return output_paths
|