Spaces:
Running
Running
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md
from cog import BasePredictor, Input, Path | |
import torch | |
import numpy as np | |
import random | |
import os | |
from PIL import Image | |
import logging | |
import time | |
from typing import List | |
import shutil | |
from diffusers.utils import load_image | |
from diffusers import EulerDiscreteScheduler | |
from photomaker.pipeline import PhotoMakerStableDiffusionXLPipeline | |
# Configure logging once at import time so every message emitted through
# `logger` carries a timestamp, level and logger name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
)
logger = logging.getLogger(__name__)

# Identifier handed to `from_pretrained` when the base pipeline is built.
base_model_path = 'SG161222/RealVisXL_V3.0'
# Location of the PhotoMaker adapter weights; it is split into a directory
# and a file name when the adapter is loaded.
photomaker_path = 'release_model/photomaker-v1.bin'
# Torch device the pipeline is moved to.
device = "cuda"
class Predictor(BasePredictor):
    """Cog predictor that generates images with a PhotoMaker SDXL pipeline."""

    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient.

        Builds the pipeline from ``base_model_path``, moves it to ``device``,
        loads the adapter found at ``photomaker_path`` with the trigger word
        ``"img"``, replaces the scheduler with an ``EulerDiscreteScheduler``
        built from the existing scheduler config, and calls ``fuse_lora()``.
        The elapsed time is logged at INFO level.
        """
        start = time.time()
        logger.info("Loading model...")

        self.pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
            base_model_path,
            torch_dtype=torch.bfloat16,
            use_safetensors=True,
            variant="fp16",
        ).to(device)

        # The adapter loader takes the directory and the file name separately.
        self.pipe.load_photomaker_adapter(
            os.path.dirname(photomaker_path),
            subfolder="",
            weight_name=os.path.basename(photomaker_path),
            trigger_word="img",
        )

        self.pipe.scheduler = EulerDiscreteScheduler.from_config(
            self.pipe.scheduler.config
        )
        self.pipe.fuse_lora()

        logger.info(f"Loaded model in {time.time() - start:.06}s")

    def _load_image(self, path):
        """Return the image stored at ``path`` as an RGB image.

        The file is first copied to ``/tmp/image.png`` and loaded from that
        copy.
        """
        # NOTE(review): the fixed scratch path is overwritten on every call;
        # presumably predictions never run concurrently -- confirm.
        shutil.copyfile(path, "/tmp/image.png")
        return load_image("/tmp/image.png").convert("RGB")

    def predict(
        self,
        prompt: str = Input(
            description="Input prompt",
            default="sci-fi, closeup portrait photo of a man img wearing the sunglasses in Iron man suit, face, slim body, high quality, film grain",
        ),
        negative_prompt: str = Input(
            description="Negative Input prompt",
            default="(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth",
        ),
        image: Path = Input(
            description="Input image for img2img or inpaint mode",
            default=None,
        ),
        seed: int = Input(
            description="Random seed. Leave blank to randomize the seed", default=None
        ),
        num_outputs: int = Input(
            description="Number of images to output.",
            ge=1,
            le=4,
            default=1,
        ),
        num_inference_steps: int = Input(
            description="Number of denoising steps", ge=1, le=500, default=40
        ),
    ) -> List[Path]:
        """Run a single prediction on the model.

        Args:
            prompt: Text prompt passed to the pipeline.
            negative_prompt: Negative text prompt passed to the pipeline.
            image: Image file passed to the pipeline as its single entry in
                ``input_id_images``. Required, even though the declared
                default is ``None``.
            seed: Seed for the torch generator; a random 32-bit value is
                drawn from ``os.urandom`` when omitted.
            num_outputs: Number of images requested per prompt.
            num_inference_steps: Number of denoising steps.

        Returns:
            Paths of the generated images, saved as ``/tmp/out-<i>.png``.

        Raises:
            ValueError: If ``image`` is not provided.
        """
        # Fail early with a clear message instead of an opaque error from the
        # file copy inside `_load_image`.
        if image is None:
            raise ValueError("An input image is required but `image` was not provided.")

        if seed is None:
            seed = int.from_bytes(os.urandom(4), "big")
        logger.info("Using seed: %s", seed)

        # Seed a generator on the same device the pipeline was moved to.
        generator = torch.Generator(device).manual_seed(seed)

        # The merge step is 20% of the denoising steps, capped at 30.
        style_strength_ratio = 20
        start_merge_step = min(
            int(float(style_strength_ratio) / 100 * num_inference_steps), 30
        )

        images = self.pipe(
            prompt=prompt,
            input_id_images=[self._load_image(image)],
            negative_prompt=negative_prompt,
            num_images_per_prompt=num_outputs,
            num_inference_steps=num_inference_steps,
            start_merge_step=start_merge_step,
            generator=generator,
        ).images

        output_paths = []
        # `generated` avoids shadowing the `image` input parameter.
        for i, generated in enumerate(images):
            output_path = f"/tmp/out-{i}.png"
            generated.save(output_path)
            output_paths.append(Path(output_path))
        return output_paths