from __future__ import annotations

import pathlib
import gradio as gr
import torch
import os
import PIL
import torchvision.transforms as T
import torch.nn.functional as F
import numpy as np
import cv2
import matplotlib.pyplot as plt
from typing import Any
from transformers import (
    CLIPTextModelWithProjection,
    CLIPVisionModelWithProjection,
    CLIPImageProcessor,
    CLIPTokenizer,
)
from src.priors.lambda_prior_transformer import (
    PriorTransformer,
)  # original huggingface prior transformer without time conditioning
from src.pipelines.pipeline_kandinsky_subject_prior import KandinskyPriorPipeline
from diffusers import DiffusionPipeline
from PIL import Image
import random
import spaces

__device__ = "cuda"
__dtype__ = torch.float16
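

# Model loads the CLIP bigG text encoder and tokenizer, the Lambda-ECLIPSE
# subject-driven prior, and the Kandinsky 2.2 decoder, all in fp16 on `__device__`.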
class Model:
    def __init__(self):
        self.device = __device__
        # Frozen CLIP bigG text encoder and tokenizer used by the prior pipeline.
        self.text_encoder = (
            CLIPTextModelWithProjection.from_pretrained(
                "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
                projection_dim=1280,
                torch_dtype=__dtype__,
            )
            .eval()
            .requires_grad_(False)
        ).to(self.device)
        self.tokenizer = CLIPTokenizer.from_pretrained(
            "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
        )
        # Lambda-ECLIPSE prior plugged into the Kandinsky prior pipeline.
        prior = PriorTransformer.from_pretrained(
            "ECLIPSE-Community/Lambda-ECLIPSE-Prior-v1.0",
            torch_dtype=__dtype__,
        )
        self.pipe_prior = KandinskyPriorPipeline.from_pretrained(
            "kandinsky-community/kandinsky-2-2-prior",
            prior=prior,
            torch_dtype=__dtype__,
        ).to(self.device)
        # Kandinsky 2.2 decoder that turns predicted image embeddings into pixels.
        self.pipe = DiffusionPipeline.from_pretrained(
            "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=__dtype__
        ).to(self.device)
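
    # inference() runs the two-stage pipeline: the prior predicts CLIP image
    # embeddings from the prompt and subject images, and the Kandinsky decoder
    # renders the final image. A random seed is drawn when none is supplied.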
    # NOTE: the `@spaces.GPU` decorator is an assumption based on the otherwise
    # unused `spaces` import and the Space running on ZeroGPU; remove it when
    # running on a dedicated GPU or CPU host.
    @spaces.GPU
    def inference(self, raw_data, seed):
        if seed is None:
            seed = random.randint(0, 10000000)
        generator = torch.Generator(device=self.device).manual_seed(seed)
        image_emb, negative_image_emb = self.pipe_prior(
            raw_data=raw_data,
            generator=generator,
        ).to_tuple()
        image = self.pipe(
            image_embeds=image_emb,
            negative_image_embeds=negative_image_emb,
            num_inference_steps=50,
            guidance_scale=7.5,
            generator=generator,
        ).images[0]
        return image

    def run(
        self,
        image: dict[str, PIL.Image.Image],
        keyword: str,
        image2: dict[str, PIL.Image.Image],
        keyword2: str,
        text: str,
        seed: int | None = None,
    ):
        # ImageEditor returns a dict; "composite" holds the brushed/masked result.
        sub_imgs = [image["composite"]]
        sub_keywords = [keyword]
        # Add the second subject only when both its keyword and image are provided,
        # so the image and keyword lists stay aligned.
        if keyword2 and keyword2 != "no subject" and image2:
            sub_keywords.append(keyword2)
            sub_imgs.append(image2["composite"])
        raw_data = {
            "prompt": text,
            "subject_images": sub_imgs,
            "subject_keywords": sub_keywords,
        }
        image = self.inference(raw_data, seed)
        return image
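

# create_demo builds the Gradio Blocks UI: two image-editor/keyword pairs for the
# subjects, a prompt box, a Run button, a result image, and preset examples.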
def create_demo():
    USAGE = """## To run the demo, you should:
1. Upload your image.
2. <span style='color: red;'>**Upload a masked subject image with a white background, or whiten out the background manually using the brush tool.**</span>
3. Input a keyword, e.g. "Dog".
4. For multi-subject personalization:
    4-1. Upload another image.
    4-2. Input its keyword, e.g. "Sunglasses".
5. Input a proper text prompt, such as "A photo of a Dog" or "A Dog wearing sunglasses". Please use the same keywords in the prompt.
6. Click the Run button.
"""
    model = Model()

    with gr.Blocks() as demo:
        gr.HTML(
            """<h1 style="text-align: center;"><b><i>λ-ECLIPSE</i>: Multi-Concept Personalized Text-to-Image Diffusion Models by Leveraging CLIP Latent Space</b></h1>
            <h1 style='text-align: center;'><a href='https://eclipse-t2i.github.io/Lambda-ECLIPSE/'>Project Page</a> | <a href='#'>Paper</a></h1>
            <p style="text-align: center; color: red;">This demo is currently hosted on either a small GPU or CPU. We will soon provide high-end GPU support.</p>
            <p style="text-align: center; color: red;">Please follow the instructions here to run it locally: <a href="https://github.com/eclipse-t2i/lambda-eclipse-inference">GitHub Inference Code</a></p>
            <a href="https://colab.research.google.com/drive/1VcqzXZmilntec3AsIyzCqlstEhX4Pa1o?usp=sharing" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
            """
        )
        gr.Markdown(USAGE)
        with gr.Row():
            with gr.Column():
                with gr.Group():
                    gr.Markdown(
                        "Upload your first masked subject image, or mask out the background using the brush."
                    )
                    image = gr.ImageEditor(
                        label="Input",
                        type="pil",
                        brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"),
                    )
                    keyword = gr.Text(
                        label="Keyword",
                        placeholder='e.g. "Dog", "Goofie"',
                        info="Keyword for the first subject",
                    )
                    gr.Markdown(
                        "For multi-subject generation: upload your second masked subject image, or mask out the background using the brush."
                    )
                    image2 = gr.ImageEditor(
                        label="Input",
                        type="pil",
                        brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"),
                    )
                    keyword2 = gr.Text(
                        label="Keyword",
                        placeholder='e.g. "Sunglasses", "Grand Canyon"',
                        info="Keyword for the second subject",
                    )
                    prompt = gr.Text(
                        label="Prompt",
                        placeholder='e.g. "A photo of dog", "A dog wearing sunglasses"',
                        info="Reuse the keywords from above in the prompt",
                    )
                    run_button = gr.Button("Run")
            with gr.Column():
                result = gr.Image(label="Result")
        inputs = [
            image,
            keyword,
            image2,
            keyword2,
            prompt,
        ]
        gr.Examples(
            examples=[
                [
                    os.path.join(os.path.dirname(__file__), "./assets/luffy.jpg"),
                    "luffy",
                    os.path.join(os.path.dirname(__file__), "./assets/white.jpg"),
                    "no subject",
                    "luffy holding a sword",
                ],
                [
                    os.path.join(os.path.dirname(__file__), "./assets/luffy.jpg"),
                    "luffy",
                    os.path.join(os.path.dirname(__file__), "./assets/white.jpg"),
                    "no subject",
                    "luffy in the living room",
                ],
                [
                    os.path.join(os.path.dirname(__file__), "./assets/teapot.jpg"),
                    "teapot",
                    os.path.join(os.path.dirname(__file__), "./assets/white.jpg"),
                    "no subject",
                    "teapot on a cobblestone street",
                ],
                [
                    os.path.join(os.path.dirname(__file__), "./assets/trex.jpg"),
                    "trex",
                    os.path.join(os.path.dirname(__file__), "./assets/white.jpg"),
                    "no subject",
                    "trex near a river",
                ],
                [
                    os.path.join(os.path.dirname(__file__), "./assets/cat.png"),
                    "cat",
                    os.path.join(
                        os.path.dirname(__file__), "./assets/blue_sunglasses.png"
                    ),
                    "glasses",
                    "A cat wearing glasses on a snowy field",
                ],
                [
                    os.path.join(os.path.dirname(__file__), "./assets/statue.jpg"),
                    "statue",
                    os.path.join(os.path.dirname(__file__), "./assets/toilet.jpg"),
                    "toilet",
                    "statue sitting on a toilet",
                ],
                [
                    os.path.join(os.path.dirname(__file__), "./assets/teddy.jpg"),
                    "teddy",
                    os.path.join(os.path.dirname(__file__), "./assets/luffy_hat.jpg"),
                    "hat",
                    "a teddy wearing the hat at a beach",
                ],
                [
                    os.path.join(os.path.dirname(__file__), "./assets/chair.jpg"),
                    "chair",
                    os.path.join(os.path.dirname(__file__), "./assets/table.jpg"),
                    "table",
                    "a chair and table in living room",
                ],
            ],
            inputs=inputs,
            fn=model.run,
            outputs=result,
        )
        run_button.click(fn=model.run, inputs=inputs, outputs=result)
    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.queue(max_size=20).launch()
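
# To run this demo locally (assuming the file is saved as app.py inside the
# lambda-eclipse-inference repository, which provides the `src` package):
#   python app.py
# Gradio serves the UI on http://localhost:7860 by default.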