import gradio as gr
from PIL import Image
from io import BytesIO
import torch
import os
from diffusers import DiffusionPipeline, DDIMScheduler
MY_SECRET_TOKEN=os.environ.get('HF_TOKEN_SD')
has_cuda = torch.cuda.is_available()
device = "cuda"
pipe = DiffusionPipeline.from_pretrained(
"CompVis/stable-diffusion-v1-4",
safety_checker=None,
use_auth_token=MY_SECRET_TOKEN,
custom_pipeline="imagic_stable_diffusion",
scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
).to(device)
generator = torch.Generator("cuda").manual_seed(0)
def infer(prompt, init_image):
init_image = Image.open(init_image).convert("RGB")
init_image = init_image.resize((512, 512))
res = pipe.train(
prompt,
init_image,
guidance_scale=7.5,
num_inference_steps=50,
generator=generator)
res = pipe(alpha=1)
return res.images[0]
title = """
Imagic Stable Diffusion • Community Pipeline
Text-Based Real Image Editing with Diffusion Models
This pipeline aims to implement this paper to Stable Diffusion, allowing for real-world image editing.
You can skip the queue by duplicating this space:
"""
article = """
"""
css = '''
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.footer {
margin-bottom: 45px;
margin-top: 35px;
text-align: center;
border-bottom: 1px solid #e5e5e5;
}
.footer>p {
font-size: .8rem;
display: inline-block;
padding: 0 10px;
transform: translateY(10px);
background: white;
}
.dark .footer {
border-color: #303030;
}
.dark .footer>p {
background: #0b0f19;
}
'''
with gr.Blocks(css=css) as block:
with gr.Column(elem_id="col-container"):
gr.HTML(title)
prompt_input = gr.Textbox(label="Target text", placeholder="Describe the image with what you want to change about the subject")
image_init = gr.Image(source="upload", type="filepath",label="Input Image")
submit_btn = gr.Button("Submit")
image_output = gr.Image(label="Edited image")
gr.HTML(article)
submit_btn.click(fn=infer, inputs=[prompt_input,image_init], outputs=[image_output])
block.queue(max_size=12).launch(show_api=False)