Spaces:

nielsr
/

text-based-inpainting

Runtime error

File size: 2,825 Bytes

23f0a7f
 
1123e86
23f0a7f
 
 
e7c961c
23f0a7f
 
b18c116
 
1123e86
 
23f0a7f
 
 
 
 
 
 
 
 
 
1123e86
 
23f0a7f
 
 
 
 
3b2cfcf
 
 
 
 
 
 
 
 
 
 
 
 
23f0a7f
 
3b2cfcf
23f0a7f
 
 
 
 
 
 
 
 
 
 
 
 
55896ac
23f0a7f

from PIL import Image
import requests
import os

from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
from diffusers import DiffusionPipeline
import torch
from torch import autocast

import gradio as gr

# Hub credential: the API_TOKEN environment variable when it is set and
# non-empty, otherwise the literal True.
# NOTE(review): True presumably tells the Hub client to use locally stored
# credentials -- confirm against the installed diffusers version.
auth_token = os.environ.get("API_TOKEN") or True

# Sample image from the CLIPSeg repository, downloaded at import time.
url = "https://github.com/timojl/clipseg/blob/master/example_image.jpg?raw=true"
# The timeout keeps start-up from blocking forever when the host is unreachable.
image = Image.open(requests.get(url, stream=True, timeout=30).raw)

# CLIPSeg processor + model; both are handed to the inpainting pipeline below.
processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")

# Stable Diffusion inpainting checkpoint loaded through the "text_inpainting"
# custom pipeline, with the CLIPSeg model and processor plugged in.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    custom_pipeline="text_inpainting",
    segmentation_model=model,
    segmentation_processor=processor,
    use_auth_token=auth_token,
)

# Run on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipe.to(device)

def pad_image(image):
    """Return a square version of *image*, padding the shorter dimension.

    A square image is returned as-is (the very same object). Otherwise a new
    image of the same mode is created whose side equals the longer dimension,
    filled with ``(0, 0, 0)``, and the original is pasted centred along the
    shorter axis.

    Args:
        image: A PIL image; its ``size`` and ``mode`` attributes are read.

    Returns:
        The input image if it is already square, else a new padded image.
    """
    width, height = image.size
    if width == height:
        return image

    side = max(width, height)
    # Only the shorter axis ends up with a non-zero offset, which centres
    # the original along that axis; the longer axis starts at 0.
    offset = ((side - width) // 2, (side - height) // 2)

    canvas = Image.new(image.mode, (side, side), (0, 0, 0))
    canvas.paste(image, offset)
    return canvas


def process_image(image, text, prompt):
    """Replace the region of *image* described by *text* with *prompt*.

    The image is padded to a square, resized to 512x512 and passed to the
    module-level inpainting pipeline ``pipe``.

    Args:
        image: PIL image to edit (the Gradio input uses ``type="pil"``).
        text: Description of the thing to replace; forwarded to the pipeline
            as ``text``.
        prompt: Description of the replacement; forwarded to the pipeline as
            ``prompt``.

    Returns:
        The first image of the pipeline result (``.images[0]``).

    Raises:
        ValueError: If *image* is ``None`` (nothing was uploaded).
    """
    if image is None:
        # Fail with a clear message instead of an AttributeError from pad_image.
        raise ValueError("Please provide an image to inpaint.")

    image = pad_image(image).resize((512, 512))

    # The pipeline lives on `device`, which may be "cpu"; only enable CUDA
    # autocast when the pipeline is actually running on CUDA.
    with autocast("cuda", enabled=(device == "cuda")):
        inpainted_image = pipe(image=image, text=text, prompt=prompt).images[0]
    return inpainted_image
 

# Static text passed to the Gradio interface below.
title = "Interactive demo: Text-based inpainting with CLIPSeg x Stable Diffusion"
description = "Demo for using CLIPSeg, a CLIP-based model for zero- and one-shot image segmentation. This model can be used to segment things in an image based on text. This way, one can use it to provide a binary mask for Stable Diffusion, which the latter needs to inpaint. To use it, simply upload an image and add a text to mask as well as a text which indicates what to replace, or use one of the examples below and click 'submit'. Results will show up in a few seconds."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10003'>CLIPSeg: Image Segmentation Using Text and Image Prompts</a> | <a href='https://huggingface.co/docs/transformers/main/en/model_doc/clipseg'>HuggingFace docs</a></p>"

# One example row, in the same order as the inputs declared below:
# image file, text naming the thing to replace, replacement text.
examples = [["example_image.png", "a glass", "a cup"]]

# Three inputs (image + two text boxes) feed process_image; it returns one image.
interface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(label="What's the thing you want to replace?"),
        gr.Textbox(label="What do you want as replacement?"),
    ],
    outputs=gr.Image(type="pil"),
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Start the app with Gradio's debug flag turned on.
interface.launch(debug=True)