Spaces:
Sleeping
Sleeping
File size: 4,393 Bytes
8141c16 2ffd8bc 3c9d565 c731ccc 319cffc c731ccc 7cbee76 c731ccc 17aaf2d 319cffc 568fb5d 319cffc 2ffd8bc 17aaf2d 3c9d565 6f8a75f 2ffd8bc c731ccc 2ffd8bc 319cffc 2ffd8bc 319cffc 2ffd8bc 319cffc 2ffd8bc 319cffc c731ccc 319cffc 2ffd8bc 3c9d565 2ffd8bc 319cffc 2ffd8bc 3c9d565 49c3033 319cffc 17aaf2d 2ffd8bc c731ccc 2ffd8bc c731ccc 2ffd8bc c731ccc 319cffc 3c9d565 319cffc 3c9d565 319cffc 2ffd8bc 319cffc 2ffd8bc 3c9d565 2ffd8bc 3c9d565 2ffd8bc 17aaf2d 2ffd8bc 17aaf2d 319cffc 97448a3 c731ccc 2ffd8bc 17aaf2d 319cffc 97448a3 17aaf2d 97448a3 17aaf2d 8e780a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import gradio as gr
import spaces
import time
import cv2
from diffusers import AutoPipelineForInpainting
from transformers import pipeline
from ultralytics import YOLO
from PIL import Image
import numpy as np
import torch
import base64
from io import BytesIO
import difflib
# Constants
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Helper functions
def image_to_base64(image: Image.Image):
buffered = BytesIO()
image.save(buffered, format="JPEG")
return base64.b64encode(buffered.getvalue()).decode("utf-8")
def get_most_similar_string(target_string, string_array):
best_match = string_array[0]
best_match_ratio = 0
for candidate_string in string_array:
similarity_ratio = difflib.SequenceMatcher(None, target_string, candidate_string).ratio()
if similarity_ratio > best_match_ratio:
best_match = candidate_string
best_match_ratio = similarity_ratio
return best_match
# Load YOLO model on CPU
yoloModel = YOLO('yolov8x-seg.pt')
yoloModel.to('cpu')
def getClasses(model, img1):
results = model([img1])
out = []
for r in results:
im_array = r.plot()
out.append(r)
return r, im_array[..., ::-1], results
def getMasks(out):
allout = {}
class_masks = {}
for a in out:
class_name = a['name']
mask = a['img']
if class_name in class_masks:
class_masks[class_name] = Image.fromarray(
np.maximum(np.array(class_masks[class_name]), np.array(mask))
)
else:
class_masks[class_name] = mask
for class_name, mask in class_masks.items():
allout[class_name] = mask
return allout
def joinClasses(classes):
out = []
for r in classes:
masks = r.masks
name0 = r.names[int(r.boxes.cls.cpu().numpy()[0])]
mask1 = masks[0]
mask = mask1.data[0].cpu().numpy()
mask_normalized = ((mask - mask.min()) * (255 / (mask.max() - mask.min()))).astype(np.uint8)
kernel = np.ones((10, 10), np.uint8)
mask_with_border = cv2.dilate(mask_normalized, kernel, iterations=1)
mask_img = Image.fromarray(mask_with_border, "L")
out.append({'name': name0, 'img': mask_img})
allMask = getMasks(out)
return allMask
def getSegments(yoloModel, img1):
classes, image, results1 = getClasses(yoloModel, img1)
im = Image.fromarray(image)
im.save('classes.jpg')
allMask = joinClasses(classes)
return allMask
@spaces.GPU
def getDescript(img1):
image_captioner = pipeline("image-to-text", model="Abdou/vit-swin-base-224-gpt2-image-captioning", device='cuda')
base64_img = image_to_base64(img1)
caption = image_captioner(base64_img)[0]['generated_text']
return caption
def rmGPT(caption, remove_class, change):
arstr = caption.split(' ')
popular = get_most_similar_string(remove_class, arstr)
ind = arstr.index(popular)
if len(change) < 3:
new = []
rng = round(len(arstr) / 5)
for i in range(len(arstr)):
if i not in list(range(ind - rng, ind + rng)):
new.append(arstr[i])
return ' '.join(new)
else:
arstr[ind] = change
return ' '.join(arstr)
@spaces.GPU
def ChangeOBJ(img1, response, mask1):
sdxl = AutoPipelineForInpainting.from_pretrained(
"diffusers/stable-diffusion-xl-1.0-inpainting-0.1", torch_dtype=torch.float16
).to('cuda')
size = img1.size
image = sdxl(prompt=response, image=img1, mask_image=mask1).images[0]
return image.resize((size[0], size[1]))
def full_pipeline(image, target, change):
img1 = Image.fromarray(image.astype('uint8'), 'RGB')
allMask = getSegments(yoloModel, img1)
target_to_remove = get_most_similar_string(target, list(allMask.keys()))
caption = getDescript(img1)
response = rmGPT(caption, target_to_remove, change)
mask1 = allMask[target_to_remove]
remimg = ChangeOBJ(img1, response, mask1)
return remimg, caption, response
iface = gr.Interface(
fn=full_pipeline,
inputs=[
gr.Image(label="Upload Image"),
gr.Textbox(label="What to delete?"),
gr.Textbox(label="Change?"),
],
outputs=[
gr.Image(label="Result Image", type="numpy"),
gr.Textbox(label="Caption"),
gr.Textbox(label="Message"),
],
live=False
)
iface.launch(share=True)
|