Spaces:
Runtime error
Runtime error
File size: 2,849 Bytes
008bbdb c64f929 be31d6b 008bbdb ec5658e 008bbdb ce80bdb 008bbdb 0d6916b 008bbdb 7b13a4f 008bbdb 21ef9b6 008bbdb c64f929 008bbdb 6217eeb 008bbdb c64f929 415d5da 008bbdb 3e99bd6 008bbdb e1ccc30 3e99bd6 c64f929 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import csv
import gradio as gr
import glob
import pprint as pp
from sys import excepthook
from re import T
from urllib.parse import parse_qs, urlparse
import clip
import numpy as np
import requests
import torch
import io
from IPython.display import Image, display
from PIL import Image, ImageFont
import os
import cv2
import torch
import glob
# Model
# Lazily populated cache so the detector and CLIP weights are loaded once per
# process instead of on every request.
_MODEL_CACHE = {}


def _get_models():
    """Return ``(detector, clip_model, preprocess, device)``, loading on first use."""
    if not _MODEL_CACHE:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        detector = torch.hub.load('ultralytics/yolov5', 'yolov5s')
        clip_model, preprocess = clip.load("ViT-B/32", device=device)
        # Fill the cache only after both loads succeeded, so a failed load is
        # retried on the next call instead of leaving a half-filled cache.
        _MODEL_CACHE.update(
            detector=detector,
            clip_model=clip_model,
            preprocess=preprocess,
            device=device,
        )
    return (
        _MODEL_CACHE["detector"],
        _MODEL_CACHE["clip_model"],
        _MODEL_CACHE["preprocess"],
        _MODEL_CACHE["device"],
    )


def _crop_detections(detector, img):
    """Run the detector on *img* and return every detected crop as an RGB PIL image."""
    import tempfile

    from PIL import Image

    results = detector(img)
    # The crops are written to disk by the detector; a TemporaryDirectory
    # guarantees the files are removed once they have been read back.
    with tempfile.TemporaryDirectory() as workdir:
        results.crop(save_dir=workdir)
        crops = []
        # Sorted so the candidate order (and tie-breaking) is deterministic.
        for crop_path in sorted(glob.glob(workdir + '/crops/**/*.jpg')):
            with Image.open(crop_path) as handle:
                # convert() loads the pixel data, so the image stays valid
                # after the file is closed and deleted.
                crops.append(handle.convert('RGB'))
    return crops


def predict(img, text):
    """Return the detected object in *img* that best matches the query *text*.

    Objects are detected and cropped with YOLOv5, each crop and the text are
    embedded with CLIP, and the crop whose embedding has the highest cosine
    similarity to the text embedding is returned.

    Args:
        img: Input image in any form accepted by the YOLOv5 hub model.
        text: Free-text description of the object to look for.

    Returns:
        The best-matching crop as an RGB ``PIL.Image``. If the detector finds
        no objects, *img* itself is returned unchanged.
    """
    detector, clip_model, preprocess, device = _get_models()

    crops = _crop_detections(detector, img)
    if not crops:
        # Nothing to rank; torch.stack would raise on an empty list.
        return img

    # The transform returned by clip.load is used as-is; no extra mean/std
    # normalisation is applied on top of it, and the batch is moved to the
    # same device as the model before encoding.
    batch = torch.stack([preprocess(crop) for crop in crops]).to(device)
    tokens = clip.tokenize(text).to(device)

    with torch.no_grad():
        image_features = clip_model.encode_image(batch).float()
        text_features = clip_model.encode_text(tokens).float()
        # Unit-normalise so the dot product below is a cosine similarity.
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        similarity = (text_features @ image_features.T)[0]

    best_index = int(similarity.argmax())
    return crops[best_index]
#text = gr.inputs.Textbox(lines=1, label="Text query", placeholder="Introduce the search text...",)
#img = gr.inputs.Image()
#img = "image"
# Assemble the web UI: an image plus a one-line text query go in, the
# best-matching image region comes out.
text_query_box = gr.inputs.Textbox(
    lines=1,
    label="Text query",
    placeholder="Introduce the search text...",
)
demo = gr.Interface(
    predict,
    ["image", text_query_box],
    outputs="image",
    title='Search inside image',
)
demo.launch()
|