Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import csv
import glob
import io
import os
import pprint as pp
import tempfile
from re import T
from sys import excepthook
from urllib.parse import parse_qs, urlparse

import clip
import cv2
import gradio as gr
import numpy as np
import requests
import torch
from sklearn.utils.extmath import softmax

from IPython.display import Image, display
# NOTE: PIL's Image intentionally shadows IPython's — the code below needs PIL.
from PIL import Image, ImageFont
+
# Model
|
24 |
+
|
25 |
+
def predict(text,img):
|
26 |
+
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
|
27 |
+
img = cv2.imread(img)
|
28 |
+
results = model(img1)
|
29 |
+
dirpath = tempfile.mkdtemp()
|
30 |
+
results.crop(save_dir=dirpath)
|
31 |
+
path= dirpath+'/crops/**/*.jpg'
|
32 |
+
txtfiles = []
|
33 |
+
for file in glob.glob(path):
|
34 |
+
txtfiles.append(file)
|
35 |
+
|
36 |
+
import ipyplot
|
37 |
+
from PIL import Image
|
38 |
+
l = []
|
39 |
+
#keyList = list(range(len(txtfiles)))
|
40 |
+
for filename in glob.glob(path):
|
41 |
+
foo = Image.open(filename).convert('RGB')
|
42 |
+
#resized_image = foo.resize((250,250))
|
43 |
+
l.append(foo)
|
44 |
+
|
45 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
46 |
+
model, preprocess = clip.load("ViT-B/32", device=device)
|
47 |
+
|
48 |
+
images = torch.stack([preprocess(im) for im in l]).to(device)
|
49 |
+
with torch.no_grad():
|
50 |
+
image_features = model.encode_image(images)
|
51 |
+
image_features /= image_features.norm(dim=-1, keepdim=True)
|
52 |
+
|
53 |
+
image_features.cpu().numpy()
|
54 |
+
|
55 |
+
image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()
|
56 |
+
image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()
|
57 |
+
|
58 |
+
images = [preprocess(im) for im in l]
|
59 |
+
image_input = torch.tensor(np.stack(images)).cuda()
|
60 |
+
image_input -= image_mean[:, None, None]
|
61 |
+
image_input /= image_std[:, None, None]
|
62 |
+
with torch.no_grad():
|
63 |
+
image_features = model.encode_image(image_input).float()
|
64 |
+
image_features /= image_features.norm(dim=-1, keepdim=True)
|
65 |
+
|
66 |
+
def get_top_N_semantic_similarity(similarity_list,N):
|
67 |
+
results = zip(range(len(similarity_list)), similarity_list)
|
68 |
+
results = sorted(results, key=lambda x: x[1],reverse= True)
|
69 |
+
top_N_images = []
|
70 |
+
scores=[]
|
71 |
+
for index,score in results[:N]:
|
72 |
+
scores.append(score)
|
73 |
+
top_N_images.append(l[index])
|
74 |
+
return scores,top_N_images
|
75 |
+
|
76 |
+
search_query = "White car"
|
77 |
+
|
78 |
+
with torch.no_grad():
|
79 |
+
# Encode and normalize the description using CLIP
|
80 |
+
text_encoded = model.encode_text(clip.tokenize(search_query).to(device))
|
81 |
+
text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
|
82 |
+
|
83 |
+
similarity = text_encoded.cpu().numpy() @ image_features.cpu().numpy().T
|
84 |
+
similarity = similarity[0]
|
85 |
+
scores,imgs= get_top_N_semantic_similarity(similarity,N=1)
|
86 |
+
#print ("scores ",scores)
|
87 |
+
#ipyplot.plot_images(imgs,img_width=350)
|
88 |
+
return imgs
|
89 |
+
|
90 |
+
text = gr.inputs.Textbox(lines=5, label="Context")
|
91 |
+
img = gr.inputs.Image()
|
92 |
+
|
93 |
+
|
94 |
+
gr_interface = gr.Interface(fn=predict, [img, text], outputs="image", title='Search inside image').launch();
|