Vijish committed on
Commit
008bbdb
1 Parent(s): 0a539a9

Create app.py

Files changed (1)
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
+ import glob
+ import tempfile
+
+ import clip
+ import gradio as gr
+ import torch
+ from PIL import Image
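+
+ # NOTE: `clip` above is assumed to be OpenAI's CLIP package
+ # (github.com/openai/CLIP), which provides the clip.load() and
+ # clip.tokenize() calls used below, not the unrelated `clip` on PyPI.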
+
+ # Model: YOLOv5 detects and crops objects; CLIP ranks the crops
+ # against the text query.
+ def predict(text, img):
+     # Gradio's Image input provides a NumPy RGB array, which the YOLOv5
+     # hub model accepts directly, so no cv2.imread is needed.
+     model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
+     results = model(img)
+
+     # Save one cropped image per detection under <tmpdir>/crops/<class>/.
+     dirpath = tempfile.mkdtemp()
+     results.crop(save_dir=dirpath)
+     path = dirpath + '/crops/**/*.jpg'
+
+     # Load every crop as a PIL image; fall back to the full input image
+     # if nothing was detected.
+     crops = []
+     for filename in glob.glob(path):
+         crops.append(Image.open(filename).convert('RGB'))
+     if not crops:
+         return img
+
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     clip_model, preprocess = clip.load("ViT-B/32", device=device)
+
+     # Encode and normalize every crop. clip.load's preprocess already
+     # applies CLIP's mean/std normalization, so the pixel statistics must
+     # not be applied a second time.
+     image_input = torch.stack([preprocess(im) for im in crops]).to(device)
+     with torch.no_grad():
+         image_features = clip_model.encode_image(image_input).float()
+         image_features /= image_features.norm(dim=-1, keepdim=True)
+
+     def get_top_N_semantic_similarity(similarity_list, N):
+         # Rank crop indices by similarity score, highest first.
+         ranked = sorted(zip(range(len(similarity_list)), similarity_list),
+                         key=lambda x: x[1], reverse=True)
+         scores = []
+         top_N_images = []
+         for index, score in ranked[:N]:
+             scores.append(score)
+             top_N_images.append(crops[index])
+         return scores, top_N_images
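+     # For example, similarity scores [0.12, 0.31, 0.08] with N=1 yield
+     # ([0.31], [crops[1]]).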
+
+     with torch.no_grad():
+         # Encode and normalize the search text using CLIP; the query comes
+         # from the Textbox input rather than a hard-coded string.
+         text_encoded = clip_model.encode_text(clip.tokenize(text).to(device))
+         text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
+
+     # Both feature sets are unit length, so this dot product is the cosine
+     # similarity between the query and every crop.
+     similarity = (text_encoded.cpu().numpy() @ image_features.cpu().numpy().T)[0]
+     scores, imgs = get_top_N_semantic_similarity(similarity, N=1)
+     # The "image" output expects a single image, so return the best match.
+     return imgs[0]
+
+ text = gr.inputs.Textbox(lines=5, label="Context")
+ img = gr.inputs.Image()
+
+ gr_interface = gr.Interface(fn=predict, inputs=[text, img], outputs="image",
+                             title='Search inside image')
+ gr_interface.launch()
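
One design note: predict reloads both YOLOv5 and CLIP on every call, so most of each request is spent rebuilding the models. Below is a minimal sketch of loading them once at module scope instead, reusing the same hub and CLIP calls as app.py; the name `yolo` is introduced here only for illustration.

import clip
import torch

# Load both models a single time at import; predict() then reuses them.
device = "cuda" if torch.cuda.is_available() else "cpu"
yolo = torch.hub.load('ultralytics/yolov5', 'yolov5s')
clip_model, preprocess = clip.load("ViT-B/32", device=device)

def predict(text, img):
    results = yolo(img)  # detection step as in app.py above
    ...                  # cropping, encoding, and ranking unchanged

With the models held at module level, each request only runs detection and ranking rather than paying the model start-up cost again.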