RobotJelly committed on
Commit b4c60c9
1 Parent(s): 81b51c7
Files changed (1)
  1. app.py +8 -76
app.py CHANGED
@@ -9,63 +9,14 @@ from io import BytesIO
 import requests
 import gradio as gr
 import os
-#from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
 import sentence_transformers
 from sentence_transformers import SentenceTransformer, util
 
 # check if CUDA available
 device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# Load the openAI's CLIP model
-#model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-#processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-#tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
-
-# taking photo IDs
-#photo_ids = pd.read_csv("./photo_ids.csv")
-#photo_ids = list(photo_ids['photo_id'])
-
-# Photo dataset
-#photos = pd.read_csv("./photos.tsv000", sep="\t", header=0)
-
-# taking features vectors
-#photo_features = np.load("./features.npy")
 
 IMAGES_DIR = Path("./photos/")
-#def show_output_image(matched_images) :
-    #image=[]
-    #for photo_id in matched_images:
-        # photo_image_url = f"https://unsplash.com/photos/{photo_id}/download?w=280"
-        #response = requests.get(photo_image_url, stream=True)
-        #img = Image.open(BytesIO(response.content))
-        # response = requests.get(photo_image_url, stream=True).raw
-        # img = Image.open(response)
-        #photo = photo_id + '.jpg'
-        #img = Image.open(response).convert("RGB")
-        #img = Image.open(os.path.join(IMAGES_DIR, photo))
-        #image.append(img)
-    #return image
-
 
-# Encode and normalize the search query using CLIP
-#def encode_search_query(search_query, model, device):
-    # with torch.no_grad():
-        # inputs = tokenizer([search_query], padding=True, return_tensors="pt")
-        #inputs = processor(text=[search_query], images=None, return_tensors="pt", padding=True)
-        # text_features = model.get_text_features(**inputs).cpu().numpy()
-    # return text_features
-
-# Find all matched photos
-#def find_matches(features, photo_ids, results_count=4):
-    # Compute the similarity between the search query and each photo using the Cosine similarity
-    #text_features = np.array(text_features)
-    #similarities = (photo_features @ features.T).squeeze(1)
-    # Sort the photos by their similarity score
-    #best_photo_idx = (-similarities).argsort()
-    # Return the photo IDs of the best matches
-    #matches = [photo_ids[i] for i in best_photo_idx[:results_count]]
-    #return matches
-
 #Load CLIP model
 model = SentenceTransformer('clip-ViT-B-32')
 
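The removed block above used to load precomputed CLIP features from features.npy; after this commit the app instead compares queries against a precomputed `img_emb` tensor built elsewhere in app.py (outside the hunks shown). A minimal sketch of how those gallery embeddings could be produced with the same sentence-transformers CLIP model, assuming the photos sit as JPEGs under IMAGES_DIR (glob pattern and batch size are illustrative):

```python
# Sketch only, not part of this commit: build the `img_emb` tensor that the
# search function below compares against.
from pathlib import Path
from PIL import Image
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('clip-ViT-B-32')
IMAGES_DIR = Path("./photos/")

image_paths = sorted(IMAGES_DIR.glob("*.jpg"))          # assumed file layout
images = [Image.open(p).convert("RGB") for p in image_paths]

# Encode the whole gallery once; util.cos_sim later compares each query
# embedding against this (num_images x 512) tensor.
img_emb = model.encode(images, batch_size=32, convert_to_tensor=True, show_progress_bar=True)
```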
@@ -85,40 +36,21 @@ def display_matches(similarity, topk):
 def image_search(Option, topk, search_text, search_image):
 
     # Input Text Query
-    #search_query = "The feeling when your program finally works"
     if Option == "Text-To-Image" :
-        # Extracting text features embeddings
-        #text_features = encode_search_query(search_text, model, device)
+        # Encode the given Input text for Search & take it in tensor form
         text_emb = model.encode([search_text], convert_to_tensor=True)
+        # Compute cosine similarities between encoded input text (in tensor) & encoded images from unsplash dataset
         similarity = util.cos_sim(img_emb, text_emb)
+
+        #using the computed similarities, find the topk best matches
         return display_matches(similarity, topk)
-        # Find the matched Images
-        #matched_images = find_matches(text_features, photo_features, photo_ids, 4)
-        #matched_results = util.semantic_search(text_emb, img_emb, top_k=4)[0]
-
-        # top 4 highest ranked images
-        #return display_matches(matched_results)
     elif Option == "Image-To-Image":
-        # Input Image for Search
-        #search_image = Image.fromarray(search_image.astype('uint8'), 'RGB')
-
-        #with torch.no_grad():
-        # processed_image = processor(text=None, images=search_image, return_tensors="pt", padding=True)["pixel_values"]
-        # image_feature = model.get_image_features(processed_image.to(device))
-        # image_feature /= image_feature.norm(dim=-1, keepdim=True)
-        #image_feature = image_feature.cpu().numpy()
-        # Find the matched Images
-        #matched_images = find_matches(image_feature, photo_ids, 4)
-
-        #image_emb = model.encode(Image.open(search_image), convert_to_tensor=True)
-        #image_emb = model.encode(Image.open(search_image))
-        # Find the matched Images
-        #matched_images = find_matches(text_features, photo_features, photo_ids, 4)
-        #similarity = util.cos_sim(image_emb, img_emb)
-        #matched_results = util.semantic_search(image_emb, img_emb, 4)[0]
-
+        # Encode the given Input Image for Search & take it in tensor form
         image_emb = model.encode([Image.fromarray(search_image)], convert_to_tensor=True)
+        # Compute cosine similarities between encoded input image (in tensor) & encoded images from unsplash dataset
         similarity = util.cos_sim(img_emb, image_emb)
+
+        #using the computed similarities, find the topk best matches
         return display_matches(similarity, topk)
 
 gr.Interface(fn=image_search, title="Search Image",
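After this change both branches of image_search follow the same pattern: encode the query with the CLIP SentenceTransformer, take cosine similarities against the precomputed gallery embeddings, and hand the similarity matrix to display_matches. The body of display_matches is outside this diff; the sketch below shows the equivalent ranking step with torch.topk, assuming `model`, `img_emb`, and `image_paths` from the sketch above (the helper name `top_matches` and the example queries are illustrative):

```python
# Sketch of the ranking step that display_matches is expected to perform.
import torch
from PIL import Image
from sentence_transformers import util

def top_matches(query_emb, img_emb, image_paths, topk=4):
    # cos_sim returns a (num_images, 1) matrix for a single query embedding.
    similarity = util.cos_sim(img_emb, query_emb)
    scores, indices = torch.topk(similarity.squeeze(1), k=topk)
    return [(image_paths[i], float(s)) for i, s in zip(indices.tolist(), scores.tolist())]

# Text-To-Image: natural-language query against the gallery.
text_emb = model.encode(["two dogs playing in the snow"], convert_to_tensor=True)
print(top_matches(text_emb, img_emb, image_paths))

# Image-To-Image: an example gallery image as the query.
query_img = Image.open(image_paths[0]).convert("RGB")
query_emb = model.encode([query_img], convert_to_tensor=True)
print(top_matches(query_emb, img_emb, image_paths))
```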
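The gr.Interface(...) call is truncated in the view above. A plausible wiring for the four image_search parameters and a gallery output is sketched below; the component choices, labels, and ranges are assumptions, not the committed code, and exact component names depend on the Gradio version:

```python
# Hypothetical completion of the truncated gr.Interface call.
import gradio as gr

demo = gr.Interface(
    fn=image_search,
    title="Search Image",
    inputs=[
        gr.Radio(["Text-To-Image", "Image-To-Image"], label="Option"),
        gr.Slider(1, 10, value=4, step=1, label="Top K"),
        gr.Textbox(label="Search text"),
        gr.Image(label="Search image"),   # delivered to image_search as a numpy array
    ],
    outputs=gr.Gallery(label="Matches"),
)

demo.launch()
```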