# Import Libraries
from pathlib import Path
import pickle

import torch
from PIL import Image
import gradio as gr
from sentence_transformers import SentenceTransformer, util

# Use CUDA if available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

IMAGES_DIR = Path("photos/")

# Load the CLIP model on the selected device
model = SentenceTransformer('clip-ViT-B-32', device=device)

# Load pre-computed CLIP embeddings for the 25k Unsplash photos
emb_filename = 'unsplash-25k-photos-embeddings.pkl'
with open(emb_filename, 'rb') as emb:
    img_names, img_emb = pickle.load(emb)
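
# The pickle above was built offline. If it were missing, the embeddings could
# be recomputed along these lines (a rough sketch, not the original build
# script; it assumes photos/ holds the same JPEGs the pickle was built from,
# and for 25k photos you would want to chunk the Image.open loop):
#
#   img_names = sorted(p.name for p in IMAGES_DIR.glob("*.jpg"))
#   img_emb = model.encode([Image.open(IMAGES_DIR / n) for n in img_names],
#                          batch_size=128, convert_to_tensor=True,
#                          show_progress_bar=True)
#   with open(emb_filename, 'wb') as f:
#       pickle.dump((img_names, img_emb), f)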
 
def display_matches(similarity, topk):
    # Take the indices of the topk highest cosine similarities (dim 0 runs
    # over the 25k dataset images) and load the corresponding photos
    best_matched_images = []
    top_k_indices = torch.topk(similarity, topk, dim=0).indices
    for matched_image in top_k_indices:
        img = Image.open(IMAGES_DIR / img_names[matched_image])
        best_matched_images.append(img)
    return best_matched_images
  
def image_search(Option, topk, search_text, search_image):
    # The topk dropdown uses type="index", so "1" arrives as 0; adding 1
    # turns the 0-based index back into a count of images
    topk = topk + 1
    if Option == "Text-To-Image":
        # Encode the input text query as a tensor
        text_emb = model.encode([search_text], convert_to_tensor=True)
        # Cosine similarities between the text embedding and every
        # pre-computed Unsplash image embedding
        similarity = util.cos_sim(img_emb, text_emb)
        # Using the computed similarities, find the topk best matches
        return display_matches(similarity, topk)
    elif Option == "Image-To-Image":
        # Encode the input image (Gradio passes it in as a numpy array)
        image_emb = model.encode([Image.fromarray(search_image)], convert_to_tensor=True)
        # Cosine similarities between the query image embedding and every
        # pre-computed Unsplash image embedding
        similarity = util.cos_sim(img_emb, image_emb)
        # Using the computed similarities, find the topk best matches
        return display_matches(similarity, topk)
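
# Quick local smoke test before wiring up the UI (hypothetical query text;
# topk index 1 yields two results because of the +1 above):
#
#   for im in image_search("Text-To-Image", 1, "dogs playing in the snow", None):
#       im.show()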
  
gr.Interface(fn=image_search, title="Search Image",
             description="Enter text or an image to search for the most relevant images...",
             article="""
                     Instructions:
                      1. Select an option: `Text-To-Image` OR `Image-To-Image`.
                      2. Select the number of most relevant images you want to see.
                      3. Enter the text or image accordingly.
                      4. The matching images appear on the right. To run another
                         text/image query, clear the inputs first, then repeat steps 1-3.
                     """,
             theme="huggingface",
             inputs=[gr.inputs.Dropdown(["Text-To-Image", "Image-To-Image"]),
                     gr.inputs.Dropdown(["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"], type="index", default="1", label="Select Top K Images"),
                     gr.inputs.Textbox(lines=3, label="Input Text", placeholder="Enter the text..."),
                     gr.inputs.Image(optional=True)
                     ],
             outputs=gr.outputs.Carousel([gr.outputs.Image(type="pil")]),
             enable_queue=True
             ).launch(debug=True, share=True)