# Import libraries
from pathlib import Path
import pickle

import requests
import torch
import gradio as gr
from PIL import Image
from sentence_transformers import SentenceTransformer, util

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

IMAGES_DIR = Path("photos/")

# Load the CLIP model (ViT-B/32). CLIP maps text and images into a shared
# embedding space, which is what makes both search modes below possible.
model = SentenceTransformer('clip-ViT-B-32', device=device)

# Pre-computed CLIP embeddings for the 25k Unsplash photos
emb_filename = 'unsplash-25k-photos-embeddings.pkl'
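# If the pickle isn't present locally, fetch it first. A minimal sketch:
# the URL below is the one used in the sentence-transformers image-search
# example and is an assumption here; verify it before relying on it.
if not Path(emb_filename).exists():
    emb_url = f"http://sbert.net/datasets/{emb_filename}"  # assumed location
    response = requests.get(emb_url, timeout=60)
    response.raise_for_status()
    with open(emb_filename, 'wb') as f:
        f.write(response.content)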
with open(emb_filename, 'rb') as emb:
    img_names, img_emb = pickle.load(emb)
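# Sanity check and device placement, assuming the pickle stores
# (filenames, embedding matrix) as in the sentence-transformers example.
# Moving the matrix to the model's device keeps util.cos_sim from mixing
# CPU and GPU tensors later on.
img_emb = torch.as_tensor(img_emb).to(device)
assert len(img_names) == len(img_emb), "filenames and embeddings must align"
print(f"Loaded {len(img_emb)} image embeddings on {device}")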
 
def display_matches(similarity, topk):
    """Return the top-k gallery images that best match the query embedding."""
    best_matched_images = []
    # similarity has shape (num_images, 1); squeeze to a 1-D score vector
    # so torch.topk ranks over the images
    top_k_indices = torch.topk(similarity.squeeze(), topk).indices
    for matched_image in top_k_indices:
        img = Image.open(IMAGES_DIR / img_names[int(matched_image)])
        best_matched_images.append(img)
    return best_matched_images
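# For reference: util.cos_sim(img_emb, query_emb) with img_emb of shape
# (N, 512) and a single CLIP query embedding of shape (1, 512) returns an
# (N, 1) score matrix, which is why display_matches squeezes it first.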
  
def image_search(Option, topk, search_text, search_image):
    # The top-k dropdown uses type="index", so "1" arrives as 0;
    # add 1 to turn the index back into a count
    topk = topk + 1
    if Option == "Text-To-Image":
        # Encode the input text as a tensor
        text_emb = model.encode([search_text], convert_to_tensor=True)
        # Cosine similarities between the text embedding and every Unsplash image embedding
        similarity = util.cos_sim(img_emb, text_emb)
        # Use the computed similarities to find the top-k best matches
        return display_matches(similarity, topk)
    elif Option == "Image-To-Image":
        # Encode the input image (a NumPy array from Gradio) as a tensor
        image_emb = model.encode([Image.fromarray(search_image)], convert_to_tensor=True)
        # Cosine similarities between the query image embedding and every Unsplash image embedding
        similarity = util.cos_sim(img_emb, image_emb)
        # Use the computed similarities to find the top-k best matches
        return display_matches(similarity, topk)
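# Quick smoke test outside the UI (assumes the photos/ directory and the
# embeddings pickle are in place; note topk is an index here, so 2 means
# the top 3 matches):
# matches = image_search("Text-To-Image", 2, "two dogs playing in the snow", None)
# for match in matches:
#     match.show()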
  
# Build the UI. Note: the gr.inputs / gr.outputs namespaces and the Carousel
# output are the legacy Gradio 2.x API; a Gradio 3+ sketch follows below.
gr.Interface(fn=image_search,
             title="Search Image",
             description="Enter text or an image to search for the most relevant images...",
             article="""
                     Instructions:
                      1. Select the option: `Text-To-Image` OR `Image-To-Image`.
                      2. Select the number of most relevant images you want to see.
                      3. Enter the text or image accordingly.
                      4. The matching images appear on the right. To run another query, clear the inputs first and repeat steps 1-3.
                     """,
             theme="huggingface",
             inputs=[gr.inputs.Dropdown(["Text-To-Image", "Image-To-Image"], label="Select Option"),
                     gr.inputs.Dropdown([str(i) for i in range(1, 11)], type="index", default="1", label="Select Top K Images"),
                     gr.inputs.Textbox(lines=3, label="Input Text", placeholder="Enter the text..."),
                     gr.inputs.Image(optional=True, label="Input Image")],
             outputs=gr.outputs.Carousel([gr.outputs.Image(type="pil")]),
             enable_queue=True
             ).launch(debug=True, share=True)
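# A hedged sketch of the same interface on Gradio 3+/4, where gr.inputs /
# gr.outputs were removed and Gallery replaces the old Carousel output.
# Untested here; check the docs of the Gradio version you install before
# relying on the exact argument names.
#
# demo = gr.Interface(
#     fn=image_search,
#     title="Search Image",
#     inputs=[
#         gr.Dropdown(["Text-To-Image", "Image-To-Image"], label="Select Option"),
#         gr.Dropdown([str(i) for i in range(1, 11)], type="index", value="1",
#                     label="Select Top K Images"),
#         gr.Textbox(lines=3, label="Input Text", placeholder="Enter the text..."),
#         gr.Image(label="Input Image"),
#     ],
#     outputs=gr.Gallery(label="Matches"),
# )
# demo.queue().launch(debug=True, share=True)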