Spaces:

marcelcastrobr
/

CLIP-image-search

Runtime error

File size: 2,771 Bytes

8cf7f66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e0a0ad
 
 
8cf7f66
 
 
38e3100
 
8cf7f66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e0a0ad
8cf7f66
 
8e0a0ad
8cf7f66
 
38e3100
8cf7f66
 
d98a4ef
 
 
8cf7f66
8e0a0ad
d98a4ef
 
8e0a0ad
 
 
 
8cf7f66
 
 
 
f0a42ef
fef47b0
8cf7f66

import gradio as gr
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
import sentence_transformers
from sentence_transformers import SentenceTransformer, util
import pickle
from PIL import Image
import os


## Define model
# The model, processor and tokenizer must all come from the same pretrained
# checkpoint, so the identifier is defined once and reused.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
tokenizer = CLIPTokenizer.from_pretrained(CLIP_CHECKPOINT)

# Open the precomputed embeddings.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only ship an embeddings file that comes from a trusted source.
emb_filename = 'unsplash-25k-photos-embeddings.pkl'
with open(emb_filename, 'rb') as fIn:
    # The pickle is unpacked into two values: the image file names and the
    # image embeddings that search_text() compares queries against.
    img_names, img_emb = pickle.load(fIn)


def search_text(query, top_k=1):
    """Search the precomputed image index for pictures matching a text query.

    The query is embedded with the CLIP text encoder and compared against the
    image embeddings loaded at module level; each hit is opened from the
    ``photos/`` directory.

    Args:
        query (str): Free-text description of the image to look for.
        top_k (int, optional): Number of images to return. Defaults to 1.
            The value is coerced with ``int()`` (a UI slider may hand over a
            float); values below 1 yield an empty list.

    Returns:
        list: PIL images for the hits, in the order reported by
        ``util.semantic_search``.
    """
    # Deferred import: torch is not imported at file level, and keeping the
    # import here lets this function be replaced on its own.
    import torch

    top_k = int(top_k)
    if top_k < 1:
        # Nothing was requested; skip the model call entirely.
        return []

    # First, we encode the query. Gradients are never needed for a search,
    # so autograd is switched off while the text encoder runs.
    inputs = tokenizer([query], padding=True, return_tensors="pt")
    with torch.no_grad():
        query_emb = model.get_text_features(**inputs)

    # Then, we use the util.semantic_search function, which computes the
    # cosine-similarity between the query embedding and all image embeddings.
    # Only one query is passed, hence the [0] to pick its list of hits.
    hits = util.semantic_search(query_emb, img_emb, top_k=top_k)[0]

    # Each hit's 'corpus_id' indexes into img_names, which holds the file
    # name of the matching picture.
    return [
        Image.open(os.path.join("photos/", img_names[hit['corpus_id']]))
        for hit in hits
    ]
    

# --- Gradio user interface -------------------------------------------------

# Free-text query box.
search_box = gr.Textbox(
    lines=4,
    label="Write what you are looking for in an image...",
    placeholder="Text Here...",
)

# Number of images to return. The range starts at 1 (matching the default of
# search_text) because a request for 0 images can only produce an empty
# gallery.
count_slider = gr.Slider(1, 5, step=1, value=1, label="Number of images")

# Older Gradio releases configure the gallery layout through .style(); newer
# releases dropped that method and take the layout options as constructor
# arguments instead. Support both so the app starts on either.
gallery_options = {
    "label": "Generated images",
    "show_label": False,
    "elem_id": "gallery",
}
if hasattr(gr.Gallery, "style"):
    results_gallery = gr.Gallery(**gallery_options).style(grid=[2], height="auto")
else:
    results_gallery = gr.Gallery(columns=2, height="auto", **gallery_options)

# Each example is [query text, number of images]; several languages are
# included on purpose.
example_queries = [
    ["Dog in the beach", 2],
    ["Paris during night.", 1],
    ["A cute kangaroo", 5],
    ["Dois cachorros", 2],
    ["un homme marchant sur le parc", 3],
    ["et høyt fjell", 2],
]

iface = gr.Interface(
    title="Text to Image using CLIP Model 📸",
    description="Gradio Demo for CLIP model. \n This demo is based on assessment for the 🤗  Huggingface course 2. \n To use it, simply write which image you are looking for. Read more at the links below.",
    article="You find more information about this demo on my ✨ github repository [marcelcastrobr](https://github.com/marcelcastrobr/huggingface_course2)",
    fn=search_text,
    inputs=[search_box, count_slider],
    outputs=[results_gallery],
    examples=example_queries,
)

# Launch separately so that `iface` stays bound to the Interface object
# rather than to whatever launch() returns.
iface.launch(debug=True)