File size: 1,645 Bytes
71701b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Initialize a retriever using Qdrant and SentenceTransformer embeddings
import os

import gradio as gr
import pandas as pd
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.retrievers import EnsembleRetriever
from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient


# Name of the sentence-transformers checkpoint used to embed text queries.
# NOTE(review): presumably the same model was used when the Qdrant collection
# was populated -- confirm, since query and stored vectors must be comparable.
EMBEDDING_MODEL_NAME = 'sentence-transformers/clip-ViT-B-32'
embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)

def get_results(search_results):
    """Collect the ``image_id`` metadata value of each retrieved document.

    Args:
        search_results: Iterable of documents, each exposing a ``metadata``
            mapping.

    Returns:
        A list with one entry per document, in input order. An entry is
        ``None`` when the document's metadata has no ``"image_id"`` key.
    """
    image_ids = []
    for document in search_results:
        image_ids.append(document.metadata.get("image_id"))
    return image_ids

# The Qdrant API key is a secret and must not live in source control. It is
# read from the ``vector_db_key`` environment variable (e.g. a secret
# configured on the hosting platform).
# SECURITY NOTE(review): an API key was previously hard-coded in this file;
# treat that key as leaked and rotate it.
vector_db_key = os.environ.get("vector_db_key")
if not vector_db_key:
    raise RuntimeError(
        "Qdrant API key not found: set the 'vector_db_key' environment variable."
    )

# Endpoint of the hosted Qdrant cluster that stores the image vectors.
QDRANT_URL = "https://763bc1da-0673-4535-91ac-b5538ec0287f.us-east4-0.gcp.cloud.qdrant.io:6333"

# Client for the remote cluster, authenticated with the key loaded above.
client = QdrantClient(
    url=QDRANT_URL,
    api_key=vector_db_key,
)

COLLECTION_NAME = "semantic_image_search"

# LangChain vector-store wrapper over the collection; queries are embedded
# with the module-level ``embeddings`` object.
dense_vector_retriever = Qdrant(client, COLLECTION_NAME, embeddings)

# Table that get_link() consults for its 'link' column. Rows the CSV parser
# cannot handle are skipped rather than aborting the load.
images_data = pd.read_csv(
    "/kaggle/input/fashion-product-images-dataset/fashion-dataset/images.csv",
    on_bad_lines='skip',
)

def get_link(query):
    """Return the image links of the documents retrieved for ``query``.

    The query is run through an ensemble retriever whose only member is the
    dense Qdrant retriever. The ``image_id`` of each hit is then used to look
    up the ``'link'`` column of ``images_data``.

    Args:
        query: Free-text search string, e.g. ``"black shirt for men"``.

    Returns:
        A list of link values, one per retrieved document, in retrieval order.

    Raises:
        KeyError: If a retrieved ``image_id`` (or ``None``, when a document
            has no ``image_id``) is not a label in ``images_data``'s index.
    """
    ensemble_retriever = EnsembleRetriever(
        retrievers=[dense_vector_retriever.as_retriever()]
    )
    documents = ensemble_retriever.get_relevant_documents(query)
    image_ids = get_results(documents)

    # ``.loc`` selects by index label, so each image_id must be a label of
    # images_data's index.
    # NOTE(review): presumably image_id is the CSV row number -- confirm
    # against the code that populated the collection.
    return [images_data.loc[image_id, 'link'] for image_id in image_ids]

# Manual smoke test (uncomment to run): print(get_link("black shirt for men"))

# Web UI: one text box for the query, one text box showing the returned links.
demo = gr.Interface(fn=get_link, inputs='textbox', outputs='textbox')
demo.launch()