"""Gradio app exposing an embedding-based search over a pickled table.

The table loaded from ``entire_data.pkl`` must provide the columns this
module reads: ``name``, ``description``, ``rating``, ``text`` and
``embedding``.
"""
import os
import time

import gradio as gr
import pandas as pd
import tiktoken
import torch
from openai.embeddings_utils import get_embedding, cosine_similarity
from sentence_transformers import SentenceTransformer

# NOTE: unpickling can execute arbitrary code; only load a trusted file here.
df = pd.read_pickle('entire_data.pkl')

# Model used to embed incoming queries.
# NOTE(review): presumably the stored ``embedding`` column was produced by
# this same model -- confirm, otherwise the similarities are meaningless.
embedder = SentenceTransformer('all-mpnet-base-v2')


def search(query, n=15, max_reviews=3):
    """Return the entries whose rows best match *query*, grouped by name.

    Every row of the module-level ``df`` is scored by cosine similarity
    between its stored embedding and the embedding of *query*. The ``n``
    best rows are kept and grouped by their ``name`` column; one result is
    produced per distinct name, in order of best match first.

    Args:
        query: Free-text search string.
        n: Number of top-scoring rows considered before grouping.
        max_reviews: Maximum number of ``text`` values reported per name.

    Returns:
        A list of JSON-serialisable dicts with the keys ``name``,
        ``description``, ``relevance score``, ``rating`` and
        ``relevant_reviews``. An empty or whitespace-only query yields an
        empty list.
    """
    if not query or not query.strip():
        return []

    # Flatten rather than hard-coding the embedding width, so each score is
    # a plain scalar and the sort below runs on a numeric column.
    # Assumes each stored embedding is a 1-D vector -- TODO confirm.
    query_vector = embedder.encode(query).reshape(-1)

    scores = df["embedding"].apply(
        lambda emb: float(cosine_similarity(emb, query_vector))
    )

    # assign() returns a new frame, so the shared module-level table is
    # never mutated and overlapping requests cannot clobber each other.
    results = (
        df.assign(similarity=scores)
        .sort_values("similarity", ascending=False)
        .head(n)
    )

    hits = []
    # sort=False keeps groups in order of first appearance, i.e. ranked by
    # each name's best-scoring row.
    for name, group in results.groupby("name", sort=False):
        hits.append(
            {
                "name": name,
                "description": group["description"].iloc[0],
                # Rows are sorted by descending similarity, so the first
                # row of a group carries that group's highest score.
                "relevance score": float(group["similarity"].iloc[0]),
                # NOTE(review): this reports the rating of the best-matching
                # row, as the original code did. If ratings are per-review,
                # a mean over the group may have been intended -- confirm.
                "rating": float(group["rating"].iloc[0]),
                "relevant_reviews": group["text"].head(max_reviews).tolist(),
            }
        )
    return hits


def greet(query):
    """Gradio callback: run :func:`search` on *query* and return its hits."""
    return search(query)


iface = gr.Interface(fn=greet, inputs="text", outputs="json")
iface.launch()