"""Gradio app that runs a semantic search over the Cream listings dataset.

At import time the script loads the ``Binaryy/cream_listings`` dataset,
embeds every listing with the Cohere embed endpoint, and stores the vectors
in ``df['query_embeds']``. Each query is embedded the same way and compared
against the stored vectors with cosine similarity.
"""
import os

import cohere
import datasets
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

API_KEY = os.environ.get('API_KEY')
co = cohere.Client(API_KEY)

df = datasets.load_dataset('Binaryy/cream_listings', split='train').to_pandas()

# Number of listings returned for each query.
TOP_K = 2


def get_embeddings(texts, model='embed-english-v2.0'):
    """Embed ``texts`` with the Cohere client and return the embeddings.

    Args:
        texts: List of strings to embed.
        model: Name of the Cohere embedding model to use.

    Returns:
        The ``embeddings`` attribute of the Cohere embed response, one
        entry per input text.
    """
    output = co.embed(model=model, texts=texts)
    return output.embeddings


def add_column_name_to_values(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose cells read ``"<column> :<value> "``.

    The input DataFrame is not modified. Every cell in the result is a
    string, so the cells of a row can be joined into one text to embed.
    """
    new_df = df.copy()  # Work on a copy so the caller's DataFrame is untouched.
    for column_name in df.columns:
        new_df[column_name] = df[column_name].apply(
            lambda x: f"{column_name} :{x} "
        )
    return new_df


housing_new = add_column_name_to_values(df)
# One text per listing: all of its labelled cells joined with spaces.
housing_mashup = [' '.join(row) for row in housing_new.to_numpy()]
df['query_embeds'] = get_embeddings(housing_mashup)


def get_similarity(target, candidates):
    """Rank ``candidates`` by cosine similarity to ``target``.

    Args:
        target: A single embedding vector.
        candidates: A sequence of embedding vectors with the same length
            as ``target``.

    Returns:
        An iterator of ``(candidate_index, score)`` pairs ordered from the
        most to the least similar candidate.
    """
    # Turn the inputs into arrays; the target becomes a one-row matrix.
    candidates = np.array(candidates)
    target = np.expand_dims(np.array(target), axis=0)

    # The result has one row (for the single target). Index that row
    # explicitly instead of using np.squeeze, which would collapse a single
    # candidate's score to a bare scalar and break the indexing below.
    sim = cosine_similarity(target, candidates)[0].tolist()

    sort_index = np.argsort(sim)[::-1]
    sort_score = [sim[i] for i in sort_index]
    return zip(sort_index, sort_score)


def search(new_query):
    """Return the ``TOP_K`` listings most similar to ``new_query``.

    Args:
        new_query: Free-text search query.

    Returns:
        A list of JSON strings, one per matching row of ``df``, best match
        first.
    """
    # Embed the query with the same model used for the listings.
    new_query_embeds = get_embeddings([new_query])[0]

    ranked = get_similarity(new_query_embeds, df.query_embeds.tolist())
    top_recommendations = list(ranked)[:TOP_K]

    # NOTE(review): each row is serialized whole, so the JSON also carries
    # the 'query_embeds' vector that was added to df above.
    return [df.iloc[index].to_json() for index, _score in top_recommendations]


iface = gr.Interface(
    fn=search,
    inputs="text",
    outputs="json",
    title="CREAM Semantic Search Engine.",
    description="Enter your query to perform a semantic search on Cream listings.",
    # examples = ['Recommend a duplex in Lagos with 5 bedrooms and a balcony',
    #             'Find me a Mecedes Benz 2016 model with a sunroof',
    #             "I'm Looking for a 4 bedroom apartment in Lekki"]
)

# Launch the Gradio interface
iface.launch(share=False)