"""Streamlit app: semantic search over fine-food reviews.

Loads reviews with precomputed embeddings from a CSV file, embeds the
user's query with the OpenAI embeddings API and shows the most similar
reviews together with their product ids.
"""

import ast
import os

import numpy as np
import openai
import pandas as pd
import streamlit as st
import tiktoken
from openai.embeddings_utils import cosine_similarity, get_embedding

# NOTE: this script never sets an API key itself, so the openai client has to
# find one on its own (it looks at the OPENAI_API_KEY environment variable).
# To take the key from Streamlit secrets instead, index the mapping --
# st.secrets is not callable:
#     openai.api_key = st.secrets["OPENAI_API_KEY"]

# Model used to embed the search query; it has to be the same model that
# produced the "embedding" column stored in the CSV.
embedding_model = "text-embedding-ada-002"
datafile_path = "fine_food_reviews_with_embeddings_1k.csv"

st.title("Semantic Search")

# The file is read once. An earlier version read it a second time and threw
# away a first frame on which it had rebuilt "combined" and filtered by token
# count; that work never reached the search, which always used the "combined"
# and "embedding" columns stored in the CSV itself.
df = pd.read_csv(datafile_path)

# Embeddings are stored as the text of a Python list. literal_eval parses
# that text without executing arbitrary code, unlike eval.
df["embedding"] = df.embedding.apply(ast.literal_eval).apply(np.array)


def search_reviews(df, product_description, n=3, pprint=True):
    """Return the ``n`` reviews most similar to ``product_description``.

    Args:
        df: Reviews frame with ``embedding``, ``combined`` and ``ProductId``
            columns. A ``similarity`` column is added to (or overwritten
            in) this frame as a side effect.
        product_description: Free-text query to embed and compare against.
        n: Maximum number of reviews to return. Fewer are returned when the
            frame has fewer rows.
        pprint: When true, also print every match to stdout.

    Returns:
        A ``(results, product)`` tuple of Series sharing the same index and
        ordered by decreasing similarity: the review texts with the
        "Title: " / "; Content:" markers rewritten, and the product ids.
    """
    # Widgets may hand over a non-int number; head() needs an integer.
    n = int(n)

    product_embedding = get_embedding(product_description, engine=embedding_model)
    df["similarity"] = df.embedding.apply(
        lambda x: cosine_similarity(x, product_embedding)
    )

    # Sort once and derive both outputs from the same rows so they stay aligned.
    top = df.sort_values("similarity", ascending=False).head(n)
    results = (
        top.combined.str.replace("Title: ", "", regex=False)
        .str.replace("; Content:", ": ", regex=False)
    )
    product = top.ProductId

    if pprint:
        # Walk the rows actually returned; indexing with range(n) raised
        # IndexError whenever n was larger than the number of reviews.
        for product_id, review in zip(product, results):
            print("Product : ", product_id)
            print(review)
            print()

    return results, product


prompt = st.text_input("What do you want to search for? : ", "pizza")
top_n = st.number_input("How many results do you want to see? : ", min_value=1)

# Only query the embeddings API once the user asks for it; running the search
# at module level would issue a request on every Streamlit rerun.
if st.button("Search Reviews"):
    if prompt.strip():
        results, product = search_reviews(df, prompt, top_n)
        st.write(product, results)
    else:
        st.warning("Please enter something to search for.")