semantic_search / app.py
hbbhsy's picture
Update app.py
628c49e
# app.py
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch
# Page header: the title followed by a short introduction rendered as markdown.
_INTRO_MARKDOWN = """
Fetch provides value to our user base through the rich variety of offers
that are active in the app. We want our users to be able to easily seek
out offers in the app, so that they get the most out of using the app
and our partners get the most out of their relationship with Fetch.
Semantic Search model was utilized to search for relevant offers based
on user-provided keywords, such as categories, brands, or retailers.
"""

st.title("Semantic Search Tool Using SBERT")
st.markdown(_INTRO_MARKDOWN)
@st.cache_resource
def _load_search_assets():
    """Load the model, the offers table and the corpus embeddings once.

    Streamlit re-executes this script on every widget interaction. Without
    caching, the model would be re-loaded and the whole corpus re-encoded on
    each rerun; ``st.cache_resource`` keeps a single shared copy instead.

    Returns:
        A ``(embedder, df, corpus, corpus_embeddings)`` tuple:
        the SentenceTransformer model, the offers DataFrame read from
        ``data.csv`` (missing values replaced by ""), its ``corpus`` column,
        and the tensor of embeddings for that column.
    """
    model = SentenceTransformer('msmarco-distilbert-base-v4')
    model.max_seq_length = 256
    offers = pd.read_csv('data.csv', index_col=0).fillna("")
    texts = offers['corpus']
    # Pass a plain list: encode() is documented to take a str or list of str,
    # and a list avoids any dependence on the DataFrame's label index.
    embeddings = model.encode(
        texts.tolist(), convert_to_tensor=True, show_progress_bar=True
    )
    return model, offers, texts, embeddings


# NOTE: these objects are shared across reruns/sessions; treat them as read-only.
embedder, df, corpus, corpus_embeddings = _load_search_assets()
def search(query, top_k):
    """Find the corpus entries most similar to ``query``.

    The query is embedded with the module-level ``embedder`` and scored
    against the module-level ``corpus_embeddings`` by cosine similarity.

    Args:
        query: A string value.
        top_k: Requested number of results. It is converted to ``int`` and,
            if it exceeds the number of corpus entries, reduced to that
            number so ``torch.topk`` does not raise.

    Returns:
        top_results: the ``torch.topk`` result, a (values, indices) pair
        holding the similarity scores and the corpus positions of the best
        matches, ordered from most to least similar.
    """
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    # cos_sim returns a (1, n_corpus) matrix for a single query; take row 0.
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    # torch.topk requires an int k no larger than the number of scores.
    k = min(int(top_k), cos_scores.shape[0])
    top_results = torch.topk(cos_scores, k=k)
    return top_results
# Search form: collects the keywords and the number of results, then lists
# the best-matching offers with their similarity scores on submit.
with st.form("my_form"):
    query = st.text_input(
        "Enter keywords to search for relevant offers, e.g. 'Walmart', 'ice cream', etc. ",
        placeholder = "Search:",
    )
    k = st.number_input('Top K relevent offers', min_value=3, max_value=10)
    submitted = st.form_submit_button("Submit")
    if submitted:
        if not query.strip():
            # A blank query would still be embedded and return arbitrary
            # offers, so ask for input instead of showing misleading results.
            st.warning("Please enter at least one keyword to search.")
        else:
            top_results = search(query, k)
            # Convert both result tensors to plain Python numbers once.
            scores = top_results[0].tolist()
            positions = top_results[1].tolist()
            for score, idx in zip(scores, positions):
                st.write(df.iloc[idx]['offer'], "(Score: {:.4f})".format(score))
            st.write("\nFinished searching.")