# Spaces: Sleeping  (page status text that appears to have been captured along with the source; not part of the program)
# app.py | |
import streamlit as st | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer, util | |
import torch | |
# Introductory copy shown under the page title; rendered as Markdown.
_INTRO_MARKDOWN = """
Fetch provides value to our user base through the rich variety of offers
that are active in the app. We want our users to be able to easily seek
out offers in the app, so that they get the most out of using the app
and our partners get the most out of their relationship with Fetch.
Semantic Search model was utilized to search for relevant offers based
on user-provided keywords, such as categories, brands, or retailers.
"""

# Page header: title first, then the explanatory paragraph.
st.title('Semantic Search Tool Using SBERT')
st.markdown(_INTRO_MARKDOWN)
@st.cache_resource
def _load_search_index(model_name='msmarco-distilbert-base-v4',
                       csv_path='data.csv',
                       max_seq_length=256):
    """Load the sentence encoder and the offer corpus, and embed the corpus.

    Streamlit re-executes the whole script on every widget interaction.
    Caching this function means the model is loaded and the corpus is
    encoded once per server process instead of on every form submit.

    Args:
        model_name: SentenceTransformer model identifier to load.
        csv_path: CSV file holding the offers; must contain a 'corpus' column.
        max_seq_length: Value assigned to the encoder's ``max_seq_length``.

    Returns:
        A tuple ``(embedder, df, corpus, corpus_embeddings)`` where ``corpus``
        is the 'corpus' column of ``df`` and ``corpus_embeddings`` is the
        tensor returned by ``embedder.encode`` for that column.
    """
    model = SentenceTransformer(model_name)
    model.max_seq_length = max_seq_length

    # Missing cells become empty strings so every row can be encoded.
    frame = pd.read_csv(csv_path, index_col=0).fillna("")
    texts = frame['corpus']

    # Pass a plain list of strings: handing the Series itself to encode()
    # makes any integer indexing inside the encoder depend on the Series'
    # index labels (set by index_col=0) rather than on row position.
    embeddings = model.encode(
        texts.astype(str).tolist(),
        convert_to_tensor=True,
        show_progress_bar=True,
    )
    return model, frame, texts, embeddings


# Module-level names used by the rest of the script.
embedder, df, corpus, corpus_embeddings = _load_search_index()
def search(query, top_k):
    """Search the closest k results to the input query.

    The query is encoded with the module-level ``embedder`` and compared
    against ``corpus_embeddings`` using cosine similarity.

    Args:
        query: A string value.
        top_k: Number of results wanted. Coerced to ``int``; values larger
            than the number of corpus entries are clamped to that number,
            because ``torch.topk`` raises when ``k`` exceeds the input size.

    Returns:
        top_results: the result of ``torch.topk`` -- a tuple holding the
        scores and the indices of the top k results, best match first.
    """
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    # cos_sim returns a 2-D score matrix; row 0 belongs to the single query.
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    # Never ask for more results than there are corpus entries.
    k = min(int(top_k), cos_scores.shape[0])
    return torch.topk(cos_scores, k=k)
# Search form: widgets inside a form only trigger a rerun when "Submit" is
# pressed, so the search runs once per submission rather than per keystroke.
with st.form("my_form"):
    query = st.text_input(
        "Enter keywords to search for relevant offers, e.g. 'Walmart', 'ice cream', etc. ",
        placeholder="Search:",
    )
    k = st.number_input('Top K relevant offers', min_value=3, max_value=10)
    submitted = st.form_submit_button("Submit")

    if submitted:
        if not query.strip():
            # A blank query would still be encoded and ranked, producing
            # meaningless matches; ask for input instead.
            st.warning("Please enter at least one keyword to search.")
        else:
            top_results = search(query, int(k))
            # top_results[0] holds the scores, top_results[1] the row
            # positions; convert both to plain Python numbers up front.
            scores = top_results[0].tolist()
            positions = top_results[1].tolist()
            for score, idx in zip(scores, positions):
                st.write(df.iloc[idx]['offer'], "(Score: {:.4f})".format(score))
            st.write("\nFinished searching.")