File size: 1,941 Bytes
628c49e
 
763c630
 
 
5b07345
763c630
 
 
 
 
 
 
628c49e
 
 
763c630
 
 
 
 
46b9843
 
763c630
 
 
628c49e
 
 
 
 
 
 
 
763c630
 
 
aea2c85
763c630
 
 
aea2c85
 
 
 
46b9843
c2a86ea
c08515c
628c49e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# app.py

import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch

# Page header: the tool's name followed by a short description of its purpose.
st.title("Semantic Search Tool Using SBERT")

# The description is spelled out with explicit escapes so that the leading
# tab and the trailing spaces inside the text are visible in the source.
st.markdown(
    "\n"
    "\tFetch provides value to our user base through the rich variety of offers\n"
    "    that are active in the app. We want our users to be able to easily seek \n"
    "    out offers in the app, so that they get the most out of using the app \n"
    "    and our partners get the most out of their relationship with Fetch. \n"
    "    Semantic Search model was utilized to search for relevant offers based \n"
    "    on user-provided keywords, such as categories, brands, or retailers.\n"
    "    "
)

@st.cache_resource
def _load_search_assets():
    """Load the SBERT model and the offers table, and embed the corpus once.

    Streamlit re-executes this script on every user interaction (including
    each form submit). Caching the result keeps the model load and the
    corpus encoding from being repeated on every rerun.

    Returns:
        A tuple ``(model, offers, texts, embeddings)``: the sentence
        embedding model, the offers DataFrame read from ``data.csv`` with
        missing values replaced by empty strings, its ``corpus`` column,
        and the tensor of embeddings for that column (one row per offer,
        in DataFrame row order).
    """
    model = SentenceTransformer('msmarco-distilbert-base-v4')
    model.max_seq_length = 256
    offers = pd.read_csv('data.csv', index_col=0).fillna("")
    texts = offers['corpus']
    # Hand encode() a plain list rather than the Series: integer indexing on
    # a Series goes through its index labels, which come from the CSV's first
    # column, whereas results are later looked up by position via df.iloc.
    embeddings = model.encode(
        texts.tolist(), convert_to_tensor=True, show_progress_bar=True
    )
    return model, offers, texts, embeddings


embedder, df, corpus, corpus_embeddings = _load_search_assets()

def search(query, top_k):
    """Find the corpus entries most similar to the input query.

    Args:
        query: A string value to embed and compare against the corpus.
        top_k: The number of results wanted. Values larger than the number
            of corpus entries are clamped to that number, because
            ``torch.topk`` raises when ``k`` exceeds the available scores.

    Returns:
        top_results: the ``torch.topk`` result, a ``(values, indices)`` pair
        holding the cosine-similarity scores and the positions of the
        matching rows in ``corpus_embeddings``, highest score first.
    """
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    # A single query yields a one-row similarity matrix; keep that row.
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    # Never ask for more results than there are corpus entries.
    k = min(top_k, cos_scores.shape[0])
    top_results = torch.topk(cos_scores, k=k)

    return top_results
	
# Search form: collects the keywords and the number of results, then lists
# the best-matching offers with their similarity scores on submit.
with st.form("my_form"):
    query = st.text_input(
        "Enter keywords to search for relevant offers, e.g. 'Walmart', 'ice cream', etc. ",
        placeholder="Search:",
    )
    k = st.number_input('Top K relevant offers', min_value=3, max_value=10)
    submitted = st.form_submit_button("Submit")
    if submitted:
        if not query.strip():
            # A blank query would still be embedded and ranked, producing
            # results that do not correspond to any keyword.
            st.warning("Please enter at least one keyword to search.")
        else:
            top_results = search(query, k)
            # top_results[0] holds the scores, top_results[1] the row
            # positions of the matching offers in df.
            for score, idx in zip(top_results[0], top_results[1]):
                st.write(
                    df.iloc[idx.item()]['offer'],
                    f"(Score: {score.item():.4f})",
                )
            st.write("\nFinished searching.")