Spaces:
Sleeping
Sleeping
File size: 2,666 Bytes
fc1945b 1e7514c fc1945b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import streamlit as st
import pandas as pd
from safetensors import safe_open
from sentence_transformers import SentenceTransformer, CrossEncoder, util
import pickle
# ---------------------------------------------------------------------------
# Page header and introductory copy
# ---------------------------------------------------------------------------
ABOUT_TEXT = """Fetch Rewards is a mobile app where you can earn free gift cards by scanning and uploading your shopping receipts.
You accumulate points for eligible receipts, which can be redeemed for various gift cards. It's a way to get rewards for your
everyday shopping."""

USAGE_TEXT = """
If you type in a category (ex.diapers), this search engine will return
a list of offers relevant to this category. You can also search using
brand name (ex. Huggies) or a retailer name (ex.Target). This tool will
return relevant offers related to that category, brand or retailer along
with the similarity score representing how similar the result offer is to
your search query."""

st.title('Search offers in Fetch app')
st.markdown(ABOUT_TEXT)
st.markdown(USAGE_TEXT)

# ---------------------------------------------------------------------------
# Models and corpus data used by search()
# NOTE(review): everything below runs at module level, so it is repeated
# whenever Streamlit re-executes the script -- consider caching; confirm the
# installed Streamlit version before picking a caching API.
# ---------------------------------------------------------------------------
# The bi-encoder embeds the query; the cross-encoder re-scores the retrieved
# (query, passage) pairs.
bi_encoder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

# Read every tensor stored in the safetensors file; only the one saved under
# the 'embedding' key is used afterwards.
with safe_open("embeddings.safetensors", framework="pt") as tensor_file:
    tensors = {name: tensor_file.get_tensor(name) for name in tensor_file.keys()}
corpus_embeddings = tensors['embedding']

# Passages are indexed by the 'corpus_id' of each search hit.
# NOTE: pickle.load can execute arbitrary code -- only ship a trusted
# corpus.pickle alongside this app.
with open('corpus.pickle', 'rb') as corpus_file:
    passages = pickle.load(corpus_file)
def search(query, top_k):
    """Return the offers most relevant to ``query`` as a two-column DataFrame.

    Retrieval happens in two stages: the bi-encoder embeds the query and
    ``util.semantic_search`` selects the ``top_k`` closest corpus embeddings;
    the cross-encoder then scores every (query, passage) pair and the hits
    are re-ordered by that score, best first.

    Args:
        query: Free-text search string (brand, category or retailer).
        top_k: Number of candidates to retrieve; also the upper bound on the
            number of rows returned.

    Returns:
        A ``pandas.DataFrame`` with two columns:
        ``score``  -- the cross-encoder score formatted with three decimals
        (a string), and
        ``offers`` -- the passage text up to its first ``{``, with newlines
        replaced by spaces and surrounding whitespace stripped.
        Rows whose ``offers`` text repeats an earlier row are dropped, so
        fewer than ``top_k`` rows may come back.
    """
    query_embedding = bi_encoder.encode(query, convert_to_tensor=True)

    # semantic_search yields one hit list per query; a single query is sent.
    hits = util.semantic_search(
        query_embedding, corpus_embeddings, top_k=top_k
    )[0]
    if not hits:
        # Nothing retrieved: skip the cross-encoder instead of handing it an
        # empty batch, and return an empty frame with the usual columns.
        return pd.DataFrame({'score': [], 'offers': []})

    # Re-rank the retrieved candidates with the cross-encoder.
    cross_scores = cross_encoder.predict(
        [[query, passages[hit['corpus_id']]] for hit in hits]
    )
    for hit, score in zip(hits, cross_scores):
        hit['cross-score'] = score
    ranked = sorted(hits, key=lambda hit: hit['cross-score'], reverse=True)

    # No extra slicing here: the retrieval step is already bounded by top_k.
    scores = [f"{hit['cross-score']:.3f}" for hit in ranked]
    offers = [
        # Keep only the text before the first '{' of each passage.
        passages[hit['corpus_id']].replace("\n", " ").partition('{')[0].strip()
        for hit in ranked
    ]

    results = pd.DataFrame({'score': scores, 'offers': offers})
    # The list is sorted best-first, so keep='first' retains the
    # highest-scoring copy of each duplicated offer.
    return results.drop_duplicates(subset=['offers'], keep='first')
# Search form: collects the query text and the number of offers to show, and
# renders the ranked offers once the form is submitted.
with st.form("my_form"):
    query = st.text_input(
        "Enter the brand name, category or retailer name to search "
        "for relevant offers 👇",
        placeholder="Enter the text here",
    )
    num = st.number_input(
        'Maximum number of offers to display', min_value=1, max_value=10
    )
    submitted = st.form_submit_button("Submit")
    if submitted:
        if query.strip():
            df = search(query, num)
            st.dataframe(df, use_container_width=True)
        else:
            # A blank query has nothing meaningful to match against; ask for
            # input instead of running the search.
            st.warning(
                "Please enter a brand name, category or retailer name to search."
            )