HamidBekam committed on
Commit
e328815
1 Parent(s): dfe695e

Create app_2.py

Browse files
Files changed (1) hide show
  1. app_2.py +66 -0
app_2.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_app.py
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import torch
5
+ from sentence_transformers import SentenceTransformer, util
6
+ import pickle
7
+
8
# Load both precomputed embedding stores from disk.
# Streamlit re-executes this whole script on every widget interaction, so the
# unpickling goes through st.cache_resource and runs once per file per
# process instead of on every rerun.
@st.cache_resource
def _load_embedding_store(path):
    """Unpickle and return the embedding store saved at ``path``.

    NOTE(security): pickle.load can execute arbitrary code while loading;
    only point this at files produced by a trusted pipeline.

    The returned object is shared between reruns and sessions, so callers
    must treat it as read-only.
    """
    with open(path, "rb") as f_in:
        return pickle.load(f_in)


# First set: entries stored under 'pro_master_id', 'products', 'embeddings'.
stored_data_1 = _load_embedding_store(
    'clinical_inno_embeddings_masterid_paraphrase-multilingual-mpnet-base-v2.pkl'
)
stored_masterid_1 = stored_data_1['pro_master_id']
stored_products_1 = stored_data_1['products']
stored_embeddings_1 = stored_data_1['embeddings']

# Second set: entries stored under 'pro_master_id', 'mean_products',
# 'mean_embeddings' (presumably embeddings averaged per master id -- confirm
# against the script that produced the file).
stored_data_2 = _load_embedding_store(
    'mean_clinical_inno_embeddings_masterid_paraphrase-multilingual-mpnet-base-v2.pkl'
)
stored_masterid_2 = stored_data_2['pro_master_id']
stored_products_2 = stored_data_2['mean_products']
stored_embeddings_2 = stored_data_2['mean_embeddings']
22
+
23
# Initialize the SentenceTransformer model used to encode user queries.
# Streamlit re-executes this script on every widget interaction; caching the
# model with st.cache_resource avoids re-instantiating it on each rerun.
@st.cache_resource
def _load_embedder():
    """Build the query encoder once and reuse it across reruns."""
    return SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')


embedder = _load_embedder()
25
+
26
def get_similar_products(query, products, embeddings, top_k=10):
    """Return the products whose embeddings are most similar to ``query``.

    Args:
        query: Text to encode with the module-level ``embedder``.
        products: Sequence of product entries, indexable by the row position
            of the matching entry in ``embeddings``.
        embeddings: Stored embeddings compared against the query embedding
            via cosine similarity.
        top_k: Maximum number of results to return. Fewer are returned when
            fewer embeddings are stored.

    Returns:
        A list of ``(product, score)`` tuples, highest cosine score first.
    """
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    # cos_sim returns a (1, n) matrix for a single query; keep its only row.
    cos_scores = util.cos_sim(query_embedding, embeddings)[0]

    # torch.topk raises if k exceeds the number of scores, so cap it.
    k = min(top_k, cos_scores.shape[0])
    scores, indices = torch.topk(cos_scores, k=k)

    return [
        (products[idx], score)
        for score, idx in zip(scores.tolist(), indices.tolist())
    ]
33
+
34
# ---- Streamlit UI ----
st.title("Product Similarity Finder")

# Slider choosing which precomputed embedding set the search runs against
embedding_option = st.select_slider(
    'Select Embedding Set',
    options=['Embedding Set 1', 'Embedding Set 2'],
)

# Any value other than the first option falls through to the second set
_use_first_set = embedding_option == 'Embedding Set 1'
stored_products = stored_products_1 if _use_first_set else stored_products_2
stored_embeddings = stored_embeddings_1 if _use_first_set else stored_embeddings_2

# Free-text query from the user
user_input = st.text_input("Enter a product name or description:")

# Only act when the Search button was clicked on this rerun
if st.button('Search'):
    if not user_input:
        # Empty query: prompt instead of searching
        st.write("Please enter a product name or description to search.")
    else:
        results = get_similar_products(user_input, stored_products, stored_embeddings)

        # Tabulate the (product, score) pairs and show scores with 4 decimals
        results_df = pd.DataFrame(results, columns=['Product', 'Score'])
        st.dataframe(results_df.style.format({'Score': '{:.4f}'}))