stogaja committed on
Commit
280afe4
1 Parent(s): e39a09a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -70
app.py CHANGED
@@ -1,68 +1,49 @@
1
- # let's import the libraries
2
- from sentence_transformers import util
3
- from sentence_transformers import CrossEncoder
4
- from sentence_transformers import SentenceTransformer
5
- import time
6
  import sys
7
- import os
8
- import torch
9
- import en_core_web_sm
10
- from email import header
11
- import streamlit as st
12
  import pandas as pd
13
  import numpy as np
14
- import pickle
15
- import spacy
16
- from sklearn.metrics.pairwise import cosine_similarity
17
- from datasets import load_dataset
18
- import io
19
- import netrc
20
- from tqdm import tqdm
21
- tqdm.pandas()
22
-
23
- # Load the English STSB dataset
24
- stsb_dataset = load_dataset('stsb_multi_mt', 'en')
25
- stsb_train = pd.DataFrame(stsb_dataset['train'])
26
- stsb_test = pd.DataFrame(stsb_dataset['test'])
27
-
28
- # let's create helper functions
29
- nlp = spacy.load("en_core_web_sm")
30
-
31
-
32
- def text_processing(sentence):
33
- sentence = [token.lemma_.lower()
34
- for token in nlp(sentence)
35
- if token.is_alpha and not token.is_stop]
36
- return sentence
37
-
38
-
39
- def cos_sim(sentence1_emb, sentence2_emb):
40
- cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
41
- return np.diag(cos_sim)
42
-
43
-
44
- # let's read the csv file
45
- data = (pd.read_csv("SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)
46
-
47
- prompt = "charles"
48
- data['prompt'] = prompt
49
- data.rename(columns={'target_text': 'sentence2',
50
- 'prompt': 'sentence1'}, inplace=True)
51
- data['sentence2'] = data['sentence2'].astype('str')
52
- data['sentence1'] = data['sentence1'].astype('str')
53
-
54
- XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
55
- sentence_pairs = []
56
- for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
57
- sentence_pairs.append([sentence1, sentence2])
58
-
59
- data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
60
- sentence_pairs, show_progress_bar=True)
61
-
62
- # sorting the values
63
- data.sort_values(by=['SBERT CrossEncoder_Score'], ascending=False)
64
 
65
- loaded_model = XpathFinder
66
 
67
  # Containers
68
  header_container = st.container()
@@ -77,13 +58,11 @@ with header_container:
77
 
78
  # model container
79
  with mod_container:
80
-
81
  # collecting input from user
82
  prompt = st.text_input("Enter your description below ...")
83
 
84
  # Loading e data
85
- data = (pd.read_csv("SBERT_data.csv")
86
- ).drop(['Unnamed: 0'], axis=1)
87
 
88
  data['prompt'] = prompt
89
  data.rename(columns={'target_text': 'sentence2',
@@ -94,16 +73,15 @@ with mod_container:
94
  # let's pass the input to the loaded_model with torch compiled with cuda
95
  if prompt:
96
  # let's get the result
97
- simscore = loaded_model.predict([prompt])
98
-
99
- from sentence_transformers import CrossEncoder
100
- loaded_model = CrossEncoder("cross-encoder/stsb-roberta-base")
101
  sentence_pairs = []
102
  for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
103
  sentence_pairs.append([sentence1, sentence2])
 
104
 
105
- # sorting the df to get highest scoring xpath_container
106
- data['SBERT CrossEncoder_Score'] = loaded_model.predict(sentence_pairs)
107
  most_acc = data.head(5)
108
  # predictions
109
  st.write("Highest Similarity score: ", simscore)
 
1
+ import io
2
+ import netrc
3
+ import pickle
 
 
4
  import sys
 
 
 
 
 
5
  import pandas as pd
6
  import numpy as np
7
+ import streamlit as st
8
+ # from sentence_transformers import SentenceTransformer
9
+ # import sentence_transformers
10
+ # import torch
11
+ #######################################
12
+
13
+ st.markdown(
14
+ f"""
15
+ <style>
16
+ .reportview-container .main .block-container{{
17
+ max-width: 90%;
18
+ padding-top: 5rem;
19
+ padding-right: 5rem;
20
+ padding-left: 5rem;
21
+ padding-bottom: 5rem;
22
+ }}
23
+ img{{
24
+ max-width:40%;
25
+ margin-bottom:40px;
26
+ }}
27
+ </style>
28
+ """,
29
+ unsafe_allow_html=True,
30
+ )
31
+
32
+ # # let's load the saved model
33
+ # loaded_model = pickle.load(open('XpathFinder1.sav', 'rb'))
34
+ # loaded_model = pickle.load('XpathFinder1.sav', map_location='cpu')
35
+
36
+
37
+ # class CPU_Unpickler(pickle.Unpickler):
38
+ # def find_class(self, module, name):
39
+ # if module == 'torch.storage' and name == '_load_from_bytes':
40
+ # return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
41
+ # else:
42
+ # return super().find_class(module, name)
43
+ #
44
+
45
+ #loaded_model = CPU_Unpickler(open('XpathFinder1.sav', 'rb')).load()
 
 
 
 
 
 
 
 
 
 
 
46
 
 
47
 
48
  # Containers
49
  header_container = st.container()
 
58
 
59
  # model container
60
  with mod_container:
 
61
  # collecting input from user
62
  prompt = st.text_input("Enter your description below ...")
63
 
64
  # Loading e data
65
+ data = (pd.read_csv("SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)
 
66
 
67
  data['prompt'] = prompt
68
  data.rename(columns={'target_text': 'sentence2',
 
73
  # let's pass the input to the loaded_model with torch compiled with cuda
74
  if prompt:
75
  # let's get the result
76
+ from sentence_transformers.cross_encoder import CrossEncoder
77
+ XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
 
 
78
  sentence_pairs = []
79
  for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
80
  sentence_pairs.append([sentence1, sentence2])
81
+ simscore = XpathFinder.predict([prompt])
82
 
83
+ # sorting the df to get highest scoring xpath_container
84
+ data['SBERT CrossEncoder_Score'] = XpathFinder.predict(sentence_pairs)
85
  most_acc = data.head(5)
86
  # predictions
87
  st.write("Highest Similarity score: ", simscore)