stogaja committed on
Commit
6b2f9a9
1 Parent(s): 050cd2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -23
app.py CHANGED
@@ -1,4 +1,13 @@
1
  # let's import the libraries
 
 
 
 
 
 
 
 
 
2
  from email import header
3
  import streamlit as st
4
  import pandas as pd
@@ -11,14 +20,6 @@ import io
11
  import netrc
12
  from tqdm import tqdm
13
  tqdm.pandas()
14
- import torch
15
- import os
16
- import sys
17
- import time
18
- import sentence_transformers
19
- from sentence_transformers import SentenceTransformer
20
- from sentence_transformers import CrossEncoder
21
- from sentence_transformers import util
22
 
23
  # let's load the english stsb dataset
24
  stsb_dataset = load_dataset('stsb_multi_mt', 'en')
@@ -26,7 +27,10 @@ stsb_train = pd.DataFrame(stsb_dataset['train'])
26
  stsb_test = pd.DataFrame(stsb_dataset['test'])
27
 
28
  # let's create helper functions
29
- nlp = spacy.load("en_core_web_sm")
 
 
 
30
 
31
  def text_processing(sentence):
32
  sentence = [token.lemma_.lower()
@@ -34,10 +38,12 @@ def text_processing(sentence):
34
  if token.is_alpha and not token.is_stop]
35
  return sentence
36
 
 
37
  def cos_sim(sentence1_emb, sentence2_emb):
38
  cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
39
  return np.diag(cos_sim)
40
 
 
41
  # let's read the csv file
42
  data = (pd.read_csv("SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)
43
 
@@ -52,10 +58,10 @@ data['sentence1'] = data['sentence1'].astype('str')
52
  XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
53
  sentence_pairs = []
54
  for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
55
- sentence_pairs.append([sentence1, sentence2])
56
 
57
  data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
58
- sentence_pairs, show_progress_bar=True)
59
 
60
  loaded_model = XpathFinder
61
 
@@ -65,18 +71,19 @@ mod_container = st.container()
65
 
66
  # let's create the header
67
  with header_container:
68
- st.title("SBERT CrossEncoder")
69
- st.markdown("This is a demo of the SBERT CrossEncoder model")
70
 
71
  # let's create the model container
72
  with mod_container:
73
- # let's get input from the user
74
- prompt = st.text_input("Enter a description below...")
75
-
76
- if prompt:
77
- simscore = loaded_model.predict([prompt])
78
- # sort the values
79
- data['SBERT CrossEncoder_Score'] = simscore
80
- most_acc = data.head(5)
81
- st.write(most_acc)
82
- st.write("The most accurate sentence is: ", most_acc['sentence2'].iloc[0])
 
 
1
  # let's import the libraries
2
+ from sentence_transformers import util
3
+ from sentence_transformers import CrossEncoder
4
+ from sentence_transformers import SentenceTransformer
5
+ import sentence_transformers
6
+ import time
7
+ import sys
8
+ import os
9
+ import torch
10
+ import en_core_web_sm
11
  from email import header
12
  import streamlit as st
13
  import pandas as pd
 
20
  import netrc
21
  from tqdm import tqdm
22
  tqdm.pandas()
 
 
 
 
 
 
 
 
23
 
24
  # let's load the english stsb dataset
25
  stsb_dataset = load_dataset('stsb_multi_mt', 'en')
 
27
  stsb_test = pd.DataFrame(stsb_dataset['test'])
28
 
29
  # let's create helper functions
30
+ nlp = en_core_web_sm.load()
31
+
32
+ #nlp = spacy.load("en_core_web_sm")
33
+
34
 
35
  def text_processing(sentence):
36
  sentence = [token.lemma_.lower()
 
38
  if token.is_alpha and not token.is_stop]
39
  return sentence
40
 
41
+
42
  def cos_sim(sentence1_emb, sentence2_emb):
43
  cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
44
  return np.diag(cos_sim)
45
 
46
+
47
  # let's read the csv file
48
  data = (pd.read_csv("SBERT_data.csv")).drop(['Unnamed: 0'], axis=1)
49
 
 
58
  XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
59
  sentence_pairs = []
60
  for sentence1, sentence2 in zip(data['sentence1'], data['sentence2']):
61
+ sentence_pairs.append([sentence1, sentence2])
62
 
63
  data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
64
+ sentence_pairs, show_progress_bar=True)
65
 
66
  loaded_model = XpathFinder
67
 
 
71
 
72
  # let's create the header
73
  with header_container:
74
+ st.title("SBERT CrossEncoder")
75
+ st.markdown("This is a demo of the SBERT CrossEncoder model")
76
 
77
  # let's create the model container
78
  with mod_container:
79
+ # let's get input from the user
80
+ prompt = st.text_input("Enter a description below...")
81
+
82
+ if prompt:
83
+ simscore = loaded_model.predict([prompt])
84
+ # sort the values
85
+ data['SBERT CrossEncoder_Score'] = simscore
86
+ most_acc = data.head(5)
87
+ st.write(most_acc)
88
+ st.write("The most accurate sentence is: ",
89
+ most_acc['sentence2'].iloc[0])