Kuaaangwen committed on
Commit
5daf8df
1 Parent(s): 16dfa40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -20
app.py CHANGED
@@ -1,19 +1,22 @@
1
  import streamlit as st
2
 
3
- # Library for Sentence Similarity
4
- import pandas as pd
5
- from sentence_transformers import SentenceTransformer
6
- from sklearn.metrics.pairwise import cosine_similarity
7
 
8
  # Library for Entailment
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
  import torch
11
 
12
 
 
 
 
13
 
14
  # Load models and tokenisers for both sentence transformers and text classification
15
 
16
- sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
17
 
18
  tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
19
 
@@ -49,29 +52,29 @@ if sidebar_selectbox == "Compare two sentences":
49
 
50
  print("Comparing sentences...")
51
 
52
- ### Compare Sentence Similarity ###
53
 
54
- # Perform calculations
55
 
56
- #Initialise sentences
57
- sentences = []
58
 
59
- # Append input sentences to 'sentences' list
60
- sentences.append(sentence_1)
61
- sentences.append(sentence_2)
62
 
63
- # Create embeddings for both sentences
64
- sentence_embeddings = sentence_transformer_model.encode(sentences)
65
 
66
- cos_sim = cosine_similarity(sentence_embeddings[0].reshape(1, -1), sentence_embeddings[1].reshape(1, -1))[0][0]
67
- cos_sim = round(cos_sim * 100) # Convert to percentage and round-off
68
 
69
 
70
- # st.write('Similarity between "{}" and "{}" is {}%'.format(sentence_1,
71
- # sentence_2, cos_sim))
72
 
73
- st.subheader("Similarity")
74
- st.write(f"Similarity between the two sentences is {cos_sim}%.")
75
 
76
 
77
  ### Text classification - entailment, neutral or contradiction ###
@@ -100,6 +103,22 @@ if sidebar_selectbox == "Compare two sentences":
100
  st.write(text_classification_model.config.id2label[2], ":", round(outputs[0][2].item()*100,2),"%")
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
 
105
 
 
1
  import streamlit as st
2
 
3
+ # # Library for Sentence Similarity
4
+ # import pandas as pd
5
+ # from sentence_transformers import SentenceTransformer
6
+ # from sklearn.metrics.pairwise import cosine_similarity
7
 
8
  # Library for Entailment
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
  import torch
11
 
12
 
13
+ # # Library for keyword extraction
14
+ # import yake
15
+
16
 
17
  # Load models and tokenisers for both sentence transformers and text classification
18
 
19
+ # sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
20
 
21
  tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
22
 
 
52
 
53
  print("Comparing sentences...")
54
 
55
+ # ### Compare Sentence Similarity ###
56
 
57
+ # # Perform calculations
58
 
59
+ # #Initialise sentences
60
+ # sentences = []
61
 
62
+ # # Append input sentences to 'sentences' list
63
+ # sentences.append(sentence_1)
64
+ # sentences.append(sentence_2)
65
 
66
+ # # Create embeddings for both sentences
67
+ # sentence_embeddings = sentence_transformer_model.encode(sentences)
68
 
69
+ # cos_sim = cosine_similarity(sentence_embeddings[0].reshape(1, -1), sentence_embeddings[1].reshape(1, -1))[0][0]
70
+ # cos_sim = round(cos_sim * 100) # Convert to percentage and round-off
71
 
72
 
73
+ # # st.write('Similarity between "{}" and "{}" is {}%'.format(sentence_1,
74
+ # # sentence_2, cos_sim))
75
 
76
+ # st.subheader("Similarity")
77
+ # st.write(f"Similarity between the two sentences is {cos_sim}%.")
78
 
79
 
80
  ### Text classification - entailment, neutral or contradiction ###
 
103
  st.write(text_classification_model.config.id2label[2], ":", round(outputs[0][2].item()*100,2),"%")
104
 
105
 
106
+ ### Extract keywords with YAKE ### (might make more sense with word cloud)
107
+
108
+ st.subheader("Keywords:")
109
+
110
+ kw_extractor = yake.KeywordExtractor(top=10, stopwords=None)
111
+ keywords = kw_extractor.extract_keywords(sentence_2)
112
+
113
+ # keywords_array = []
114
+
115
+ for kw, v in keywords:
116
+ # print("Keyphrase: ", kw, ": score", v)
117
+ # keywords_array.append(kw)
118
+
119
+ st.write(kw)
120
+
121
+
122
 
123
 
124