Spaces:
Sleeping
Sleeping
miesnerjacob
committed on
Commit
•
620af8b
1
Parent(s):
7b8b2a7
added example default text
Browse files
- app.py +7 -5
- emotion_detection.py +34 -3
- keyword_extraction.py +64 -2
- named_entity_recognition.py +27 -0
- part_of_speech_tagging.py +17 -0
- sentiment_analysis.py +34 -3
app.py
CHANGED
@@ -44,6 +44,8 @@ pos_tagger = load_pos_model()
|
|
44 |
emotion_detector = load_emotion_model()
|
45 |
ner = load_ner_model()
|
46 |
|
|
|
|
|
47 |
with st.sidebar:
|
48 |
page = option_menu(menu_title='Menu',
|
49 |
menu_icon="robot",
|
@@ -134,7 +136,7 @@ elif page == "Sentiment Analysis":
|
|
134 |
"""
|
135 |
)
|
136 |
|
137 |
-
text = st.text_area("Paste text here", value=
|
138 |
|
139 |
if st.button('🔥 Run!'):
|
140 |
with st.spinner("Loading..."):
|
@@ -158,7 +160,7 @@ elif page == "Keyword Extraction":
|
|
158 |
"""
|
159 |
)
|
160 |
|
161 |
-
text = st.text_area("Paste text here", value=
|
162 |
|
163 |
max_keywords = st.slider('# of Keywords Max Limit', min_value=1, max_value=10, value=5, step=1)
|
164 |
|
@@ -191,7 +193,7 @@ elif page == "Part of Speech Tagging":
|
|
191 |
"""
|
192 |
)
|
193 |
|
194 |
-
text = st.text_area("Paste text here", value=
|
195 |
|
196 |
if st.button('🔥 Run!'):
|
197 |
with st.spinner("Loading..."):
|
@@ -213,7 +215,7 @@ elif page == "Emotion Detection":
|
|
213 |
"""
|
214 |
)
|
215 |
|
216 |
-
text = st.text_area("Paste text here", value=
|
217 |
|
218 |
if st.button('🔥 Run!'):
|
219 |
with st.spinner("Loading..."):
|
@@ -237,7 +239,7 @@ elif page == "Named Entity Recognition":
|
|
237 |
"""
|
238 |
)
|
239 |
|
240 |
-
text = st.text_area("Paste text here", value=
|
241 |
|
242 |
if st.button('🔥 Run!'):
|
243 |
with st.spinner("Loading..."):
|
|
|
44 |
emotion_detector = load_emotion_model()
|
45 |
ner = load_ner_model()
|
46 |
|
47 |
+
example_text = "This is example text that contains both names of organizations like Hugging Face and cities like New York, all while portraying an upbeat attitude."
|
48 |
+
|
49 |
with st.sidebar:
|
50 |
page = option_menu(menu_title='Menu',
|
51 |
menu_icon="robot",
|
|
|
136 |
"""
|
137 |
)
|
138 |
|
139 |
+
text = st.text_area("Paste text here", value=example_text)
|
140 |
|
141 |
if st.button('🔥 Run!'):
|
142 |
with st.spinner("Loading..."):
|
|
|
160 |
"""
|
161 |
)
|
162 |
|
163 |
+
text = st.text_area("Paste text here", value=example_text)
|
164 |
|
165 |
max_keywords = st.slider('# of Keywords Max Limit', min_value=1, max_value=10, value=5, step=1)
|
166 |
|
|
|
193 |
"""
|
194 |
)
|
195 |
|
196 |
+
text = st.text_area("Paste text here", value=example_text)
|
197 |
|
198 |
if st.button('🔥 Run!'):
|
199 |
with st.spinner("Loading..."):
|
|
|
215 |
"""
|
216 |
)
|
217 |
|
218 |
+
text = st.text_area("Paste text here", value=example_text)
|
219 |
|
220 |
if st.button('🔥 Run!'):
|
221 |
with st.spinner("Loading..."):
|
|
|
239 |
"""
|
240 |
)
|
241 |
|
242 |
+
text = st.text_area("Paste text here", value=example_text)
|
243 |
|
244 |
if st.button('🔥 Run!'):
|
245 |
with st.spinner("Loading..."):
|
emotion_detection.py
CHANGED
@@ -5,6 +5,13 @@ import pandas as pd
|
|
5 |
|
6 |
|
7 |
class EmotionDetection:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def __init__(self):
|
9 |
hub_location = 'cardiffnlp/twitter-roberta-base-emotion'
|
10 |
self.tokenizer = AutoTokenizer.from_pretrained(hub_location)
|
@@ -12,7 +19,15 @@ class EmotionDetection:
|
|
12 |
self.explainer = SequenceClassificationExplainer(self.model, self.tokenizer)
|
13 |
|
14 |
def justify(self, text):
|
15 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
word_attributions = self.explainer(text)
|
18 |
html = self.explainer.visualize("example.html")
|
@@ -20,7 +35,15 @@ class EmotionDetection:
|
|
20 |
return html
|
21 |
|
22 |
def classify(self, text):
|
23 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
|
26 |
outputs = self.model(**tokens)
|
@@ -32,7 +55,15 @@ class EmotionDetection:
|
|
32 |
return preds
|
33 |
|
34 |
def run(self, text):
|
35 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
preds = self.classify(text)
|
38 |
html = self.justify(text)
|
|
|
5 |
|
6 |
|
7 |
class EmotionDetection:
|
8 |
+
""" This class is an example
|
9 |
+
|
10 |
+
Attributes:
|
11 |
+
class_attribute (str): (class attribute) The class attribute
|
12 |
+
instance_attribute (str): The instance attribute
|
13 |
+
"""
|
14 |
+
|
15 |
def __init__(self):
|
16 |
hub_location = 'cardiffnlp/twitter-roberta-base-emotion'
|
17 |
self.tokenizer = AutoTokenizer.from_pretrained(hub_location)
|
|
|
19 |
self.explainer = SequenceClassificationExplainer(self.model, self.tokenizer)
|
20 |
|
21 |
def justify(self, text):
|
22 |
+
"""
|
23 |
+
The function to add two Complex Numbers.
|
24 |
+
|
25 |
+
Parameters:
|
26 |
+
num (ComplexNumber): The complex number to be added.
|
27 |
+
|
28 |
+
Returns:
|
29 |
+
ComplexNumber: A complex number which contains the sum.
|
30 |
+
"""
|
31 |
|
32 |
word_attributions = self.explainer(text)
|
33 |
html = self.explainer.visualize("example.html")
|
|
|
35 |
return html
|
36 |
|
37 |
def classify(self, text):
|
38 |
+
"""
|
39 |
+
The function to add two Complex Numbers.
|
40 |
+
|
41 |
+
Parameters:
|
42 |
+
num (ComplexNumber): The complex number to be added.
|
43 |
+
|
44 |
+
Returns:
|
45 |
+
ComplexNumber: A complex number which contains the sum.
|
46 |
+
"""
|
47 |
|
48 |
tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
|
49 |
outputs = self.model(**tokens)
|
|
|
55 |
return preds
|
56 |
|
57 |
def run(self, text):
|
58 |
+
"""
|
59 |
+
The function to add two Complex Numbers.
|
60 |
+
|
61 |
+
Parameters:
|
62 |
+
num (ComplexNumber): The complex number to be added.
|
63 |
+
|
64 |
+
Returns:
|
65 |
+
ComplexNumber: A complex number which contains the sum.
|
66 |
+
"""
|
67 |
|
68 |
preds = self.classify(text)
|
69 |
html = self.justify(text)
|
keyword_extraction.py
CHANGED
@@ -6,11 +6,28 @@ import en_core_web_sm
|
|
6 |
|
7 |
|
8 |
class KeywordExtractor:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
def __init__(self):
|
10 |
self.nlp = en_core_web_sm.load()
|
11 |
self.nlp.add_pipe("textrank")
|
12 |
|
13 |
def get_keywords(self, text, max_keywords):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
doc = self.nlp(text)
|
15 |
|
16 |
kws = [i.text for i in doc._.phrases[:max_keywords]]
|
@@ -18,6 +35,16 @@ class KeywordExtractor:
|
|
18 |
return kws
|
19 |
|
20 |
def get_keyword_indicies(self, string_list, text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
out = []
|
22 |
for s in string_list:
|
23 |
indicies = [[m.start(), m.end()] for m in re.finditer(re.escape(s), text)]
|
@@ -26,6 +53,16 @@ class KeywordExtractor:
|
|
26 |
return out
|
27 |
|
28 |
def merge_overlapping_indicies(self, indicies):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
# Sort the array on the basis of start values of intervals.
|
30 |
indicies.sort()
|
31 |
stack = []
|
@@ -41,6 +78,16 @@ class KeywordExtractor:
|
|
41 |
return stack
|
42 |
|
43 |
def merge_until_finished(self, indicies):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
len_indicies = 0
|
45 |
while True:
|
46 |
merged = self.merge_overlapping_indicies(indicies)
|
@@ -51,9 +98,15 @@ class KeywordExtractor:
|
|
51 |
len_indicies = len(merged)
|
52 |
|
53 |
def get_annotation(self, text, indicies, kws):
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
-
|
56 |
-
|
|
|
57 |
|
58 |
arr = list(text)
|
59 |
for idx in sorted(indicies, reverse=True):
|
@@ -71,6 +124,15 @@ class KeywordExtractor:
|
|
71 |
return final_annotation
|
72 |
|
73 |
def generate(self, text, max_keywords):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
kws = self.get_keywords(text, max_keywords)
|
76 |
|
|
|
6 |
|
7 |
|
8 |
class KeywordExtractor:
|
9 |
+
""" This class is an example
|
10 |
+
|
11 |
+
Attributes:
|
12 |
+
class_attribute (str): (class attribute) The class attribute
|
13 |
+
instance_attribute (str): The instance attribute
|
14 |
+
"""
|
15 |
+
|
16 |
def __init__(self):
|
17 |
self.nlp = en_core_web_sm.load()
|
18 |
self.nlp.add_pipe("textrank")
|
19 |
|
20 |
def get_keywords(self, text, max_keywords):
|
21 |
+
"""
|
22 |
+
The function to add two Complex Numbers.
|
23 |
+
|
24 |
+
Parameters:
|
25 |
+
num (ComplexNumber): The complex number to be added.
|
26 |
+
|
27 |
+
Returns:
|
28 |
+
ComplexNumber: A complex number which contains the sum.
|
29 |
+
"""
|
30 |
+
|
31 |
doc = self.nlp(text)
|
32 |
|
33 |
kws = [i.text for i in doc._.phrases[:max_keywords]]
|
|
|
35 |
return kws
|
36 |
|
37 |
def get_keyword_indicies(self, string_list, text):
|
38 |
+
"""
|
39 |
+
The function to add two Complex Numbers.
|
40 |
+
|
41 |
+
Parameters:
|
42 |
+
num (ComplexNumber): The complex number to be added.
|
43 |
+
|
44 |
+
Returns:
|
45 |
+
ComplexNumber: A complex number which contains the sum.
|
46 |
+
"""
|
47 |
+
|
48 |
out = []
|
49 |
for s in string_list:
|
50 |
indicies = [[m.start(), m.end()] for m in re.finditer(re.escape(s), text)]
|
|
|
53 |
return out
|
54 |
|
55 |
def merge_overlapping_indicies(self, indicies):
|
56 |
+
"""
|
57 |
+
The function to add two Complex Numbers.
|
58 |
+
|
59 |
+
Parameters:
|
60 |
+
num (ComplexNumber): The complex number to be added.
|
61 |
+
|
62 |
+
Returns:
|
63 |
+
ComplexNumber: A complex number which contains the sum.
|
64 |
+
"""
|
65 |
+
|
66 |
# Sort the array on the basis of start values of intervals.
|
67 |
indicies.sort()
|
68 |
stack = []
|
|
|
78 |
return stack
|
79 |
|
80 |
def merge_until_finished(self, indicies):
|
81 |
+
"""
|
82 |
+
The function to add two Complex Numbers.
|
83 |
+
|
84 |
+
Parameters:
|
85 |
+
num (ComplexNumber): The complex number to be added.
|
86 |
+
|
87 |
+
Returns:
|
88 |
+
ComplexNumber: A complex number which contains the sum.
|
89 |
+
"""
|
90 |
+
|
91 |
len_indicies = 0
|
92 |
while True:
|
93 |
merged = self.merge_overlapping_indicies(indicies)
|
|
|
98 |
len_indicies = len(merged)
|
99 |
|
100 |
def get_annotation(self, text, indicies, kws):
|
101 |
+
"""
|
102 |
+
The function to add two Complex Numbers.
|
103 |
+
|
104 |
+
Parameters:
|
105 |
+
num (ComplexNumber): The complex number to be added.
|
106 |
|
107 |
+
Returns:
|
108 |
+
ComplexNumber: A complex number which contains the sum.
|
109 |
+
"""
|
110 |
|
111 |
arr = list(text)
|
112 |
for idx in sorted(indicies, reverse=True):
|
|
|
124 |
return final_annotation
|
125 |
|
126 |
def generate(self, text, max_keywords):
|
127 |
+
"""
|
128 |
+
The function to add two Complex Numbers.
|
129 |
+
|
130 |
+
Parameters:
|
131 |
+
num (ComplexNumber): The complex number to be added.
|
132 |
+
|
133 |
+
Returns:
|
134 |
+
ComplexNumber: A complex number which contains the sum.
|
135 |
+
"""
|
136 |
|
137 |
kws = self.get_keywords(text, max_keywords)
|
138 |
|
named_entity_recognition.py
CHANGED
@@ -3,12 +3,29 @@ from transformers import pipeline
|
|
3 |
|
4 |
|
5 |
class NamedEntityRecognition:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
def __init__(self):
|
7 |
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
|
8 |
model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
|
9 |
self.nlp = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)
|
10 |
|
11 |
def get_annotation(self, preds, text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
splits = [0]
|
13 |
entities = {}
|
14 |
for i in preds:
|
@@ -29,6 +46,16 @@ class NamedEntityRecognition:
|
|
29 |
return final_annotation
|
30 |
|
31 |
def classify(self, text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
preds = self.nlp(text)
|
33 |
ner_annotation = self.get_annotation(preds, text)
|
34 |
return preds, ner_annotation
|
|
|
3 |
|
4 |
|
5 |
class NamedEntityRecognition:
|
6 |
+
""" This class is an example
|
7 |
+
|
8 |
+
Attributes:
|
9 |
+
class_attribute (str): (class attribute) The class attribute
|
10 |
+
instance_attribute (str): The instance attribute
|
11 |
+
"""
|
12 |
+
|
13 |
def __init__(self):
|
14 |
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
|
15 |
model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
|
16 |
self.nlp = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)
|
17 |
|
18 |
def get_annotation(self, preds, text):
|
19 |
+
"""
|
20 |
+
The function to add two Complex Numbers.
|
21 |
+
|
22 |
+
Parameters:
|
23 |
+
num (ComplexNumber): The complex number to be added.
|
24 |
+
|
25 |
+
Returns:
|
26 |
+
ComplexNumber: A complex number which contains the sum.
|
27 |
+
"""
|
28 |
+
|
29 |
splits = [0]
|
30 |
entities = {}
|
31 |
for i in preds:
|
|
|
46 |
return final_annotation
|
47 |
|
48 |
def classify(self, text):
|
49 |
+
"""
|
50 |
+
The function to add two Complex Numbers.
|
51 |
+
|
52 |
+
Parameters:
|
53 |
+
num (ComplexNumber): The complex number to be added.
|
54 |
+
|
55 |
+
Returns:
|
56 |
+
ComplexNumber: A complex number which contains the sum.
|
57 |
+
"""
|
58 |
+
|
59 |
preds = self.nlp(text)
|
60 |
ner_annotation = self.get_annotation(preds, text)
|
61 |
return preds, ner_annotation
|
part_of_speech_tagging.py
CHANGED
@@ -5,10 +5,27 @@ nltk.download('averaged_perceptron_tagger')
|
|
5 |
|
6 |
|
7 |
class POSTagging:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def __init__(self):
|
9 |
pass
|
10 |
|
11 |
def classify(self, text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
text = word_tokenize(text)
|
13 |
preds = nltk.pos_tag(text)
|
14 |
return preds
|
|
|
5 |
|
6 |
|
7 |
class POSTagging:
|
8 |
+
""" This class is an example
|
9 |
+
|
10 |
+
Attributes:
|
11 |
+
class_attribute (str): (class attribute) The class attribute
|
12 |
+
instance_attribute (str): The instance attribute
|
13 |
+
"""
|
14 |
+
|
15 |
def __init__(self):
|
16 |
pass
|
17 |
|
18 |
def classify(self, text):
|
19 |
+
"""
|
20 |
+
The function to add two Complex Numbers.
|
21 |
+
|
22 |
+
Parameters:
|
23 |
+
num (ComplexNumber): The complex number to be added.
|
24 |
+
|
25 |
+
Returns:
|
26 |
+
ComplexNumber: A complex number which contains the sum.
|
27 |
+
"""
|
28 |
+
|
29 |
text = word_tokenize(text)
|
30 |
preds = nltk.pos_tag(text)
|
31 |
return preds
|
sentiment_analysis.py
CHANGED
@@ -5,6 +5,13 @@ import pandas as pd
|
|
5 |
|
6 |
|
7 |
class SentimentAnalysis:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def __init__(self):
|
9 |
# Load Tokenizer & Model
|
10 |
hub_location = 'cardiffnlp/twitter-roberta-base-sentiment'
|
@@ -23,7 +30,15 @@ class SentimentAnalysis:
|
|
23 |
self.explainer = SequenceClassificationExplainer(self.model, self.tokenizer)
|
24 |
|
25 |
def justify(self, text):
|
26 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
word_attributions = self.explainer(text)
|
29 |
html = self.explainer.visualize("example.html")
|
@@ -31,7 +46,15 @@ class SentimentAnalysis:
|
|
31 |
return html
|
32 |
|
33 |
def classify(self, text):
|
34 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
|
37 |
outputs = self.model(**tokens)
|
@@ -42,7 +65,15 @@ class SentimentAnalysis:
|
|
42 |
return preds
|
43 |
|
44 |
def run(self, text):
|
45 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
preds = self.classify(text)
|
48 |
html = self.justify(text)
|
|
|
5 |
|
6 |
|
7 |
class SentimentAnalysis:
|
8 |
+
""" This class is an example
|
9 |
+
|
10 |
+
Attributes:
|
11 |
+
class_attribute (str): (class attribute) The class attribute
|
12 |
+
instance_attribute (str): The instance attribute
|
13 |
+
"""
|
14 |
+
|
15 |
def __init__(self):
|
16 |
# Load Tokenizer & Model
|
17 |
hub_location = 'cardiffnlp/twitter-roberta-base-sentiment'
|
|
|
30 |
self.explainer = SequenceClassificationExplainer(self.model, self.tokenizer)
|
31 |
|
32 |
def justify(self, text):
|
33 |
+
"""
|
34 |
+
The function to add two Complex Numbers.
|
35 |
+
|
36 |
+
Parameters:
|
37 |
+
num (ComplexNumber): The complex number to be added.
|
38 |
+
|
39 |
+
Returns:
|
40 |
+
ComplexNumber: A complex number which contains the sum.
|
41 |
+
"""
|
42 |
|
43 |
word_attributions = self.explainer(text)
|
44 |
html = self.explainer.visualize("example.html")
|
|
|
46 |
return html
|
47 |
|
48 |
def classify(self, text):
|
49 |
+
"""
|
50 |
+
The function to add two Complex Numbers.
|
51 |
+
|
52 |
+
Parameters:
|
53 |
+
num (ComplexNumber): The complex number to be added.
|
54 |
+
|
55 |
+
Returns:
|
56 |
+
ComplexNumber: A complex number which contains the sum.
|
57 |
+
"""
|
58 |
|
59 |
tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
|
60 |
outputs = self.model(**tokens)
|
|
|
65 |
return preds
|
66 |
|
67 |
def run(self, text):
|
68 |
+
"""
|
69 |
+
The function to add two Complex Numbers.
|
70 |
+
|
71 |
+
Parameters:
|
72 |
+
num (ComplexNumber): The complex number to be added.
|
73 |
+
|
74 |
+
Returns:
|
75 |
+
ComplexNumber: A complex number which contains the sum.
|
76 |
+
"""
|
77 |
|
78 |
preds = self.classify(text)
|
79 |
html = self.justify(text)
|