app_utils.py (+10 -5)
@@ -18,6 +18,7 @@ from sumy.nlp.tokenizers import Tokenizer
 from rouge import Rouge
 from transformers import BartForConditionalGeneration, BartTokenizer
 from transformers import T5ForConditionalGeneration, T5Tokenizer
+from transformers import AutoTokenizer, AutoModelForTokenClassification
 
 # from nltk import ne_chunk
 from nltk.tag import StanfordNERTagger
@@ -112,14 +113,18 @@ def nlp_analysis(text):
     return df
 
 
+
 def find_entities(text):
-
-
-
-
-    entities
+    tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
+    model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
+    nlp = pipeline("ner", model=model, tokenizer=tokenizer)
+    e = nlp(text)
+    entities = [(entity["word"], entity["entity"]) for entity in e]
     entities = HTML_WRAPPER.format(entities)
     return entities
+
+
+
 
 
 def file_download(data):
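For context, a self-contained sketch of what the rewritten `find_entities` amounts to. Two caveats: the diff calls `pipeline` without adding an import for it, so it is presumably imported from `transformers` elsewhere in `app_utils.py` (the sketch imports it explicitly), and `HTML_WRAPPER` is defined elsewhere in the file, so the template shown here is a hypothetical stand-in.

```python
# Sketch of the updated find_entities(), assuming `pipeline` comes from
# transformers and substituting a minimal HTML_WRAPPER template.
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Hypothetical stand-in for the HTML_WRAPPER defined in app_utils.py.
HTML_WRAPPER = '<div style="border: 1px solid #ccc; padding: 0.5rem;">{}</div>'

MODEL_NAME = "dbmdz/bert-large-cased-finetuned-conll03-english"


def find_entities(text):
    # Load the CoNLL-03 NER model and its tokenizer, then wrap both
    # in a token-classification pipeline.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
    ner = pipeline("ner", model=model, tokenizer=tokenizer)

    # Each prediction is a dict with keys such as "word", "entity",
    # "score", and "index"; keep only the token and its predicted tag.
    predictions = ner(text)
    entities = [(p["word"], p["entity"]) for p in predictions]

    # The list of (word, tag) tuples is stringified into the HTML template.
    return HTML_WRAPPER.format(entities)


if __name__ == "__main__":
    print(find_entities("Hugging Face is based in New York City."))
```

One design note: because the model and tokenizer are constructed inside `find_entities`, every call reloads the weights. Hoisting the pipeline to module scope, or caching it behind whatever caching mechanism the surrounding app uses, would make repeated calls much cheaper.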