UjjwalVIT commited on
Commit
4628306
1 Parent(s): af43a44
Files changed (1) hide show
  1. app_utils.py +10 -5
app_utils.py CHANGED
@@ -18,6 +18,7 @@ from sumy.nlp.tokenizers import Tokenizer
18
  from rouge import Rouge
19
  from transformers import BartForConditionalGeneration, BartTokenizer
20
  from transformers import T5ForConditionalGeneration, T5Tokenizer
 
21
 
22
  # from nltk import ne_chunk
23
  from nltk.tag import StanfordNERTagger
@@ -112,14 +113,18 @@ def nlp_analysis(text):
112
  return df
113
 
114
 
 
115
def find_entities(text):
    """Locate named entities in *text* via the Stanford NER tagger.

    Blank-line runs are collapsed before tokenizing; tokens whose tag is
    the 'O' (outside) label are dropped. The surviving (token, tag) pairs
    are rendered through the module-level HTML_WRAPPER template and the
    resulting HTML string is returned.
    """
    tagger = StanfordNERTagger(stanford_ner_model_path, stanford_ner_jar_path)
    cleaned = text.replace("\n\n", "\n")
    words = nltk.word_tokenize(cleaned)
    tagged = tagger.tag(words)
    # Keep only genuine entities — 'O' marks tokens outside any entity.
    found = []
    for word, label in tagged:
        if label != 'O':
            found.append((word, label))
    return HTML_WRAPPER.format(found)
 
 
 
123
 
124
 
125
  def file_download(data):
 
18
  from rouge import Rouge
19
  from transformers import BartForConditionalGeneration, BartTokenizer
20
  from transformers import T5ForConditionalGeneration, T5Tokenizer
21
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
22
 
23
  # from nltk import ne_chunk
24
  from nltk.tag import StanfordNERTagger
 
113
  return df
114
 
115
 
116
+
117
def find_entities(text):
    """Extract named entities from *text* with a BERT NER pipeline.

    Loads the CoNLL-03 fine-tuned BERT checkpoint, runs token
    classification over *text*, and returns the (word, entity-label)
    pairs rendered through the module-level HTML_WRAPPER template.

    NOTE(review): model/tokenizer are re-downloaded and re-built on every
    call — consider caching them at module level.
    """
    # FIX: `pipeline` is not among the visible transformers imports added
    # in this revision (only AutoTokenizer / AutoModelForTokenClassification),
    # so the function would raise NameError. Import it locally so the
    # function is self-contained; transformers is already a file dependency.
    from transformers import pipeline

    # Single source of truth for the checkpoint name (was duplicated).
    model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForTokenClassification.from_pretrained(model_name)
    ner = pipeline("ner", model=model, tokenizer=tokenizer)
    results = ner(text)
    entities = [(item["word"], item["entity"]) for item in results]
    return HTML_WRAPPER.format(entities)
125
+
126
+
127
+
128
 
129
 
130
  def file_download(data):