skadio committed on
Commit
d99bf21
1 Parent(s): 9a8792e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -100
app.py CHANGED
@@ -1,125 +1,75 @@
1
  """Demo for NER4OPT."""
2
- import re
3
  import warnings
4
 
5
  warnings.filterwarnings("ignore", category=DeprecationWarning)
6
 
7
- import spacy
8
- from spacy import displacy
9
- from spacy.training import iob_to_biluo, biluo_tags_to_offsets
10
- from spacy.tokenizer import Tokenizer
11
-
12
  import streamlit as st
13
-
14
- from simpletransformers.ner import NERModel, NERArgs
 
15
 
16
  HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
17
 
18
 
19
  @st.cache(allow_output_mutation=True)
20
  def load_models():
21
- """Load custom built NER4OPT model."""
22
- custom_labels = [
23
- 'O',
24
- 'B-CONST_DIR',
25
- 'I-CONST_DIR',
26
- 'B-LIMIT',
27
- 'I-LIMIT',
28
- 'B-VAR',
29
- 'I-VAR',
30
- 'B-OBJ_DIR',
31
- 'B-OBJ_NAME',
32
- 'I-OBJ_NAME',
33
- 'B-PARAM',
34
- 'I-PARAM',
35
- ]
36
- # # create model
37
- model_args = NERArgs()
38
- model_args.use_early_stopping = True
39
- model_args.early_stopping_delta = 0.01
40
- model_args.early_stopping_metric = "eval_loss"
41
- model_args.early_stopping_metric_minimize = False
42
- model_args.early_stopping_patience = 5
43
- model_args.evaluate_during_training_steps = 2000
44
- model_args.overwrite_output_dir = True
45
- model_args.reprocess_input_data = True
46
- model_args.num_train_epochs = 11
47
- model_args.adafactor_beta1 = 0.9
48
- model_args.weight_decay = 0.01
49
- model_args.max_seq_length = 512
50
- model_args.learning_rate = 4e-5
51
- model_args.train_batch_size = 1
52
- model_args.eval_batch_size = 1
53
- model_args.manual_seed = 123456789
54
- model_args.output_dir = "trained_transformer_model"
55
- model_args.use_cuda = False
56
- model_args.use_multiprocessing = False
57
- model = NERModel("roberta",
58
- "skadio/ner4opt-roberta-v2",
59
- labels=custom_labels,
60
- use_cuda=False,
61
- args=model_args)
62
- spacy_model = spacy.load("en_core_web_sm")
63
- spacy_model.tokenizer = Tokenizer(spacy_model.vocab,
64
- token_match=re.compile(r'\S+').match)
65
- spacy_blank_model = spacy.blank('en')
66
- spacy_blank_model.tokenizer = Tokenizer(
67
- spacy_blank_model.vocab, token_match=re.compile(r'\S+').match)
68
- return model, spacy_model, spacy_blank_model
69
 
70
 
71
  def main():
72
 
73
- st.sidebar.title("""
74
- NER4OPT Demo: \nFull code will be available at https://github.com/skadio/Ner4Opt
75
- """)
 
 
 
 
76
 
77
  text = st.text_area(
78
  "Text",
79
- "Cautious Asset Investment has a total of $ 150,000 to manage and decides to invest it in money market fund , which yields a 2 % return as well as in foreign bonds , which gives and average rate of return of 10.2 % . Internal policies require PAI to diversify the asset allocation so that the minimum investment in money market fund is 40 % of the total investment . Due to the risk of default of foreign countries , no more than 40 % of the total investment should be allocated to foreign bonds . How much should the Cautious Asset Investment allocate in each asset so as to maximize its average return ?"
80
  )
81
  if text == "":
82
  st.write("Please write a valid sentence.")
83
- model, spacy_model, spacy_blank_model = load_models()
84
-
85
- # Augmented Text
86
- spacy_doc = spacy_model(text)
87
- if len(list(spacy_doc.sents)) >= 2:
88
- last_two_sentences = ' '.join(
89
- [item.text for item in list(spacy_doc.sents)[-2::]])
90
- else:
91
- last_two_sentences = ' '.join(
92
- [item.text for item in list(spacy_doc.sents)[-1::]])
93
- to_skip_count = len(last_two_sentences.split())
94
- augmented_sent = last_two_sentences + " " + text
95
-
96
- if st.button("Get Named Entities"):
97
- predictions, raw_outputs = model.predict([augmented_sent],
98
- split_on_space=True)
99
- transformer_predictions = [
100
- list(val.values())[0] for val in predictions[0]
101
- ]
102
- transformer_predictions = transformer_predictions[to_skip_count::]
103
- biluo_tags = iob_to_biluo(transformer_predictions)
104
- doc = spacy_blank_model.make_doc(text)
105
- entities = biluo_tags_to_offsets(doc, biluo_tags)
106
- entities_formatted = []
107
- for tag in entities:
108
- entities_formatted.append({
109
- "start": tag[0],
110
- "end": tag[1],
111
- "label": tag[2],
112
- "score": 1.0
113
- })
114
- ner_for_display = [{
115
- "text": doc.text,
116
- "ents": entities_formatted,
117
- "title": None
118
- }]
119
- st.title("Named Entity Results")
120
- html_ner = displacy.render(ner_for_display, style="ent", manual=True)
121
- html_ner = html_ner.replace("\n", " ")
122
- st.write(HTML_WRAPPER.format(html_ner), unsafe_allow_html=True)
123
 
124
 
125
  if __name__ == '__main__':
 
1
  """Demo for NER4OPT."""
 
2
  import warnings
3
 
4
  warnings.filterwarnings("ignore", category=DeprecationWarning)
5
 
 
 
 
 
 
6
  import streamlit as st
7
+ import ner4opt
8
+ from ner4opt.utils import spacy_tokenize_sentence
9
+ from spacy import displacy
10
 
11
  HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
12
 
13
 
14
  @st.cache(allow_output_mutation=True)
15
  def load_models():
16
+ """Load and cache NER4OPT models."""
17
+ lexical_ner_model = ner4opt.Ner4Opt("lexical")
18
+ lexical_plus_ner_model = ner4opt.Ner4Opt("lexical_plus")
19
+ semantic_ner_model = ner4opt.Ner4Opt("semantic")
20
+ hybrid_ner_model = ner4opt.Ner4Opt("hybrid")
21
+ return lexical_ner_model, lexical_plus_ner_model, semantic_ner_model, hybrid_ner_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  def main():
25
 
26
+ st.title("""Named Entity Recognition for Optimization (Ner4Opt) Demo""")
27
+
28
+ st.markdown("""Source code for NER4OPT library is available at https://github.com/skadio/ner4opt""")
29
+
30
+ option = st.sidebar.selectbox(
31
+ 'Select the model for extracting entities',
32
+ ('Lexical', 'Lexical Plus', 'Semantic', 'Hybrid'))
33
 
34
  text = st.text_area(
35
  "Text",
36
+ "Cautious Asset Investment has a total of $150,000 to manage and decides to invest it in money market fund, which yields a 2% return as well as in foreign bonds, which gives and average rate of return of 10.2%. Internal policies require PAI to diversify the asset allocation so that the minimum investment in money market fund is 40% of the total investment. Due to the risk of default of foreign countries, no more than 40% of the total investment should be allocated to foreign bonds. How much should the Cautious Asset Investment allocate in each asset so as to maximize its average return?"
37
  )
38
  if text == "":
39
  st.write("Please write a valid sentence.")
40
+
41
+ lexical_ner_model, lexical_plus_ner_model, semantic_ner_model, hybrid_ner_model = load_models(
42
+ )
43
+
44
+ # get entities
45
+ if option == "Lexical":
46
+ predicted_entities = lexical_ner_model.get_entities(text)
47
+ elif option == "Lexical Plus":
48
+ predicted_entities = lexical_plus_ner_model.get_entities(text)
49
+ elif option == "Semantic":
50
+ predicted_entities = semantic_ner_model.get_entities(text)
51
+ elif option == "Hybrid":
52
+ predicted_entities = hybrid_ner_model.get_entities(text)
53
+
54
+ # re-format to match SpaCy, as it uses label instead of entity_group
55
+ # entity_group follows huggingface design and is compatible with most NER interfaces
56
+ entities_formatted = []
57
+ for tag in predicted_entities:
58
+ entities_formatted.append({
59
+ "start": tag['start'],
60
+ "end": tag['end'],
61
+ "label": tag['entity_group'],
62
+ "score": tag['score']
63
+ })
64
+
65
+ ner_for_display = [{
66
+ "text": spacy_tokenize_sentence(text),
67
+ "ents": entities_formatted,
68
+ "title": "Named Entities"
69
+ }]
70
+ html_ner = displacy.render(ner_for_display, style="ent", manual=True)
71
+ html_ner = html_ner.replace("\n", " ")
72
+ st.write(HTML_WRAPPER.format(html_ner), unsafe_allow_html=True)
 
 
 
 
 
 
 
73
 
74
 
75
  if __name__ == '__main__':