Update app.py
Browse files
app.py
CHANGED
@@ -1,125 +1,75 @@
|
|
1 |
"""Demo for NER4OPT."""
|
2 |
-
import re
|
3 |
import warnings
|
4 |
|
5 |
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
6 |
|
7 |
-
import spacy
|
8 |
-
from spacy import displacy
|
9 |
-
from spacy.training import iob_to_biluo, biluo_tags_to_offsets
|
10 |
-
from spacy.tokenizer import Tokenizer
|
11 |
-
|
12 |
import streamlit as st
|
13 |
-
|
14 |
-
from
|
|
|
15 |
|
16 |
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
|
17 |
|
18 |
|
19 |
@st.cache(allow_output_mutation=True)
|
20 |
def load_models():
|
21 |
-
"""Load
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
'I-LIMIT',
|
28 |
-
'B-VAR',
|
29 |
-
'I-VAR',
|
30 |
-
'B-OBJ_DIR',
|
31 |
-
'B-OBJ_NAME',
|
32 |
-
'I-OBJ_NAME',
|
33 |
-
'B-PARAM',
|
34 |
-
'I-PARAM',
|
35 |
-
]
|
36 |
-
# # create model
|
37 |
-
model_args = NERArgs()
|
38 |
-
model_args.use_early_stopping = True
|
39 |
-
model_args.early_stopping_delta = 0.01
|
40 |
-
model_args.early_stopping_metric = "eval_loss"
|
41 |
-
model_args.early_stopping_metric_minimize = False
|
42 |
-
model_args.early_stopping_patience = 5
|
43 |
-
model_args.evaluate_during_training_steps = 2000
|
44 |
-
model_args.overwrite_output_dir = True
|
45 |
-
model_args.reprocess_input_data = True
|
46 |
-
model_args.num_train_epochs = 11
|
47 |
-
model_args.adafactor_beta1 = 0.9
|
48 |
-
model_args.weight_decay = 0.01
|
49 |
-
model_args.max_seq_length = 512
|
50 |
-
model_args.learning_rate = 4e-5
|
51 |
-
model_args.train_batch_size = 1
|
52 |
-
model_args.eval_batch_size = 1
|
53 |
-
model_args.manual_seed = 123456789
|
54 |
-
model_args.output_dir = "trained_transformer_model"
|
55 |
-
model_args.use_cuda = False
|
56 |
-
model_args.use_multiprocessing = False
|
57 |
-
model = NERModel("roberta",
|
58 |
-
"skadio/ner4opt-roberta-v2",
|
59 |
-
labels=custom_labels,
|
60 |
-
use_cuda=False,
|
61 |
-
args=model_args)
|
62 |
-
spacy_model = spacy.load("en_core_web_sm")
|
63 |
-
spacy_model.tokenizer = Tokenizer(spacy_model.vocab,
|
64 |
-
token_match=re.compile(r'\S+').match)
|
65 |
-
spacy_blank_model = spacy.blank('en')
|
66 |
-
spacy_blank_model.tokenizer = Tokenizer(
|
67 |
-
spacy_blank_model.vocab, token_match=re.compile(r'\S+').match)
|
68 |
-
return model, spacy_model, spacy_blank_model
|
69 |
|
70 |
|
71 |
def main():
|
72 |
|
73 |
-
st.
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
76 |
|
77 |
text = st.text_area(
|
78 |
"Text",
|
79 |
-
"Cautious Asset Investment has a total of $
|
80 |
)
|
81 |
if text == "":
|
82 |
st.write("Please write a valid sentence.")
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
"ents": entities_formatted,
|
117 |
-
"title": None
|
118 |
-
}]
|
119 |
-
st.title("Named Entity Results")
|
120 |
-
html_ner = displacy.render(ner_for_display, style="ent", manual=True)
|
121 |
-
html_ner = html_ner.replace("\n", " ")
|
122 |
-
st.write(HTML_WRAPPER.format(html_ner), unsafe_allow_html=True)
|
123 |
|
124 |
|
125 |
if __name__ == '__main__':
|
|
|
1 |
"""Demo for NER4OPT."""
|
|
|
2 |
import warnings
|
3 |
|
4 |
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
5 |
|
|
|
|
|
|
|
|
|
|
|
6 |
import streamlit as st
|
7 |
+
import ner4opt
|
8 |
+
from ner4opt.utils import spacy_tokenize_sentence
|
9 |
+
from spacy import displacy
|
10 |
|
11 |
# Scrollable, bordered <div> used to frame the displaCy entity markup.
# The single `{}` placeholder is filled with the rendered HTML via str.format in main().
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
|
12 |
|
13 |
|
14 |
@st.cache(allow_output_mutation=True)
def load_models():
    """Instantiate every Ner4Opt variant used by the demo.

    Caching is delegated to the ``st.cache`` decorator applied to this
    function.

    Returns:
        A 4-tuple of ``ner4opt.Ner4Opt`` instances, ordered as
        (lexical, lexical_plus, semantic, hybrid).
    """
    # Order matters: callers unpack the result positionally.
    variants = ("lexical", "lexical_plus", "semantic", "hybrid")
    return tuple(ner4opt.Ner4Opt(variant) for variant in variants)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
|
24 |
def main():
    """Render the Ner4Opt Streamlit demo page.

    Shows a title and a link to the library, lets the user pick one of the
    four models in the sidebar, reads a problem description from a text area,
    extracts entities with the selected model and displays them with displaCy.

    If the text area is empty (or whitespace only) a hint is shown and the
    function returns without loading or running any model.
    """
    st.title("""Named Entity Recognition for Optimization (Ner4Opt) Demo""")

    st.markdown("""Source code for NER4OPT library is available at https://github.com/skadio/ner4opt""")

    option = st.sidebar.selectbox(
        'Select the model for extracting entities',
        ('Lexical', 'Lexical Plus', 'Semantic', 'Hybrid'))

    text = st.text_area(
        "Text",
        "Cautious Asset Investment has a total of $150,000 to manage and decides to invest it in money market fund, which yields a 2% return as well as in foreign bonds, which gives and average rate of return of 10.2%. Internal policies require PAI to diversify the asset allocation so that the minimum investment in money market fund is 40% of the total investment. Due to the risk of default of foreign countries, no more than 40% of the total investment should be allocated to foreign bonds. How much should the Cautious Asset Investment allocate in each asset so as to maximize its average return?"
    )
    # Stop here on blank input: previously the hint was printed but the
    # empty text was still sent through the model and rendered.
    if not text.strip():
        st.write("Please write a valid sentence.")
        return

    lexical_ner_model, lexical_plus_ner_model, semantic_ner_model, hybrid_ner_model = load_models()

    # Map each sidebar label to its model so the lookup cannot silently
    # leave the prediction unset the way an if/elif chain without else can.
    models_by_option = {
        "Lexical": lexical_ner_model,
        "Lexical Plus": lexical_plus_ner_model,
        "Semantic": semantic_ner_model,
        "Hybrid": hybrid_ner_model,
    }

    # get entities
    predicted_entities = models_by_option[option].get_entities(text)

    # re-format to match SpaCy, as it uses label instead of entity_group
    # entity_group follows huggingface design and is compatible with most NER interfaces
    entities_formatted = [
        {
            "start": tag['start'],
            "end": tag['end'],
            "label": tag['entity_group'],
            "score": tag['score'],
        }
        for tag in predicted_entities
    ]

    # displaCy "manual" mode takes pre-computed spans instead of a spaCy Doc.
    ner_for_display = [{
        "text": spacy_tokenize_sentence(text),
        "ents": entities_formatted,
        "title": "Named Entities"
    }]
    html_ner = displacy.render(ner_for_display, style="ent", manual=True)
    # NOTE(review): newlines are flattened presumably so Streamlit's markdown
    # pass does not break up the generated markup -- confirm.
    html_ner = html_ner.replace("\n", " ")
    st.write(HTML_WRAPPER.format(html_ner), unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
|
75 |
if __name__ == '__main__':
|