"""Demo for NER4OPT."""
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import streamlit as st
import ner4opt
from ner4opt.utils import preprocess, spacy_tokenize_sentence
from spacy import displacy
# Format template wrapped around the rendered entity HTML before it is written
# to the page: the markup is placed on its own line between two newlines.
HTML_WRAPPER = "\n{}\n"
@st.cache_resource
def load_models():
    """Instantiate every Ner4Opt model variant used by the demo.

    The function is wrapped in ``st.cache_resource``, so the models are built
    once and reused instead of being re-created on each call.

    Returns:
        A 4-tuple of ``ner4opt.Ner4Opt`` instances, in the order
        (lexical, lexical_plus, semantic, hybrid).
    """
    variants = ("lexical", "lexical_plus", "semantic", "hybrid")
    return tuple(ner4opt.Ner4Opt(variant) for variant in variants)
def main():
    """Render the Ner4Opt Streamlit demo page.

    Displays the title and description, lets the user choose a model in the
    sidebar, reads the problem text from a text area, runs the chosen model's
    ``get_entities`` on it and writes the displaCy entity markup to the page.

    If the text is empty after stripping whitespace, a hint is shown and
    nothing else is rendered.
    """
    st.title("""Ner4Opt: Named Entity Recognition for Optimization""")
    st.markdown("""Given an optimization problem in natural language, Ner4Opt extracts optimization related entities from free-form text. The source code for Ner4Opt is available at https://github.com/skadio/ner4opt""")

    # Sidebar labels, listed in the same order as the tuple returned by
    # load_models() so the two can be zipped together below.
    model_labels = ('Lexical', 'Lexical Plus', 'Semantic', 'Hybrid')
    option = st.sidebar.selectbox(
        'Select a lexical, semantic, or hybrid model for extracting entities',
        model_labels,
        index=model_labels.index('Hybrid'))

    text = st.text_area(
        "Text",
        "Cautious Asset Investment has a total of $150,000 to manage and decides to invest it in money market fund, which yields a 2% return as well as in foreign bonds, which gives and average rate of return of 10.2%. Internal policies require PAI to diversify the asset allocation so that the minimum investment in money market fund is 40% of the total investment. Due to the risk of default of foreign countries, no more than 40% of the total investment should be allocated to foreign bonds. How much should the Cautious Asset Investment allocate in each asset so as to maximize its average return?"
    )
    text = text.strip()

    if not text:
        st.write("Please write a valid sentence.")
        # Stop here: there is nothing to tag, so do not load or run the models
        # on an empty string.
        return

    # Look the selected model up by its label instead of branching, so
    # predicted_entities is always bound.
    models_by_label = dict(zip(model_labels, load_models()))
    predicted_entities = models_by_label[option].get_entities(text)

    # re-format to match SpaCy, as it uses label instead of entity_group
    # entity_group follows huggingface design and is compatible with most NER interfaces
    entities_formatted = [
        {
            "start": tag['start'],
            "end": tag['end'],
            "label": tag['entity_group'],
            "score": tag['score'],
        }
        for tag in predicted_entities
    ]

    # displaCy "manual" mode takes pre-computed entities as plain dicts.
    ner_for_display = [{
        "text": spacy_tokenize_sentence(preprocess(text)),
        "ents": entities_formatted,
        "title": "Named Entities"
    }]

    html_ner = displacy.render(ner_for_display, style="ent", manual=True)
    # Collapse the markup onto a single line before embedding it in the page.
    html_ner = html_ner.replace("\n", " ")
    st.write(HTML_WRAPPER.format(html_ner), unsafe_allow_html=True)
# Script entry point: build the demo page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()