"""Demo for NER4OPT.""" import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) import streamlit as st import ner4opt from ner4opt.utils import preprocess, spacy_tokenize_sentence from spacy import displacy HTML_WRAPPER = """
{}
""" @st.cache_resource def load_models(): """Load and cache NER4OPT models.""" lexical_ner_model = ner4opt.Ner4Opt("lexical") lexical_plus_ner_model = ner4opt.Ner4Opt("lexical_plus") semantic_ner_model = ner4opt.Ner4Opt("semantic") hybrid_ner_model = ner4opt.Ner4Opt("hybrid") return lexical_ner_model, lexical_plus_ner_model, semantic_ner_model, hybrid_ner_model def main(): st.title("""Ner4Opt: Named Entity Recognition for Optimization""") st.markdown("""Given an optimization problem in natural language, Ner4Opt extracts optimization related entities from free-form text. The source code for Ner4Opt is available at https://github.com/skadio/ner4opt""") option = st.sidebar.selectbox( 'Select a lexical, semantic, or hybrid model for extracting entities', ('Lexical', 'Lexical Plus', 'Semantic', 'Hybrid'), index=3) text = st.text_area( "Text", "Cautious Asset Investment has a total of $150,000 to manage and decides to invest it in money market fund, which yields a 2% return as well as in foreign bonds, which gives and average rate of return of 10.2%. Internal policies require PAI to diversify the asset allocation so that the minimum investment in money market fund is 40% of the total investment. Due to the risk of default of foreign countries, no more than 40% of the total investment should be allocated to foreign bonds. How much should the Cautious Asset Investment allocate in each asset so as to maximize its average return?" ) text = text.strip() if text == "": st.write("Please write a valid sentence.") lexical_ner_model, lexical_plus_ner_model, semantic_ner_model, hybrid_ner_model = load_models( ) # get entities if option == "Lexical": predicted_entities = lexical_ner_model.get_entities(text) elif option == "Lexical Plus": predicted_entities = lexical_plus_ner_model.get_entities(text) elif option == "Semantic": predicted_entities = semantic_ner_model.get_entities(text) elif option == "Hybrid": predicted_entities = hybrid_ner_model.get_entities(text) # re-format to match SpaCy, as it uses label instead of entity_group # entity_group follows huggingface design and is compatible with most NER interfaces entities_formatted = [] for tag in predicted_entities: entities_formatted.append({ "start": tag['start'], "end": tag['end'], "label": tag['entity_group'], "score": tag['score'] }) ner_for_display = [{ "text": spacy_tokenize_sentence(preprocess(text)), "ents": entities_formatted, "title": "Named Entities" }] html_ner = displacy.render(ner_for_display, style="ent", manual=True) html_ner = html_ner.replace("\n", " ") st.write(HTML_WRAPPER.format(html_ner), unsafe_allow_html=True) if __name__ == '__main__': main()