File size: 3,323 Bytes
48be4c4
 
 
 
 
 
d99bf21
ad571cb
d99bf21
48be4c4
 
 
 
f0ac6d9
48be4c4
d99bf21
 
 
 
 
 
48be4c4
 
 
 
1833bb6
d99bf21
9742260
d99bf21
 
51b35e3
ba9657d
48be4c4
 
 
d99bf21
48be4c4
3222a06
48be4c4
 
d99bf21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad571cb
d99bf21
 
 
 
 
 
48be4c4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""Demo for NER4OPT."""
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)

import streamlit as st
import ner4opt
from ner4opt.utils import preprocess, spacy_tokenize_sentence
from spacy import displacy

# HTML template with one "{}" placeholder; main() formats the displaCy markup
# into it so the result sits in a bordered, horizontally scrollable box.
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""


@st.cache_resource
def load_models():
    """Instantiate every Ner4Opt model variant used by the demo.

    The function is wrapped in ``st.cache_resource`` so the models are
    built once and reused rather than rebuilt on each call.

    Returns:
        A 4-tuple of ``ner4opt.Ner4Opt`` instances in the order
        ``(lexical, lexical_plus, semantic, hybrid)``.
    """
    # Order matters: callers unpack the tuple positionally.
    variant_names = ("lexical", "lexical_plus", "semantic", "hybrid")
    return tuple(ner4opt.Ner4Opt(variant) for variant in variant_names)


def main():
    """Render the Ner4Opt demo page.

    Shows a title and description, lets the user pick a model variant in
    the sidebar and enter a problem description, then runs the selected
    model on the text and displays the predicted entities with displaCy.

    If the text area is empty (after stripping whitespace) a prompt is
    shown and nothing else is rendered; no model is invoked.
    """
    st.title("""Ner4Opt: Named Entity Recognition for Optimization""")

    st.markdown("""Given an optimization problem in natural language, Ner4Opt extracts optimization related entities from free-form text. The source code for Ner4Opt is available at https://github.com/skadio/ner4opt""")

    option = st.sidebar.selectbox(
        'Select a lexical, semantic, or hybrid model for extracting entities',
        ('Lexical', 'Lexical Plus', 'Semantic', 'Hybrid'), index=3)

    text = st.text_area(
        "Text",
        "Cautious Asset Investment has a total of $150,000 to manage and decides to invest it in money market fund, which yields a 2% return as well as in foreign bonds, which gives and average rate of return of 10.2%. Internal policies require PAI to diversify the asset allocation so that the minimum investment in money market fund is 40% of the total investment. Due to the risk of default of foreign countries, no more than 40% of the total investment should be allocated to foreign bonds. How much should the Cautious Asset Investment allocate in each asset so as to maximize its average return?"
    ).strip()

    # Guard clause: without input there is nothing to tag, so stop here
    # instead of running a model on an empty string and drawing an empty box.
    if not text:
        st.write("Please write a valid sentence.")
        return

    lexical_ner_model, lexical_plus_ner_model, semantic_ner_model, hybrid_ner_model = load_models()

    # Map each sidebar label to its model; keys must match the selectbox
    # options above.
    models_by_option = {
        "Lexical": lexical_ner_model,
        "Lexical Plus": lexical_plus_ner_model,
        "Semantic": semantic_ner_model,
        "Hybrid": hybrid_ner_model,
    }
    predicted_entities = models_by_option[option].get_entities(text)

    # re-format to match SpaCy, as it uses label instead of entity_group
    # entity_group follows huggingface design and is compatible with most NER interfaces
    entities_formatted = [
        {
            "start": tag['start'],
            "end": tag['end'],
            "label": tag['entity_group'],
            "score": tag['score'],
        }
        for tag in predicted_entities
    ]

    # The text is run through the same preprocess + tokenize helpers from
    # ner4opt.utils before display.
    # NOTE(review): presumably this keeps the entity offsets aligned with the
    # rendered text -- confirm against ner4opt.
    ner_for_display = [{
        "text": spacy_tokenize_sentence(preprocess(text)),
        "ents": entities_formatted,
        "title": "Named Entities"
    }]
    html_ner = displacy.render(ner_for_display, style="ent", manual=True)
    # Collapse newlines so the markup is passed to Streamlit as a single line.
    html_ner = html_ner.replace("\n", " ")
    st.write(HTML_WRAPPER.format(html_ner), unsafe_allow_html=True)


# Build the page only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()