"""Demo for NER4OPT.""" import re import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) import spacy from spacy import displacy from spacy.training import iob_to_biluo, biluo_tags_to_offsets from spacy.tokenizer import Tokenizer import streamlit as st from simpletransformers.ner import NERModel, NERArgs HTML_WRAPPER = """
{}
""" @st.cache_resource def load_models(): """Load custom built NER4OPT model.""" custom_labels = [ 'O', 'B-CONST_DIR', 'I-CONST_DIR', 'B-LIMIT', 'I-LIMIT', 'B-VAR', 'I-VAR', 'B-OBJ_DIR', 'B-OBJ_NAME', 'I-OBJ_NAME', 'B-PARAM', 'I-PARAM', ] # # create model model_args = NERArgs() model_args.use_early_stopping = True model_args.early_stopping_delta = 0.01 model_args.early_stopping_metric = "eval_loss" model_args.early_stopping_metric_minimize = False model_args.early_stopping_patience = 5 model_args.evaluate_during_training_steps = 2000 model_args.overwrite_output_dir = True model_args.reprocess_input_data = True model_args.num_train_epochs = 11 model_args.adafactor_beta1 = 0.9 model_args.weight_decay = 0.01 model_args.max_seq_length = 512 model_args.learning_rate = 4e-5 model_args.train_batch_size = 1 model_args.eval_batch_size = 1 model_args.manual_seed = 123456789 model_args.output_dir = "trained_transformer_model" model_args.use_cuda = True model_args.use_multiprocessing = False model = NERModel("roberta", "skadio/ner4opt-roberta", labels=custom_labels, use_cuda=True, args=model_args) spacy_model = spacy.load("en_core_web_sm") spacy_model.tokenizer = Tokenizer(spacy_model.vocab, token_match=re.compile(r'\S+').match) spacy_blank_model = spacy.blank('en') spacy_blank_model.tokenizer = Tokenizer( spacy_blank_model.vocab, token_match=re.compile(r'\S+').match) return model, spacy_model, spacy_blank_model def main(): st.sidebar.title(""" NER4OPT Demo: \nFull code will be available at https://github.com/skadio/Ner4Opt """) text = st.text_area( "Text", "Cautious Asset Investment has a total of $ 150,000 to manage and decides to invest it in money market fund , which yields a 2 % return as well as in foreign bonds , which gives and average rate of return of 10.2 % . Internal policies require PAI to diversify the asset allocation so that the minimum investment in money market fund is 40 % of the total investment . Due to the risk of default of foreign countries , no more than 40 % of the total investment should be allocated to foreign bonds . How much should the Cautious Asset Investment allocate in each asset so as to maximize its average return ?" ) if text == "": st.write("Please write a valid sentence.") model, spacy_model, spacy_blank_model = load_models() # Augmented Text spacy_doc = spacy_model(text) if len(list(spacy_doc.sents)) >= 2: last_two_sentences = ' '.join( [item.text for item in list(spacy_doc.sents)[-2::]]) else: last_two_sentences = ' '.join( [item.text for item in list(spacy_doc.sents)[-1::]]) to_skip_count = len(last_two_sentences.split()) augmented_sent = last_two_sentences + " " + text if st.button("Get Named Entities"): predictions, raw_outputs = model.predict([augmented_sent], split_on_space=True) transformer_predictions = [ list(val.values())[0] for val in predictions[0] ] transformer_predictions = transformer_predictions[to_skip_count::] biluo_tags = iob_to_biluo(transformer_predictions) doc = spacy_blank_model.make_doc(text) entities = biluo_tags_to_offsets(doc, biluo_tags) entities_formatted = [] for tag in entities: entities_formatted.append({ "start": tag[0], "end": tag[1], "label": tag[2], "score": 1.0 }) ner_for_display = [{ "text": doc.text, "ents": entities_formatted, "title": None }] st.title("Named Entity Results") html_ner = displacy.render(ner_for_display, style="ent", manual=True) html_ner = html_ner.replace("\n", " ") st.write(HTML_WRAPPER.format(html_ner), unsafe_allow_html=True) if __name__ == '__main__': main()