"""Streamlit demo: highlight named entities found by a BERT NER model."""

import streamlit as st
from annotated_text import annotated_text
from transformers import AutoModelForTokenClassification, AutoTokenizer
from transformers import pipeline

# Text source: https://www.fool.com/earnings/call-transcripts/2022/02/08/danaos-dac-q4-2021-earnings-call-transcript/
DEFAULT_TEXT = (
    "Good day, and welcome to the Danaos Corporation conference call to "
    "discuss the financial results for the three months ended December 31, "
    "2021. As a reminder, today's call is being recorded. Hosting the call "
    "today is Dr. John Coustas, chief executive officer of Danaos "
    "Corporation; and Mr. Evangelos Chatzis, chief financial officer of "
    "Danaos Corporation. Dr. Coustas and Mr. Chatzis will be making some "
    "introductory comments and then we will open the call to a "
    "question-and-answer session. Please go ahead. Thank you, operator, and "
    "good morning to everyone. And thank you for joining us today. Before we "
    "begin, I quickly want to remind everyone that management's remarks this "
    "morning may contain certain forward-looking statements and that actual "
    "results could differ materially from those projected today. These "
    "forward-looking statements are made as of today, and we undertake no "
    "obligation to update them. Factors that might affect future results are "
    "discussed in our filings with the SEC, and we encourage you to review "
    "these detailed Safe Harbor and risk factor disclosures. Please also "
    "note that where we feel appropriate, we will continue to refer to "
    "non-GAAP financial measures such as EBITDA, adjusted EBITDA and "
    "adjusted net income to evaluate our business. Reconciliations of "
    "non-GAAP financial measures to GAAP financial measures are included in "
    "our earnings release and accompanying materials. With that, now let me "
    "turn the call over to Dr. Coustas, who will provide a broad overview of "
    "the quarter."
)

# Background colour used for any entity label missing from ``colour_map``.
DEFAULT_COLOUR = '#d3d3d3'


def _resource_cache():
    """Return the resource-caching decorator of the installed Streamlit.

    Newer releases expose ``st.cache_resource``; older ones offer
    ``st.experimental_singleton`` or only the legacy ``st.cache``.
    """
    for attr in ("cache_resource", "experimental_singleton"):
        decorator = getattr(st, attr, None)
        if decorator is not None:
            return decorator
    return st.cache(allow_output_mutation=True)


@_resource_cache()
def load_ner_pipeline(name):
    """Build the token-classification pipeline for the model called *name*.

    Cached because Streamlit re-executes this script on every widget
    interaction; without caching the model would be reloaded each time.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForTokenClassification.from_pretrained(name)
    return pipeline(
        "ner",
        model=model,
        tokenizer=tokenizer,
        aggregation_strategy="simple",
    )


def build_annotations(source, entities, colours, fallback_colour=DEFAULT_COLOUR):
    """Interleave plain text with ``(span, label, colour)`` entity tuples.

    Args:
        source: The text that was passed to the NER pipeline.
        entities: Pipeline results; each item is read for its ``"start"``,
            ``"end"``, ``"entity_group"`` and (as a fallback) ``"word"`` keys.
        colours: Mapping from entity label to background colour.
        fallback_colour: Colour for labels that *colours* does not contain.

    Returns:
        A list of strings and tuples suitable for ``annotated_text(*result)``.
    """
    pieces = []
    cursor = 0
    for entity in entities:
        label = entity["entity_group"]
        colour = colours.get(label, fallback_colour)
        start, end = entity["start"], entity["end"]
        if start is None or end is None:
            # No character offsets available: the reconstructed word is the
            # only thing we can show for this entity.
            pieces.append((entity["word"], label, colour))
            continue
        if start > cursor:
            pieces.append(source[cursor:start])
        # Slice the source rather than using entity["word"], which is rebuilt
        # from tokens and may not match the user's text exactly.
        pieces.append((source[start:end], label, colour))
        cursor = end
    if cursor < len(source):
        pieces.append(source[cursor:])
    return pieces


st.header("NER Demo")
st.markdown("This demo uses a model to identify entities in text.")
st.markdown("NOTE: this demo is public - please don't enter confidential text")

# Streamlit text boxes
text = st.text_area('Enter text:', value=DEFAULT_TEXT)

# Model setup
model_name = "dslim/bert-base-NER"
nlp = load_ner_pipeline(model_name)
# Kept as module-level names in case later code refers to them.
model, tokenizer = nlp.model, nlp.tokenizer

colour_map = {
    'ORG': '#8fbc8f',
    'PER': '#b0c4de',
    'LOC': '#e6e6fa',
    'MISC': '#fffacd',
}

# Run model
if text:
    ner_results = nlp(text)
    parsed_text = build_annotations(text, ner_results, colour_map)
    annotated_text(*parsed_text)
    st.json(ner_results)