# balamurugan1603's picture
# bug fix
# 80415c9
import streamlit as st
from annotated_text import annotated_text
from transformers import pipeline
def process_output(output, text, colors=None):
    """Convert token-classification results into ``annotated_text`` arguments.

    Walks the entities in the order given and slices ``text`` into
    alternating plain-string segments and annotation tuples.

    Args:
        output: Iterable of entity dicts, each providing ``"start"`` and
            ``"end"`` character offsets into ``text`` and an
            ``"entity_group"`` label. Entities are consumed in the given
            order and are expected to be ordered by start offset.
        text: The string the offsets in ``output`` refer to.
        colors: Optional mapping from entity label to a background color.
            When omitted, a module-level ``colors_dict`` is used if one
            exists; otherwise no explicit colors are applied.

    Returns:
        A list whose items are either plain strings (text outside any
        entity) or tuples ``(entity_text, label, color)``; the color is
        left out, giving ``(entity_text, label)``, when none is configured
        for that label. Empty plain segments are skipped.
    """
    if colors is None:
        # The palette used to be read from a bare global ``colors_dict``,
        # which raised NameError when the module did not define it. Look it
        # up defensively so an existing global keeps working.
        colors = globals().get("colors_dict") or {}

    segments = []
    cursor = 0  # offset in ``text`` just past the previously emitted entity
    for ent in output:
        start, end, label = ent["start"], ent["end"], ent["entity_group"]

        # Plain text between the previous entity and this one.
        gap = text[cursor:start]
        if gap:
            segments.append(gap)

        span = text[start:end]
        color = colors.get(label)
        # Unknown labels fall back to a 2-tuple instead of raising KeyError.
        segments.append((span, label) if color is None else (span, label, color))
        cursor = end

    # Whatever follows the last entity.
    tail = text[cursor:]
    if tail:
        segments.append(tail)
    return segments
if __name__ == "__main__":
    # --- Page setup -------------------------------------------------------
    st.set_page_config(page_title="Named Entity Recognizer", page_icon="🐒")

    # CSS that hides the "#MainMenu" element and the page footer.
    hide_chrome_css = """ <style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style> """
    st.markdown(hide_chrome_css, unsafe_allow_html=True)

    st.header("Named Entity Recognizer")
    st.write("Developed with ❤️ by [Balamurugan P](https://www.linkedin.com/in/bala-murugan-62073b212/)")
    st.text("")  # empty text element used as a spacer

    # --- Input ------------------------------------------------------------
    user_text = st.text_area("Enter text to find Named Entities :", height=170)
    st.text("")

    # --- Model ------------------------------------------------------------
    # The pipeline is fetched from the hub and takes care of both the
    # pre-processing and the post-processing around the model.
    # NOTE(review): this is built on every execution of the script, not only
    # when the button is pressed - consider caching it.
    checkpoint_name = "balamurugan1603/bert-finetuned-ner"
    ner_pipeline = pipeline(
        "token-classification",
        model=checkpoint_name,
        aggregation_strategy="simple",
    )

    # --- Inference and rendering -------------------------------------------
    check_requested = st.button("Check Entities")
    if check_requested:
        st.text("")
        entities = ner_pipeline(user_text)
        rendered_parts = process_output(entities, user_text)
        annotated_text(*rendered_parts)