File size: 1,441 Bytes
26883a2
7ba03d6
26883a2
 
 
7ba03d6
 
 
 
 
 
 
 
26883a2
7ba03d6
 
26883a2
7ba03d6
26883a2
 
1c6b4bb
26883a2
1c6b4bb
26883a2
 
1c6b4bb
 
 
 
26883a2
1c6b4bb
 
 
26883a2
1c6b4bb
c384035
26883a2
1c6b4bb
 
 
80415c9
1c6b4bb
 
 
 
 
 
 
 
26883a2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import streamlit as st
from annotated_text import annotated_text
from transformers import pipeline


def process_output(output, text):
    """Convert NER pipeline output into arguments for ``annotated_text``.

    Walks the entities in the order given and interleaves the unlabelled
    stretches of ``text`` (plain strings) with one tuple per entity.

    Args:
        output: Iterable of dicts, each providing ``"start"`` and ``"end"``
            character offsets into ``text`` and an ``"entity_group"`` label.
            Entities are consumed in iteration order; they are presumably
            sorted by position and non-overlapping (not checked here).
        text: The string the offsets refer to.

    Returns:
        A list mixing plain ``str`` segments and entity tuples. An entity is
        ``(span, label, colour)`` when a colour is configured for its label
        and ``(span, label)`` otherwise. Empty plain segments are omitted.
    """
    # The label -> colour mapping is an optional module-level global. Resolve
    # it defensively so a missing definition degrades to uncoloured
    # annotations instead of raising NameError on the first entity.
    try:
        palette = colors_dict
    except NameError:
        palette = {}

    segments = []
    cursor = 0  # index of the first character not yet emitted
    for ent in output:
        start, end, label = ent["start"], ent["end"], ent["entity_group"]

        # Plain text between the previous entity and this one.
        gap = text[cursor:start]
        if gap:
            segments.append(gap)

        span = text[start:end]
        colour = palette.get(label)
        if colour is None:
            # Unknown label: omit the colour rather than raise KeyError.
            segments.append((span, label))
        else:
            segments.append((span, label, colour))
        cursor = end

    # Trailing plain text after the last entity.
    tail = text[cursor:]
    if tail:
        segments.append(tail)

    return segments


if __name__ == "__main__":
    # Streamlit app entry point: renders the page, reads the user's text and,
    # on button press, shows the recognised entities as annotated text.
    # NOTE: Streamlit re-executes this script on every widget interaction,
    # so anything expensive below must be cached.

    st.set_page_config(page_title="Named Entity Recognizer", page_icon="🐒")

    # Hide Streamlit's default menu and footer.
    st.markdown(""" <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style> """, unsafe_allow_html=True)

    st.header("Named Entity Recognizer")
    st.write("Developed with ❤️ by [Balamurugan P](https://www.linkedin.com/in/bala-murugan-62073b212/)")
    st.text("")  # vertical spacer

    text = st.text_area('Enter text to find Named Entities :', height=170)

    st.text("")  # vertical spacer

    # Loading the pipeline from hub
    # Pipeline handles the preprocessing and post processing steps
    model_checkpoint = "balamurugan1603/bert-finetuned-ner"

    # Cache the pipeline so the model is built once instead of on every
    # rerun. Prefer st.cache_resource; fall back to the legacy st.cache on
    # Streamlit releases that predate it.
    _cache_model = getattr(st, "cache_resource", None)
    if _cache_model is None:
        _cache_model = st.cache(allow_output_mutation=True)

    @_cache_model
    def _load_recogniser(checkpoint):
        """Build the token-classification pipeline for ``checkpoint``."""
        return pipeline(
            "token-classification", model=checkpoint, aggregation_strategy="simple"
        )

    namedEntityRecogniser = _load_recogniser(model_checkpoint)

    if st.button("Check Entities"):
        st.text("")
        if text.strip():
            output = namedEntityRecogniser(text)
            annotated_text(*process_output(output, text))
        else:
            # Nothing to analyse: skip the model call and tell the user why.
            st.warning("Please enter some text to analyse.")