File size: 5,830 Bytes
a297ab2
 
 
 
 
 
 
2e6f5d4
a297ab2
 
2e6f5d4
a297ab2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e6f5d4
a297ab2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import streamlit as st
from transformers import pipeline
import spacy
from spacy import displacy
import plotly.express as px
import numpy as np

# Page chrome and introductory copy. Streamlit renders elements in call
# order, so the sequence below is the top-to-bottom layout of the page.
st.set_page_config(page_title="NLP Prototype")

st.title("Natural Language Processing Prototype")
st.write("_This web application is intended for educational use, please do not upload any sensitive information._")
st.subheader("__Which natural language processing task would you like to try?__")

# One markdown bullet per supported task, each emitted as its own element.
for task_blurb in (
    "- __Sentiment Analysis:__ Identifying whether a piece of text has a positive or negative sentiment.",
    "- __Named Entity Recognition:__ Identifying all geopolitical entities, organizations, people, locations, or dates in a body of text.",
    "- __Text Classification:__ Placing a piece of text into one or more categories.",
    "- __Text Summarization:__ Condensing larger bodies of text into smaller bodies of text.",
):
    st.write(task_blurb)

# The leading empty entry is the initial selection, so no task branch below
# matches until the user picks one.
option = st.selectbox(
    'Please select from the list',
    ('', 'Sentiment Analysis', 'Named Entity Recognition', 'Text Classification', 'Text Summarization'),
)

@st.cache(allow_output_mutation=True, show_spinner=False)
def Loading_Model_1():
    """Build and return the Transformers "summarization" pipeline.

    No model name is passed, so the checkpoint is whichever one `pipeline`
    selects for the task; `framework="pt"` requests the PyTorch backend.
    The `st.cache` decorator memoizes the returned object, with
    `allow_output_mutation=True` so the cached pipeline is not re-hashed
    for mutation checks and `show_spinner=False` to hide the per-function
    spinner.
    """
    return pipeline("summarization", framework="pt")

@st.cache(allow_output_mutation=True, show_spinner=False)
def Loading_Model_2():
    """Build and return the Transformers "zero-shot-classification" pipeline.

    No model name is passed, so the checkpoint is whichever one `pipeline`
    selects for the task; `framework="pt"` requests the PyTorch backend.
    The result is memoized by `st.cache` with output-mutation checks
    disabled and the per-function spinner hidden.
    """
    return pipeline("zero-shot-classification", framework="pt")

@st.cache(allow_output_mutation=True, show_spinner=False)
def Loading_Model_3():
    """Build and return the Transformers "sentiment-analysis" pipeline.

    No model name is passed, so the checkpoint is whichever one `pipeline`
    selects for the task; `framework="pt"` requests the PyTorch backend.
    The result is memoized by `st.cache` with output-mutation checks
    disabled and the per-function spinner hidden.
    """
    return pipeline("sentiment-analysis", framework="pt")

@st.cache(allow_output_mutation=True, show_spinner=False)
def Loading_Model_4():
    """Load and return the spaCy `en_core_web_sm` language object.

    The result is memoized by `st.cache` with output-mutation checks
    disabled and the per-function spinner hidden.
    """
    return spacy.load('en_core_web_sm')

def entRecognizer(entDict, typeEnt):
    """Return the entity texts in `entDict` whose label equals `typeEnt`.

    Args:
        entDict: Mapping of entity text to entity label.
        typeEnt: The label to select (compared with `==`).

    Returns:
        A new list of the matching keys, in the mapping's iteration order.
        Empty when nothing matches or the mapping is empty.
    """
    # Deliberately not cached: hashing the dict argument to build a cache key
    # costs more than the single pass below, and the result is a fresh list
    # on every call either way.
    return [text for text, label in entDict.items() if label == typeEnt]

def plot_result(top_topics, scores):
    """Render a horizontal bar chart of category scores in the Streamlit page.

    Args:
        top_topics: Sequence of category names, one per bar (y axis).
        scores: Sequence of numeric scores aligned with `top_topics`; each
            is multiplied by 100 before plotting and shown as a percentage
            label outside its bar.

    Returns:
        None. The figure is handed to `st.plotly_chart`.
    """
    categories = np.array(top_topics)
    # np.array copies, so scaling in place never touches the caller's data.
    percentages = np.array(scores)
    percentages *= 100

    bar_kwargs = dict(
        x=percentages,
        y=categories,
        orientation='h',
        labels={'x': 'Probability', 'y': 'Category'},
        text=percentages,
        # Upper bound above 100 leaves room for the outside text labels.
        range_x=(0, 115),
        title='Top Predictions',
        # Evenly spaced values give each bar its own position on the scale.
        color=np.linspace(0, 1, len(percentages)),
        color_continuous_scale="Bluered",
    )
    chart = px.bar(**bar_kwargs)
    chart.update(layout_coloraxis_showscale=False)
    chart.update_traces(texttemplate='%{text:0.1f}%', textposition='outside')
    st.plotly_chart(chart)

# Call all four loaders up front under a single spinner, regardless of which
# task is selected; the task branches below use these module-level names.
loading_notice = "Please wait for the models to load. This should take approximately 60 seconds."
with st.spinner(text=loading_notice):
    # Tuple elements are evaluated left to right, so the loaders run in
    # numeric order.
    sum2, class1, sentiment, nlp = (
        Loading_Model_1(),
        Loading_Model_2(),
        Loading_Model_3(),
        Loading_Model_4(),
    )

# Zero-shot text classification: the user supplies candidate categories and a
# passage; the top category is printed and all scores are charted.
if option == 'Text Classification':
    cat1 = st.text_input('Enter each possible category name (separated by a comma). Maximum 5 categories.')
    text = st.text_area('Enter Text Below:', height=200)
    submit = st.button('Generate')
    if submit:
        # Strip every label individually ("a, b" -> ["a", "b"]) and drop
        # blanks, so stray spaces or trailing commas never reach the model
        # as part of a label or as an empty label.
        labels1 = [label.strip() for label in cat1.split(',') if label.strip()]
        if not labels1:
            st.warning('Please enter at least one category name.')
        elif not text.strip():
            st.warning('Please enter some text to classify.')
        else:
            st.subheader("Classification Results:")
            result = class1(text, candidate_labels=labels1)
            # Index 0 is reported as the top prediction.
            cat1name = result['labels'][0]
            cat1prob = result['scores'][0]
            st.write('Category: {} | Probability: {:.1f}%'.format(cat1name, (cat1prob * 100)))
            # Reverse, then keep the last ten: at most ten entries from the
            # front of the result, handed to the chart in reversed order.
            plot_result(result['labels'][::-1][-10:], result['scores'][::-1][-10:])
        
# Text summarization: sliders control the generation length cap and beam
# count; the generated summary is written below the inputs.
if option == 'Text Summarization':
    # NOTE(review): the label says "words", but the value is passed straight
    # through as the generation `max_length` - confirm the unit matches.
    max_lengthy = st.slider('Maximum summary length (words)', min_value=30, max_value=150, value=60, step=10)
    num_beamer = st.slider('Speed vs quality of summary (1 is fastest)', min_value=1, max_value=8, value=4, step=1)
    text = st.text_area('Enter Text Below (maximum 800 words):', height=300)
    submit = st.button('Generate')
    if submit:
        if not text.strip():
            # Nothing to summarize; skip the model call entirely.
            st.warning('Please enter some text to summarize.')
        else:
            st.subheader("Summary:")
            with st.spinner(text="This may take a moment..."):
                # truncation=True asks the pipeline's tokenizer to clip input
                # that exceeds the model's maximum length; the text area only
                # advises an 800-word limit and does not enforce it.
                summWords = sum2(
                    text,
                    max_length=max_lengthy,
                    min_length=15,
                    num_beams=num_beamer,
                    do_sample=True,
                    early_stopping=True,
                    repetition_penalty=1.5,
                    length_penalty=1.5,
                    truncation=True,
                )
            text2 = summWords[0]["summary_text"]
            st.write(text2)

# Sentiment analysis: run the passage through the sentiment pipeline and
# report the first result's label and score as a percentage.
if option == 'Sentiment Analysis':
    text = st.text_area('Enter Text Below:', height=200)
    submit = st.button('Generate')
    if submit:
        if not text.strip():
            # Avoid presenting a label and score for blank input.
            st.warning('Please enter some text to analyze.')
        else:
            st.subheader("Sentiment:")
            result = sentiment(text)
            sent = result[0]['label']
            cert = result[0]['score']
            st.write('Text Sentiment: {} | Probability: {:.1f}%'.format(sent, (cert * 100)))

# Named entity recognition: list the detected entities grouped by label, then
# show the original text with those entities highlighted via displaCy.
if option == 'Named Entity Recognition':
    text = st.text_area('Enter Text Below:', height=300)
    if st.button('Generate'):
        doc = nlp(text)
        # Entity text -> label. A repeated entity text keeps its first
        # position and takes the label of its last occurrence.
        entDict = {ent.text: ent.label_ for ent in doc.ents}
        grouped = {
            label: entRecognizer(entDict, label)
            for label in ("ORG", "PERSON", "DATE", "GPE", "LOC")
        }

        st.subheader("List of Named Entities:")
        # (line prefix, entity label) pairs in display order.
        for prefix, label in (
            ("Geopolitical Entities (GPE): ", "GPE"),
            ("People (PERSON): ", "PERSON"),
            ("Organizations (ORG): ", "ORG"),
            ("Dates (DATE): ", "DATE"),
            ("Locations (LOC): ", "LOC"),
        ):
            st.write(prefix + str(grouped[label]))

        st.subheader("Original Text with Entities Highlighted")
        # Restrict the highlighting to the same five labels listed above.
        options = {"ents": ["ORG", "GPE", "PERSON", "LOC", "DATE"]}
        HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
        # Newlines are flattened to spaces before embedding the markup.
        html = displacy.render(doc, style="ent", options=options).replace("\n", " ")
        st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)