File size: 2,477 Bytes
a7d4382
3d72282
 
 
a7d4382
3d72282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import streamlit as st
import nltk
import neattext as ntx
from nltk.sentiment.vader import SentimentIntensityAnalyzer


# Ensure VADER lexicon is available
def _ensure_vader_downloaded() -> None:
    """Make sure the NLTK ``vader_lexicon`` resource is installed.

    The local NLTK data path is checked first; the downloader is only
    invoked when the lexicon cannot be found, so an already-provisioned
    (or offline) environment does not hit the network.
    """
    try:
        nltk.data.find('sentiment/vader_lexicon.zip')
    except LookupError:
        # Not installed locally yet: fetch it once.
        nltk.download('vader_lexicon')


@st.cache_resource(show_spinner=False)
def get_sentiment_analyzer() -> SentimentIntensityAnalyzer:
    """Build a VADER ``SentimentIntensityAnalyzer``.

    The lexicon check runs before the analyzer is constructed. The function
    is decorated with ``st.cache_resource`` (spinner disabled).
    """
    _ensure_vader_downloaded()
    analyzer = SentimentIntensityAnalyzer()
    return analyzer


# Text preprocessing
# Module-level English Snowball stemmer, created once at import time and
# used by clean_text() to stem each token.
stemmer = nltk.SnowballStemmer("english")


def clean_text(text: str) -> str:
    """Clean and stem ``text`` for downstream analysis.

    The text is lowercased, passed through a series of neattext removal
    helpers (emojis, emails, phone numbers, URLs, hashtags, dates, special
    characters, repeated spaces, stopwords), normalized, and finally each
    whitespace-separated token is stemmed with the module-level ``stemmer``.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text as space-joined stemmed tokens, or ``""`` when
        ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    # Pattern-based removals run while the punctuation they key on is intact.
    text = ntx.remove_emojis(text)
    text = ntx.remove_emails(text)
    text = ntx.remove_phone_numbers(text)
    text = ntx.remove_urls(text)
    text = ntx.remove_hashtags(text)
    # Dates are removed BEFORE special characters: stripping separators such
    # as "/", "-" or "." first would presumably turn numeric dates into plain
    # digit runs that the date pattern can no longer recognise.
    text = ntx.remove_dates(text)
    text = ntx.remove_special_characters(text)
    text = ntx.remove_multiple_spaces(text)
    text = text.strip()
    text = ntx.remove_stopwords(text)
    text = ntx.normalize(text)
    # split()/join also collapses any whitespace left behind by the removals.
    return " ".join(stemmer.stem(word) for word in text.split())


def label_from_compound(compound: float) -> str:
    """Translate a compound score into a polarity label.

    Args:
        compound: Compound sentiment score.

    Returns:
        ``"Positive"`` when ``compound >= 0.05``, ``"Negative"`` when
        ``compound <= -0.05``, and ``"Neutral"`` otherwise.
    """
    if compound <= -0.05:
        return "Negative"
    return "Positive" if compound >= 0.05 else "Neutral"


# --- Streamlit page -------------------------------------------------------
# Emoji are written as \U escapes so the user-facing strings stay intact even
# if this file is re-saved or served with the wrong encoding.
st.set_page_config(
    page_title='Pfizer Vaccine Sentiment - VADER',
    page_icon='\U0001F4AC',  # speech balloon
    layout='centered',
)

st.title('\U0001F4AC Sentiment Polarity Detector (VADER)')
st.write('Detect sentiment polarity of a text using VADER.')

default_text = (
    "Pfizer vaccine is doing a great job protecting people, but some side effects worry me."
)

text_input = st.text_area('Enter text to analyze', value=default_text, height=150)

analyze = st.button('Analyze Sentiment', type='primary', use_container_width=True)

if analyze and text_input.strip():
    sia = get_sentiment_analyzer()
    # NOTE(review): the analyzer scores the cleaned (lowercased, stopword-
    # stripped, stemmed) text rather than the raw input; confirm this matches
    # the intended pipeline, since VADER's lexicon holds unstemmed words.
    cleaned = clean_text(text_input)
    scores = sia.polarity_scores(cleaned)
    label = label_from_compound(scores.get('compound', 0.0))

    st.subheader('Result')
    st.markdown(f"**Predicted Polarity:** {label}")

    # One metric tile per VADER score, shown to three decimals.
    col1, col2, col3, col4 = st.columns(4)
    col1.metric('\U0001F7E2 Positive', f"{scores.get('pos', 0.0):.3f}")  # green circle
    col2.metric('\U0001F534 Negative', f"{scores.get('neg', 0.0):.3f}")  # red circle
    col3.metric('\U0001F7E1 Neutral', f"{scores.get('neu', 0.0):.3f}")  # yellow circle
    col4.metric('\U0001F535 Compound', f"{scores.get('compound', 0.0):.3f}")  # blue circle

    with st.expander('Show cleaned text'):
        st.code(cleaned)

elif analyze:
    # Button pressed but the text area was empty or whitespace-only.
    st.warning('Please enter some text to analyze.')