Spaces:
Runtime error
Runtime error
# Standard library
from collections import Counter

# Third-party (duplicate imports of `stopwords` and `WordCloud` merged)
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import plotly.express as px
import seaborn as sns
import spacy
import streamlit as st
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from PIL import Image
from sklearn.feature_extraction.text import CountVectorizer
from spacy import displacy
from textstat import flesch_reading_ease
from wordcloud import STOPWORDS, ImageColorGenerator, WordCloud

# Default size (in inches) for matplotlib figures created by this app.
plt.rcParams["figure.figsize"] = (30, 20)

# NLTK data used by the tokenizers, stop-word list, VADER and the POS tagger.
nltk.download('punkt')
nltk.download('stopwords')
# NOTE(review): 'corpus' does not look like a published NLTK resource id;
# the call is kept as-is -- confirm and drop it if it only logs an error.
nltk.download('corpus')
nltk.download('vader_lexicon')
nltk.download('averaged_perceptron_tagger')

# Shared model instances, created once at import time and reused by the
# analysis functions below.
nlp = spacy.load("en_core_web_sm")
sid = SentimentIntensityAnalyzer()
def create_wordcloud(text):
    """Render a word cloud of ``text`` in the Streamlit page.

    The image size is derived from the module-level ``width`` and ``height``
    values (the sidebar sliders set in the script entry point).

    Args:
        text: Raw text to visualise.
    """
    st.header("Here is wordcloud..")
    with st.spinner("Preparing your wordcloud .."):
        wc = WordCloud(
            width=width * 100,
            height=(height * 100) + 200,
            background_color='white',
            colormap='prism',
            collocations=False,
        ).generate_from_text(text)
        fig, ax = plt.subplots()
        try:
            # Draw on the axes we own instead of pyplot's implicit
            # "current axes", which is shared global state.
            ax.imshow(wc, interpolation='bilinear')
            ax.axis('off')
            st.pyplot(fig)
        finally:
            # The script is re-executed on every interaction; without an
            # explicit close each run would leave one more open figure.
            plt.close(fig)
    st.balloons()
# @st.cache(suppress_st_warning=True, allow_output_mutation=True) | |
def get_input():
    """Show the text-input widgets and return the current text.

    Both example files are read on every call. Session state key ``'x'``
    holds the text shown in the text area and ``'k'`` is the widget key;
    pressing "Clear" bumps ``'k'`` and resets ``'x'`` to a single space.

    Returns:
        The string currently held by the text area.
    """
    samples = {}
    for label, path in (('dream', 'ex_dream.txt'), ('tryst', 'ex_tryst.txt')):
        with open(path) as handle:
            samples[label] = ' '.join(handle.readlines())

    state = st.session_state
    for name, initial in (('x', ' '), ('k', 0)):
        if name not in state:
            state[name] = initial

    if st.button('Example: I have a dream - M. King'):
        state['x'] = samples['dream']
    if st.button('Example: Tryst with destiny - J. Nehru'):
        state['x'] = samples['tryst']

    placeholder = st.empty()
    if st.button('Clear'):
        state['k'] += 1
        state['x'] = ' '

    return placeholder.text_area(
        "Paste your text or Click Example",
        value=state['x'],
        key=state['k'],
        height=200,
    )
def create_ngram(text):
    """Plot the ten most frequent unigrams, bigrams and trigrams of ``text``.

    English stop words are removed before counting. Chart size follows the
    module-level ``width`` and ``height`` values.

    Args:
        text: Raw text to analyse.
    """
    st.header("N-Gram Anaysis is >>")

    # CountVectorizer expects stop_words as a list (or 'english' / None);
    # a set is rejected by the parameter validation of recent scikit-learn.
    stop = list(stopwords.words('english'))

    def _get_top_ngram(corpus, n, limit=10):
        """Return up to ``limit`` (ngram, count) pairs, most frequent first."""
        vec = CountVectorizer(ngram_range=(n, n), stop_words=stop).fit(corpus)
        sum_words = vec.transform(corpus).sum(axis=0)
        words_freq = [
            (word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()
        ]
        words_freq.sort(key=lambda pair: pair[1], reverse=True)
        return words_freq[:limit]

    def plot_top_ngrams_barchart(corpus, n=2):
        """Draw a horizontal bar chart of the top n-grams of ``corpus``."""
        try:
            top_ngrams = _get_top_ngram(corpus, n)
        except ValueError:
            # Raised by CountVectorizer when nothing is left to count
            # (e.g. trigrams requested on a two-word text).
            st.info(f"Not enough text to extract {n}-grams.")
            return
        ngrams, counts = map(list, zip(*top_ngrams))
        fig = px.bar(x=counts, y=ngrams, color=counts)
        fig.update_layout(yaxis=dict(autorange='reversed'))
        fig.update_layout(autosize=False, width=width * 100, height=height * 100)
        st.plotly_chart(fig)

    # The whole text is treated as a single document.
    for title, n in (("Unigram:", 1), ("Bigram:", 2), ("Trigram:", 3)):
        st.subheader(title)
        plot_top_ngrams_barchart([text], n)
# # Overall Sentiment | |
def create_sentiment(text, tokenized_sent):
    """Show overall and per-sentence VADER sentiment for the text.

    Args:
        text: The full text; scored once for the overall compound score.
        tokenized_sent: Iterable of sentence strings; each one is scored and
            plotted in order.
    """
    overall = sid.polarity_scores(text)['compound']
    st.header("Sentiment Analysis >>")
    st.subheader(f"Overall Sentiment score is = {overall}")

    # Classify the compound score as positive, negative or neutral.
    if overall >= 0.05:
        st.subheader("Sentence Overall Rated As Positive")
    elif overall <= -0.05:
        st.subheader("Sentence Overall Rated As Negative")
    else:
        st.subheader("Sentence Overall Rated As Neutral")

    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = [
        {
            'sentence': sent,
            'sentiment': sid.polarity_scores(sent)['compound'],
            'len_sent': len(sent.split()),
        }
        for sent in tokenized_sent
    ]
    temporal_sentiment = pd.DataFrame(
        rows, columns=['sentence', 'sentiment', 'len_sent']
    )
    # "Stretch" weights each sentence's score by its length in words.
    temporal_sentiment['sentiment_stretch'] = (
        temporal_sentiment['sentiment'] * temporal_sentiment['len_sent']
    ).astype(float)

    def _plot_bars(column):
        """Bar chart of ``column`` per sentence; green when sentiment > 0."""
        fig = px.bar(
            temporal_sentiment,
            x=temporal_sentiment.index,
            y=column,
            hover_data=['sentence', 'sentiment', 'sentiment_stretch'],
            color=(temporal_sentiment['sentiment'] > 0),
            color_discrete_map={True: 'green', False: 'red'},
        )
        fig.update_layout(autosize=False, width=width * 100, height=height * 100)
        st.plotly_chart(fig)

    st.subheader("Temporal Sentiment")
    _plot_bars('sentiment')
    st.subheader("Temporal Sentiment Stretch")
    _plot_bars('sentiment_stretch')
# # ner | |
def nested_state(state):
    """Store ``state`` under the ``'nested_session'`` session-state key."""
    session = st.session_state
    session['nested_session'] = state
def create_ner(text):
    """Show named-entity statistics for ``text`` using the spaCy pipeline.

    Draws a chart of entity-label frequencies, lets the user pick a label to
    see which entity texts carry it, and optionally renders the displaCy
    entity markup.

    Args:
        text: Raw text to analyse.
    """
    st.header("Named Entity Recognition >>")
    st.subheader("Top Entities .. ")
    doc = nlp(text)

    def _plot_counts(pairs):
        """Horizontal bar chart of (item, count) pairs; returns the items."""
        items, counts = map(list, zip(*pairs))
        fig = px.bar(x=counts, y=items, color=counts)
        fig.update_layout(yaxis=dict(autorange='reversed'))
        fig.update_layout(autosize=False, width=width * 100, height=height * 100)
        st.plotly_chart(fig)
        return items

    label_counts = Counter(ent.label_ for ent in doc.ents).most_common()
    if not label_counts:
        # Unpacking zip(*[]) would raise; report the empty result instead.
        st.info("No named entities were found in the text.")
        return
    labels = _plot_counts(label_counts)

    st.subheader("What Are Those Entities .. ")
    # Pass the callable and its arguments separately: writing
    # on_change=nested_state(True) runs it immediately and registers None.
    ent_type = st.selectbox(
        "Select Named Entity :", labels, on_change=nested_state, args=(True,)
    )
    entity_counts = Counter(
        ent.text for ent in doc.ents if ent.label_ == ent_type
    ).most_common()
    _plot_counts(entity_counts)

    if st.button("Render NER"):
        st.markdown(displacy.render(doc, style='ent'), unsafe_allow_html=True)
# # pos tags | |
def create_pos(text):
    """Show part-of-speech tag statistics for ``text``.

    Draws a chart of tag frequencies and lets the user pick a tag to see
    which words carry it.

    Args:
        text: Raw text to analyse.
    """
    st.session_state['state_ner'] = True
    st.header("Part of Speech >>")
    st.subheader("Top POS ..")

    def _plot_counts(pairs):
        """Horizontal bar chart of (item, count) pairs; returns the items."""
        items, counts = map(list, zip(*pairs))
        fig = px.bar(x=counts, y=items, color=counts)
        fig.update_layout(yaxis=dict(autorange='reversed'))
        fig.update_layout(autosize=False, width=width * 100, height=height * 100)
        st.plotly_chart(fig)
        return items

    # Tokenise the argument rather than relying on a module-level token
    # list, and tag only once -- tagging is the expensive step.
    tagged = nltk.pos_tag(word_tokenize(text))
    if not tagged:
        st.info("No words were found in the text.")
        return

    tag_counts = Counter(tag for _, tag in tagged).most_common()
    tags = _plot_counts(tag_counts)

    st.subheader("What Are those POS .. ")
    pos_type = st.selectbox("Select POS :", tags)
    word_counts = Counter(
        word for word, tag in tagged if tag == pos_type
    ).most_common()
    _plot_counts(word_counts)
# # Text Complexity | |
def create_complexity(text, tokenized_sent):
    """Show the Flesch Reading Ease score overall and per sentence.

    Args:
        text: The full text; scored once for the headline figure.
        tokenized_sent: Iterable of sentence strings; each one is scored and
            plotted in order.
    """
    st.header("Text Complexity >>")
    st.caption(
        "Higher scores indicate material that is easier to read,"
        "lower numbers mark harder-to-read passages:"
        "– 0-30 College"
        "– 50-60 High school"
        "– 60+ Fourth grade"
    )
    st.subheader(f"Flesch Reading Ease score is = {flesch_reading_ease(text)}")

    # Per-sentence complexity.
    st.subheader("Temporal Complexity")
    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = [
        {
            'sentence': sent,
            'complexity': flesch_reading_ease(sent),
            'len_sent': len(sent.split()),
        }
        for sent in tokenized_sent
    ]
    temporal_complexity = pd.DataFrame(
        rows, columns=['sentence', 'complexity', 'len_sent']
    )
    # "Stretch" weights each sentence's score by its length in words; it is
    # only surfaced in the hover tooltip.
    temporal_complexity['complexity_stretch'] = (
        temporal_complexity['complexity'] * temporal_complexity['len_sent']
    ).astype(float)

    fig = px.bar(
        temporal_complexity,
        x=temporal_complexity.index,
        y='complexity',
        hover_data=['sentence', 'complexity', 'complexity_stretch'],
        # Sentences scoring above 30 are drawn green, the rest red.
        color=(temporal_complexity['complexity'] > 30),
        color_discrete_map={True: 'green', False: 'red'},
    )
    fig.update_layout(autosize=False, width=width * 100, height=height * 100)
    st.plotly_chart(fig)
if __name__ == '__main__':
    # Script entry point: builds the page, reads the input text and runs the
    # analysis selected in the sidebar.
    st.markdown("""<style>div.stButton > button:first-child
{background-color: #dbe6c4;}
</style>""", unsafe_allow_html=True)
    st.title("Text Disection : Analyze Your Text")

    # Plot dimensions; the analysis functions read `width` and `height` as
    # module-level names, so they must stay defined at this level.
    st.sidebar.header("Adjust Plot Dimensions")
    width = st.sidebar.slider("Plot Width", 1, 25, 10)
    height = st.sidebar.slider("Plot Height", 1, 25, 5)

    # Input
    st.header("Your Text please..")
    text = get_input()
    tokenized_sent = sent_tokenize(text)
    tokenized_word = word_tokenize(text)
    st.markdown(f"###### Total Sentences in the text = {len(tokenized_sent)}")
    st.markdown(f"###### Total words in the text = {len(tokenized_word)}")

    # Sidebar label -> action. Insertion order is the order of the radio
    # options; the last entry ('Keep Calm!') is the default selection.
    analyses = {
        'Wordcloud': lambda: create_wordcloud(text),
        'N-Gram Analysis': lambda: create_ngram(text),
        'Sentiment Analysis': lambda: create_sentiment(text, tokenized_sent),
        'Named Entity Recognition Analysis': lambda: create_ner(text),
        'Part Of Speech Analysis': lambda: create_pos(text),
        'Text Complexity Analysis': lambda: create_complexity(text, tokenized_sent),
        'Keep Calm!': lambda: st.image('nlp_meme.jpg'),
    }
    st.sidebar.title("Analysis Type")
    analysis = st.sidebar.radio("Select Analysis", options=list(analyses), index=6)

    # The text area starts out holding a single space, so compare against
    # the stripped text rather than the empty string.
    if not text.strip():
        st.text('First Input Text And Then Select Analysis From Sidebar')
        st.image('nlp_meme.jpg')
    else:
        try:
            analyses[analysis]()
        except Exception:
            # Best-effort fallback for failing analyses. `Exception` instead
            # of a bare `except` so that BaseException subclasses (e.g.
            # KeyboardInterrupt, SystemExit) are not swallowed.
            st.text('First Input Text And Then Select Analysis From Sidebar')
            st.image('nlp_meme.jpg')