# text_dissection / app.py
import matplotlib.pyplot as plt
import pandas as pd
plt.rcParams["figure.figsize"] = (30,20)
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('vader_lexicon')
nltk.download('averaged_perceptron_tagger')
import spacy
# import en_core_web_sm
nlp = spacy.load("en_core_web_sm")
from spacy import displacy
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.corpus import stopwords
from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer
from collections import Counter
import seaborn as sns
import plotly.express as px
from wordcloud import WordCloud, STOPWORDS
from textstat import flesch_reading_ease
# import SessionState
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
def create_wordcloud(text):
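    """Render a word cloud of the input text; the canvas size follows the sidebar width/height sliders."""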
st.header("Here is wordcloud..")
wc = WordCloud(width=width*100 , height=height*100 , background_color='white', colormap='prism', collocations = False).generate_from_text(text)
fig, ax = plt.subplots()
# fig, ax = plt.subplots(figsize=(width , height))
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
st.pyplot(fig)
# @st.cache(suppress_st_warning=True, allow_output_mutation=True)
def get_input():
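    """Collect the text to analyse: two bundled example speeches, a Clear button,
    and a text area whose value and widget key live in st.session_state ('x' and 'k')."""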
    text_dream = 'ex_dream.txt'
    text_tryst = 'ex_tryst.txt'
    with open(text_dream) as f:
        dream = f.readlines()
    with open(text_tryst) as f:
        tryst = f.readlines()
    if 'x' not in st.session_state:
        st.session_state['x'] = ' '
    if 'k' not in st.session_state:
        st.session_state['k'] = 0
    if st.button('Example: I have a dream - M. King'):
        st.session_state['x'] = ' '.join(dream)
    if st.button('Example: Tryst with destiny - J. Nehru'):
        st.session_state['x'] = ' '.join(tryst)
    em = st.empty()
    if st.button('Clear'):
        st.session_state['k'] += 1
        st.session_state['x'] = ' '
    text = em.text_area("Paste your text or Click Example", value=st.session_state['x'],
                        key=st.session_state['k'], height=200, placeholder="Add here..")
    return text
def create_ngram(text):
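    """Plot the 10 most frequent unigrams, bigrams, and trigrams (stopwords removed) as horizontal bar charts."""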
st.header("N-Gram Anaysis is >>")
def plot_top_ngrams_barchart(text, n=2):
stop=set(stopwords.words('english'))
new= text.str.split()
new=new.values.tolist()
corpus=[word for i in new for word in i]
def _get_top_ngram(corpus, n=None):
vec = CountVectorizer(ngram_range=(n, n), stop_words=stop).fit(corpus)
bag_of_words = vec.transform(corpus)
sum_words = bag_of_words.sum(axis=0)
words_freq = [(word, sum_words[0, idx])
for word, idx in vec.vocabulary_.items()]
words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True)
return words_freq[:10]
top_n_bigrams=_get_top_ngram(text,n)[:10]
x,y=map(list,zip(*top_n_bigrams))
fig = px.bar(x=y,y=x, color=y)
fig.update_layout( yaxis=dict(autorange='reversed'))
fig.update_layout(autosize=False,width=width*100,height=height*100)
st.plotly_chart(fig)
st.subheader(f"Unigram:")
plot_top_ngrams_barchart(pd.Series([text]), 1)
st.subheader(f"Bigram:")
plot_top_ngrams_barchart(pd.Series([text]), 2)
st.subheader(f"Trigram:")
plot_top_ngrams_barchart(pd.Series([text]), 3)
# Overall Sentiment
def create_sentiment(text, tokenized_sent):
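    """Show the overall VADER compound score plus per-sentence ("temporal") sentiment bars."""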
    sentiment_dict = sid.polarity_scores(text)
    st.header("Sentiment Analysis >>")
    st.subheader(f"Overall Sentiment score is = {sentiment_dict['compound']}")
    # decide sentiment as positive, negative, or neutral (standard VADER thresholds)
    if sentiment_dict['compound'] >= 0.05:
        st.subheader("Sentence Overall Rated As Positive")
    elif sentiment_dict['compound'] <= -0.05:
        st.subheader("Sentence Overall Rated As Negative")
    else:
        st.subheader("Sentence Overall Rated As Neutral")
    # Temporal sentiment: one compound score per sentence
    st.subheader("Temporal Sentiment")
    rows = []
    for sent in tokenized_sent:
        sentiment_dict = sid.polarity_scores(sent)
        rows.append({'sentence': sent,
                     'sentiment': sentiment_dict['compound'],
                     'len_sent': len(sent.split())})
    temporal_sentiment = pd.DataFrame(rows, columns=['sentence', 'sentiment', 'len_sent'])
    # "stretch" weights each sentence's score by its length in words
    temporal_sentiment['sentiment_stretch'] = (temporal_sentiment['sentiment'] * temporal_sentiment['len_sent']).astype(float)
    fig = px.bar(temporal_sentiment, x=temporal_sentiment.index, y='sentiment',
                 hover_data=['sentence', 'sentiment', 'sentiment_stretch'],
                 color=(temporal_sentiment['sentiment'] > 0),
                 color_discrete_map={True: 'green', False: 'red'})
    fig.update_layout(autosize=False, width=width * 100, height=height * 100)
    st.plotly_chart(fig)
    st.subheader("Temporal Sentiment Stretch")
    fig = px.bar(temporal_sentiment, x=temporal_sentiment.index, y='sentiment_stretch',
                 hover_data=['sentence', 'sentiment', 'sentiment_stretch'],
                 color=(temporal_sentiment['sentiment'] > 0),
                 color_discrete_map={True: 'green', False: 'red'})
    fig.update_layout(autosize=False, width=width * 100, height=height * 100)
    st.plotly_chart(fig)
# NER
def nested_state(state):
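    """Widget on_change callback: record in session_state that the entity selectbox was used."""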
    # st.session_state['state_ner'] = state
    st.session_state['nested_session'] = state
def create_ner(text):
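    """Chart spaCy entity-label counts, drill into one label, and optionally render displacy markup."""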
    # st.session_state['state_ner'] = True
    st.header("Named Entity Recognition >>")
    st.subheader("Top Entities .. ")
    doc = nlp(text)
    ent = [X.label_ for X in doc.ents]
    counter = Counter(ent)
    count = counter.most_common()
    x, y = map(list, zip(*count))
    fig = px.bar(x=y, y=x, color=y)
    fig.update_layout(yaxis=dict(autorange='reversed'))
    fig.update_layout(autosize=False, width=width * 100, height=height * 100)
    st.plotly_chart(fig)
    st.subheader("What Are Those Entities .. ")
    # pass the callback itself (with args) instead of calling it immediately
    ent_type = st.selectbox("Select Named Entity :", x, on_change=nested_state, args=(True,))
    ent_single = [X.text for X in doc.ents if X.label_ == ent_type]
    counter = Counter(ent_single)
    count = counter.most_common()
    x, y = map(list, zip(*count))
    fig = px.bar(x=y, y=x, color=y)
    fig.update_layout(yaxis=dict(autorange='reversed'))
    fig.update_layout(autosize=False, width=width * 100, height=height * 100)
    st.plotly_chart(fig)
    if st.button("Render NER"):
        st.markdown(displacy.render(doc, style='ent'), unsafe_allow_html=True)
# POS tags
def create_pos(text):
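    """Chart NLTK part-of-speech tag counts for the tokenized text, then chart the words carrying a selected tag."""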
    st.session_state['state_ner'] = True
    st.header("Part of Speech >>")
    st.subheader("Top POS ..")
    # st.markdown(displacy.render(doc, style='dep'), unsafe_allow_html=True)
    # tokenized_word is the word-tokenized input built in the main block
    tagged = nltk.pos_tag(tokenized_word)
    pos = [tag for word, tag in tagged]
    counter = Counter(pos)
    count = counter.most_common()
    x, y = map(list, zip(*count))
    fig = px.bar(x=y, y=x, color=y)
    fig.update_layout(yaxis=dict(autorange='reversed'))
    fig.update_layout(autosize=False, width=width * 100, height=height * 100)
    st.plotly_chart(fig)
    st.subheader("What Are Those POS .. ")
    pos_type = st.selectbox("Select POS :", x)
    pos_single = [word for word, tag in tagged if tag == pos_type]
    counter = Counter(pos_single)
    count = counter.most_common()
    x, y = map(list, zip(*count))
    fig = px.bar(x=y, y=x, color=y)
    fig.update_layout(yaxis=dict(autorange='reversed'))
    fig.update_layout(autosize=False, width=width * 100, height=height * 100)
    st.plotly_chart(fig)
# Text Complexity
def create_complexity(text, tokenized_sent):
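    """Show the overall Flesch Reading Ease score plus a per-sentence ("temporal") complexity chart."""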
st.header(f"Text Complexity >>")
st.caption(f"Higher scores indicate material that is easier to read,lower numbers mark harder-to-read passages:\
– 0-30 College\
– 50-60 High school\
– 60+ Fourth grade")
st.subheader(f"Flesch Reading Ease score is = {flesch_reading_ease(text)}")
# Temporal sentiment
st.subheader(f"Temporal Complexity")
temporal_complexity= pd.DataFrame(columns =['sentence', 'complexity', 'len_sent'])
for sent in tokenized_sent:
complexity = flesch_reading_ease(sent)
temporal_complexity = temporal_complexity.append({'sentence' : sent,
'complexity' :complexity,'len_sent' : len(sent.split())}, ignore_index=True)
temporal_complexity['complexity_stretch'] = (temporal_complexity['complexity'] * temporal_complexity['len_sent']).astype(float)
fig = px.bar(temporal_complexity, x=temporal_complexity.index , y='complexity',
hover_data=['sentence','complexity','complexity_stretch'], color= (temporal_complexity['complexity'] > 30),
color_discrete_map={True: 'green',False: 'red'})
fig.update_layout(autosize=False,width=width*100,height=height*100)
st.plotly_chart(fig)
if __name__ == '__main__':
    m = st.markdown("""<style>div.stButton > button:first-child
        {background-color: #dbe6c4;}
        </style>""", unsafe_allow_html=True)
    st.title("Text Dissection : Analyze your text")
    # width / height are read as globals by every plotting helper above
    st.sidebar.header("Adjust Plot Dimensions")
    width = st.sidebar.slider("Plot Width", 1, 25, 10)
    height = st.sidebar.slider("Plot Height", 1, 25, 7)
    # Input
    st.header("Your Text please..")
    text = get_input()
    tokenized_sent = sent_tokenize(text)
    tokenized_word = word_tokenize(text)
    st.markdown(f"###### Total Sentences in the text = {len(tokenized_sent)}")
    st.markdown(f"###### Total words in the text = {len(tokenized_word)}")
    st.sidebar.title("Analysis Type")
    analysis = st.sidebar.radio("Select Analysis",
                                options=['Wordcloud', 'N-Gram Analysis', 'Sentiment Analysis',
                                         'Named Entity Recognition Analysis', 'Part Of Speech Analysis',
                                         'Text Complexity Analysis', 'Keep Calm!'], index=6)
    if st.button("Complete Analysis"):
        create_wordcloud(text)
        create_ngram(text)
        create_sentiment(text, tokenized_sent)
        create_ner(text)
        create_pos(text)
        create_complexity(text, tokenized_sent)
        analysis = 'Keep Calm!'
    if analysis == 'Wordcloud':
        create_wordcloud(text)
    if analysis == 'N-Gram Analysis':
        create_ngram(text)
    if analysis == 'Sentiment Analysis':
        create_sentiment(text, tokenized_sent)
    if analysis == 'Named Entity Recognition Analysis':
        create_ner(text)
    if analysis == 'Part Of Speech Analysis':
        create_pos(text)
    if analysis == 'Text Complexity Analysis':
        create_complexity(text, tokenized_sent)
    if analysis == 'Keep Calm!':
        st.image('nlp_meme.jpg')