Spaces:

Kurkur99
/

Sentiment_analysis

Runtime error

App Files Files Community

Sentiment_analysis / eda.py

Kurkur99

Update eda.py

298c4f8 11 months ago

raw

history blame contribute delete

No virus

1.89 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	from wordcloud import WordCloud
	import re

	def label_sentiment(rating):
	    """Map a star rating to a coarse sentiment label.

	    Args:
	        rating: The review rating. It is compared by equality against the
	            integers 1-5, so ``4`` and ``4.0`` are treated alike.

	    Returns:
	        ``'negative'`` for 1-2, ``'neutral'`` for 3, ``'positive'`` for
	        4-5, and ``'unknown'`` for anything else (e.g. ``0``, ``4.5``,
	        ``None`` or NaN).
	    """
	    if rating in (1, 2):
	        return 'negative'
	    if rating == 3:
	        return 'neutral'
	    if rating in (4, 5):
	        return 'positive'
	    # Out-of-range or missing ratings get a label instead of an exception so
	    # the function can be applied element-wise without pre-filtering.
	    return 'unknown'

	def process_review(review):
	    """Normalise a review's text before it is fed to the word cloud.

	    The text is lower-cased and every character other than ``a``-``z`` and
	    whitespace is removed.

	    Args:
	        review: The raw review text. Non-string values (for example the
	            ``None``/NaN placeholders used for missing cells) are treated
	            as an empty review.

	    Returns:
	        The cleaned text, or ``''`` when ``review`` is not a string.
	    """
	    if not isinstance(review, str):
	        # Missing cells arrive as float NaN or None, which have no .lower().
	        return ''
	    review = review.lower()
	    review = re.sub(r'[^a-z\s]', '', review)  # Remove non-alphabetical characters
	    return review

	def display_eda(data):
	    """Render exploratory charts for a review dataset on the Streamlit page.

	    Shows a bar chart of sentiment counts followed by one word cloud per
	    sentiment label.

	    Args:
	        data: A pandas DataFrame. It must contain a ``'sentiment'`` column
	            or a ``'rating'`` column to derive it from; the word clouds
	            additionally need a ``'review_description'`` column.

	    Returns:
	        None. All output is written to the Streamlit page.

	    Note:
	        When ``'sentiment'`` is missing it is added to ``data`` in place.
	    """
	    # Derive the 'sentiment' column from 'rating' if it doesn't exist
	    if 'sentiment' not in data.columns:
	        if 'rating' not in data.columns:
	            st.error("The dataset does not contain a 'rating' or 'sentiment' column. Please check the data source.")
	            return
	        data['sentiment'] = data['rating'].apply(label_sentiment)

	    # Distribution of sentiments
	    st.subheader("Distribution of Sentiments")
	    sentiment_counts = data['sentiment'].value_counts()
	    if sentiment_counts.empty:
	        # Nothing to plot (empty dataset or only missing labels).
	        st.info("No sentiment data available to display.")
	        return
	    fig, ax = plt.subplots()
	    sentiment_counts.plot(kind='bar', ax=ax)
	    ax.set_title('Distribution of Sentiments')
	    ax.set_xlabel('Sentiment')
	    ax.set_ylabel('Count')
	    st.pyplot(fig)
	    # pyplot keeps every figure alive until it is closed; release it so
	    # figures do not pile up across Streamlit reruns.
	    plt.close(fig)

	    # Word cloud for each sentiment
	    st.subheader("Word Clouds for Sentiments")
	    if 'review_description' not in data.columns:
	        st.warning("The dataset does not contain a 'review_description' column, so word clouds cannot be generated.")
	        return

	    # dropna(): a NaN label never compares equal to itself, so its subset
	    # would always be empty.
	    for sentiment in data['sentiment'].dropna().unique():
	        st.write(f"Word Cloud for {sentiment}")
	        reviews = data.loc[data['sentiment'] == sentiment, 'review_description']
	        # Skip missing reviews and coerce the rest to text before cleaning.
	        text = " ".join(process_review(review) for review in reviews.dropna().astype(str))
	        try:
	            wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
	        except ValueError:
	            # WordCloud raises ValueError when no words are left to plot.
	            st.info(f"Not enough text to build a word cloud for {sentiment}.")
	            continue
	        # Draw on the figure's own axes instead of global pyplot state.
	        fig, ax = plt.subplots()
	        ax.imshow(wordcloud, interpolation="bilinear")
	        ax.axis("off")
	        st.pyplot(fig)
	        plt.close(fig)