"""Streamlit exploratory-data-analysis view for review sentiment data."""

import re

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from wordcloud import WordCloud

# Matches every character that is neither a lowercase ASCII letter nor
# whitespace; compiled once because it is applied to every review.
_NON_ALPHA_RE = re.compile(r'[^a-z\s]')


def label_sentiment(rating):
    """Label sentiment based on the rating.

    Args:
        rating: The rating value to classify.

    Returns:
        ``'negative'`` for 1 or 2, ``'neutral'`` for 3, ``'positive'`` for
        4 or 5, and ``'unknown'`` for any other value.
    """
    if rating in (1, 2):
        return 'negative'
    if rating == 3:
        return 'neutral'
    if rating in (4, 5):
        return 'positive'
    return 'unknown'


def process_review(review: str) -> str:
    """Lower-case the review text and strip non-alphabetical characters.

    Args:
        review: Raw review text.

    Returns:
        The lower-cased text with every character other than ``a``-``z``
        and whitespace removed.
    """
    return _NON_ALPHA_RE.sub('', review.lower())


def display_eda(data: pd.DataFrame) -> None:
    """Render the sentiment distribution and per-sentiment word clouds.

    If ``data`` has no ``'sentiment'`` column, one is derived from
    ``'rating'`` via :func:`label_sentiment` and stored on ``data`` itself
    (the frame is modified in place). Word clouds are built from the
    ``'review_description'`` column.

    Args:
        data: DataFrame holding the reviews to explore.
    """
    # Derive the 'sentiment' column from 'rating' if it doesn't exist.
    if 'sentiment' not in data.columns:
        if 'rating' not in data.columns:
            st.error("The dataset does not contain a 'rating' or 'sentiment' column. Please check the data source.")
            return
        data['sentiment'] = data['rating'].apply(label_sentiment)

    # Distribution of sentiments.
    st.subheader("Distribution of Sentiments")
    sentiment_counts = data['sentiment'].value_counts()
    if sentiment_counts.empty:
        # Plotting an empty Series fails, so report the situation instead.
        st.warning("No sentiment values are available to plot.")
        return

    fig, ax = plt.subplots()
    sentiment_counts.plot(kind='bar', ax=ax)
    ax.set_title('Distribution of Sentiments')
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')
    st.pyplot(fig)
    # Close the figure so figures do not accumulate across Streamlit reruns.
    plt.close(fig)

    # Word cloud for each sentiment.
    st.subheader("Word Clouds for Sentiments")
    if 'review_description' not in data.columns:
        st.warning(
            "The dataset does not contain a 'review_description' column, "
            "so word clouds cannot be generated."
        )
        return

    # NaN labels never compare equal to themselves, so they would only ever
    # select an empty subset; skip them up front.
    for sentiment in data['sentiment'].dropna().unique():
        st.write(f"Word Cloud for {sentiment}")
        reviews = data.loc[data['sentiment'] == sentiment, 'review_description']
        # Missing reviews are dropped and remaining values coerced to str so
        # process_review always receives text.
        text = " ".join(
            process_review(review) for review in reviews.dropna().astype(str)
        )

        try:
            wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
        except ValueError:
            # WordCloud raises ValueError when the text yields no words.
            st.info(f"Not enough text to build a word cloud for {sentiment}.")
            continue

        # Draw on an explicit Axes rather than pyplot's implicit current
        # figure, then release the figure once rendered.
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig)
        plt.close(fig)