import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import re

def label_sentiment(rating):
    """Translate a star rating into a sentiment label.

    Ratings equal to 1 or 2 map to 'negative', 3 maps to 'neutral', and
    4 or 5 map to 'positive'. Any other value yields 'unknown'.
    """
    # Each entry pairs the ratings that belong to a bucket with its label;
    # buckets are checked in order and the first match wins.
    buckets = (
        ((1, 2), 'negative'),
        ((3,), 'neutral'),
        ((4, 5), 'positive'),
    )
    for ratings, label in buckets:
        if rating in ratings:
            return label
    return 'unknown'

def process_review(review):
    """Normalise review text for word-frequency analysis.

    The text is lowercased and every character that is not a lowercase
    ASCII letter (a-z) or whitespace is removed, which drops digits,
    punctuation and any non-ASCII characters.

    Args:
        review: The review text. Non-string values (for example ``None`` or
            a float NaN standing in for a missing review) are treated as
            containing no text.

    Returns:
        The cleaned text, or an empty string when ``review`` is not a string.
    """
    # Missing reviews typically arrive as None/NaN rather than str; calling
    # .lower() on them would raise AttributeError, so treat them as empty.
    if not isinstance(review, str):
        return ''
    return re.sub(r'[^a-z\s]', '', review.lower())

def display_eda(data):
    """Render exploratory charts for a reviews dataset in the Streamlit app.

    Shows a bar chart of sentiment counts followed by one word cloud per
    sentiment label.

    Args:
        data: DataFrame of reviews. It must contain either a 'sentiment'
            column or a 'rating' column (from which 'sentiment' is derived
            with ``label_sentiment``). The word clouds additionally need a
            'review_description' column holding the review text.

    Side effects:
        When 'sentiment' is missing it is added to ``data`` in place, so the
        caller's DataFrame gains that column. Problems with the input are
        reported through Streamlit messages rather than raised.
    """
    # Derive the 'sentiment' column from 'rating' if it doesn't exist
    if 'sentiment' not in data.columns:
        if 'rating' not in data.columns:
            st.error("The dataset does not contain a 'rating' or 'sentiment' column. Please check the data source.")
            return
        data['sentiment'] = data['rating'].apply(label_sentiment)

    _render_sentiment_distribution(data)
    _render_word_clouds(data)


def _render_sentiment_distribution(data):
    """Draw a bar chart of how many rows carry each sentiment label."""
    st.subheader("Distribution of Sentiments")
    sentiment_counts = data['sentiment'].value_counts()
    if sentiment_counts.empty:
        # Nothing to plot (no rows, or every sentiment is missing).
        st.info("No sentiment data available to plot.")
        return

    fig, ax = plt.subplots()
    try:
        sentiment_counts.plot(kind='bar', ax=ax)
        ax.set_title('Distribution of Sentiments')
        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Count')
        st.pyplot(fig)
    finally:
        # Streamlit re-runs the script on every interaction; figures that
        # are not closed pile up inside pyplot and leak memory.
        plt.close(fig)


def _render_word_clouds(data):
    """Draw one word cloud per sentiment label from the review text."""
    st.subheader("Word Clouds for Sentiments")
    if 'review_description' not in data.columns:
        st.error("The dataset does not contain a 'review_description' column, so word clouds cannot be generated.")
        return

    # dropna(): a missing label never equals itself, so it would always
    # select an empty subset.
    for sentiment in data['sentiment'].dropna().unique():
        st.write(f"Word Cloud for {sentiment}")
        reviews = data.loc[data['sentiment'] == sentiment, 'review_description']
        # Drop missing reviews and coerce the rest to str so that
        # process_review always receives text.
        text = " ".join(
            process_review(review) for review in reviews.dropna().astype(str)
        )
        try:
            wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
        except ValueError:
            # WordCloud raises ValueError when the text yields no usable words.
            st.info(f"Not enough review text to build a word cloud for {sentiment}.")
            continue

        # Draw on explicit axes instead of pyplot's implicit "current
        # figure", which is shared global state.
        fig, ax = plt.subplots()
        try:
            ax.imshow(wordcloud, interpolation="bilinear")
            ax.axis("off")
            st.pyplot(fig)
        finally:
            plt.close(fig)