"""Streamlit app: United Airlines tweet sentiment analysis with spoken results.

Pipeline: tweet text -> fine-tuned sentiment classifier -> SpeechT5
text-to-speech readout of the prediction, plus a table, an audio summary,
and a sentiment pie chart.
"""

import streamlit as st
import pandas as pd
import torch
import plotly.express as px
import soundfile as sf  # noqa: F401 -- kept for parity with the original file
from IPython.display import Audio
from transformers import pipeline
from datasets import load_dataset
from num2words import num2words

# Model label ids -> human-readable sentiment names
# (mapping taken from the fine-tuned model's output convention).
_LABEL_MAPPING = {"LABEL_2": "positive", "LABEL_1": "neutral", "LABEL_0": "negative"}


@st.cache_resource
def _get_sentiment_pipeline():
    """Load the fine-tuned sentiment classifier once per server process."""
    return pipeline(
        "text-classification",
        model="Kayyyy27/fine-tuned-United_Airlines_Twitter_Sentiment_Analysis",
    )


@st.cache_resource
def _get_tts_pipeline():
    """Load the SpeechT5 text-to-speech pipeline once per server process."""
    return pipeline("text-to-speech", "microsoft/speecht5_tts")


@st.cache_resource
def _get_speaker_embedding():
    """Load the fixed CMU-Arctic x-vector speaker embedding (index 7306)."""
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    return torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)


def _synthesize(text):
    """Convert *text* to an ``IPython.display.Audio`` object via SpeechT5.

    Shared by :func:`sentiment2audio` and :func:`audiosummary` so the TTS
    pipeline and speaker embedding are set up in exactly one place.
    """
    speech = _get_tts_pipeline()(
        text, forward_params={"speaker_embeddings": _get_speaker_embedding()}
    )
    return Audio(speech["audio"], rate=speech["sampling_rate"])


def preprocess_dataset(file_path, num_samples_each_class):
    """Read the labelled tweet spreadsheet and return a class-balanced sample.

    Args:
        file_path: Path to an Excel file with ``text``,
            ``airline_sentiment`` and ``airline_sentiment_confidence`` columns.
        num_samples_each_class: Number of rows to draw per sentiment class.

    Returns:
        Tuple ``(text, sentiments)`` of two aligned pandas Series.
    """
    dataset = pd.read_excel(file_path)
    # Drop rows whose human annotation confidence is too low to trust.
    dataset = dataset[dataset["airline_sentiment_confidence"] > 0.5]
    dataset = dataset[["text", "airline_sentiment"]]
    # Randomly select an equal number of samples for each sentiment class.
    dataset = (
        dataset.groupby("airline_sentiment")
        .apply(lambda x: x.sample(n=num_samples_each_class))
        .reset_index(drop=True)
    )
    return dataset["text"], dataset["airline_sentiment"]


def txt2sentiment(text):
    """Classify *text* with the fine-tuned model.

    Returns:
        Tuple ``(text, predicted_label, predicted_confidence)`` where the
        label is one of ``positive``/``neutral``/``negative`` and the
        confidence is a percentage formatted to two decimals (string).
    """
    result = _get_sentiment_pipeline()(text)
    predicted_label = _LABEL_MAPPING[result[0]["label"]]
    predicted_confidence = "{:.2f}".format(result[0]["score"] * 100)
    return text, predicted_label, predicted_confidence


def sentiment2audio(userinput, sentiment, confidence):
    """Speak the tweet followed by its predicted sentiment and confidence."""
    text = f"{userinput}. The tweet is {sentiment}, with {confidence} percent confidence"
    return _synthesize(text)


def audiosummary(sentiment_counts):
    """Speak how many positive/negative/neutral tweets were analysed.

    Args:
        sentiment_counts: Mapping (e.g. ``Series.value_counts()``) from
            sentiment label to count; missing labels default to zero.
    """
    counts = {
        label: num2words(sentiment_counts.get(label, 0), lang="en")
        for label in ("positive", "negative", "neutral")
    }
    text = (
        f"There are {counts['positive']} positive tweets, "
        f"{counts['negative']} negative tweets, "
        f"and {counts['neutral']} neutral tweets"
    )
    return _synthesize(text)


def process_tweet(tweet):
    """Run the full pipeline on one tweet and return its display record."""
    # Stage 1: text -> sentiment.
    text, predicted_sentiment, predicted_confidence = txt2sentiment(tweet)
    # Stage 2: sentiment -> spoken audio (confidence read out in words).
    audio_output = sentiment2audio(
        text, predicted_sentiment, num2words(predicted_confidence, lang="en")
    )
    return {
        "Input": text,
        "Sentiment": predicted_sentiment,
        "Confidence": f"{predicted_confidence}%",
        "Audio": audio_output,
    }


def main():
    """Render the Streamlit UI: input, per-tweet audio, table, summary."""
    st.header(":rainbow[United Airlines Twitter Sentiment Analysis]", divider="rainbow")
    st.write(":blue[Enter tweets to analyze its sentiment(separated by @united) or upload a file:]")

    option = st.selectbox("Input type", ["Text Input", "File Upload"])
    tweets = []

    if option == "Text Input":
        user_input = st.text_input("Enter tweets (separated by @united)")
        if user_input:
            # "@united" is the delimiter; text before the first marker is dropped
            # (split()[1:]), matching the prompt shown to the user.
            for tweet in (part.strip() for part in user_input.split("@united")[1:]):
                tweets.append(process_tweet(tweet))
    elif option == "File Upload":
        uploaded_file = st.file_uploader("Upload csv file", type="csv")
        if uploaded_file is not None:
            df = pd.read_csv(uploaded_file)
            if "text" not in df.columns:
                # Guard: a CSV without the expected column would raise KeyError.
                st.error("The uploaded file must contain a 'text' column.")
            else:
                df["text"] = df["text"].str.replace("@united", "")
                for tweet in df["text"]:
                    tweets.append(process_tweet(tweet))

    df_tweets = pd.DataFrame(tweets)
    # pd.DataFrame(...) is never None, so only the emptiness check is needed.
    if not df_tweets.empty:
        st.write(":red[Only the first 5 audio outputs will be displayed]")
        for _, row in df_tweets.head(5).iterrows():
            st.audio(row["Audio"].data, format="audio/wav", start_time=0)

        df_tweets = df_tweets[["Input", "Sentiment", "Confidence"]]
        st.table(df_tweets)

        # Summary section: spoken counts plus a pie chart of sentiments.
        st.subheader(":blue[Summary]")
        sentiment_counts = df_tweets["Sentiment"].value_counts()
        outputsummary = audiosummary(sentiment_counts)
        st.audio(outputsummary.data, format="audio/wav", start_time=0)
        fig = px.pie(
            sentiment_counts,
            values=sentiment_counts.values,
            names=sentiment_counts.index,
        )
        st.plotly_chart(fig)


if __name__ == "__main__":
    main()