File size: 6,158 Bytes
0fa70df 3593cb1 0fa70df 75c9e82 0fa70df cd0e2ba 0fa70df cd0e2ba 0fa70df 357d56c 96dbda2 082be25 96dbda2 082be25 96dbda2 082be25 6c1dca8 631bf65 9227144 1916040 75c9e82 9227144 1916040 75c9e82 9227144 1916040 75c9e82 1916040 cfad33c 1916040 666eadf 9227144 0fa70df 9227144 3593cb1 9227144 1916040 9227144 ec7a369 9227144 8625123 9227144 904173e 666eadf 1916040 e54bd75 9227144 e54bd75 33e5a68 e54bd75 37d43c2 666eadf c8ca882 1916040 631bf65 1916040 904173e 631bf65 ec7a369 631bf65 ec7a369 f1aefa1 6c515f5 51d1ed9 c8ca882 ec7a369 6c515f5 28c6e6d 3593cb1 95e55e7 3593cb1 6d42812 3593cb1 357d56c 99b8a87 631bf65 5797c96 3593cb1 51d1ed9 0fa70df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import streamlit as st
import pandas as pd
from IPython.display import Audio
import torch
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
from num2words import num2words
import plotly.express as px
#import dataset
def preprocess_dataset(file_path, num_samples_each_class):
    """Load the airline tweets dataset and return a balanced random sample.

    Reads the file at ``file_path`` — a ``.csv`` path is read with
    ``pd.read_csv``; anything else is read with ``pd.read_excel`` as before —
    keeps only rows whose ``airline_sentiment_confidence`` exceeds 0.5, and
    randomly draws ``num_samples_each_class`` rows from each sentiment class.

    Args:
        file_path: Path to the dataset file (columns: ``text``,
            ``airline_sentiment``, ``airline_sentiment_confidence``).
        num_samples_each_class: Number of rows to sample per sentiment class.

    Returns:
        tuple[pd.Series, pd.Series]: aligned ``text`` and
        ``airline_sentiment`` columns of the balanced sample.
    """
    # Accept CSV as well as Excel; non-.csv inputs keep the original behavior.
    if str(file_path).lower().endswith(".csv"):
        dataset = pd.read_csv(file_path)
    else:
        dataset = pd.read_excel(file_path)
    # Drop low-confidence labels before sampling.
    dataset = dataset[dataset['airline_sentiment_confidence'] > 0.5]
    dataset = dataset[['text', 'airline_sentiment']]
    # Randomly select an equal number of samples for each class.
    dataset = (
        dataset.groupby('airline_sentiment')
        .apply(lambda x: x.sample(n=num_samples_each_class))
        .reset_index(drop=True)
    )
    return dataset['text'], dataset['airline_sentiment']
def txt2sentiment(text):
    """Classify a tweet's sentiment with the fine-tuned HF model.

    Args:
        text: The tweet text to classify.

    Returns:
        tuple: ``(text, predicted_label, predicted_confidence)`` — the
        original text, the label mapped to ``"positive"``/``"neutral"``/
        ``"negative"``, and the confidence as a two-decimal percentage string.
    """
    # Build the classification pipeline once and reuse it across calls;
    # the original re-created it per tweet, reloading the whole model.
    if not hasattr(txt2sentiment, "_pipe"):
        txt2sentiment._pipe = pipeline(
            "text-classification",
            model="Kayyyy27/fine-tuned-United_Airlines_Twitter_Sentiment_Analysis",
        )
    result = txt2sentiment._pipe(text)
    # The model emits LABEL_0/1/2; map them to human-readable sentiments.
    label_mapping = {"LABEL_2": "positive", "LABEL_1": "neutral", "LABEL_0": "negative"}
    predicted_label = label_mapping[result[0]['label']]
    predicted_confidence = "{:.2f}".format(result[0]['score'] * 100)
    return text, predicted_label, predicted_confidence
def sentiment2audio(userinput, sentiment, confidence):
    """Synthesize speech reading a tweet and its predicted sentiment.

    Args:
        userinput: The original tweet text.
        sentiment: Predicted sentiment label ("positive"/"neutral"/"negative").
        confidence: Confidence value, already spelled out in words by the caller.

    Returns:
        IPython.display.Audio: the synthesized speech clip.
    """
    # Cache the TTS pipeline and speaker embedding on first use; the
    # original reloaded the model AND the embeddings dataset on every call.
    if not hasattr(sentiment2audio, "_pipe"):
        sentiment2audio._pipe = pipeline("text-to-speech", "microsoft/speecht5_tts")
        embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        sentiment2audio._speaker = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    text = f"{userinput}. The tweet is {sentiment}, with {confidence} percent confidence"
    speech = sentiment2audio._pipe(text, forward_params={"speaker_embeddings": sentiment2audio._speaker})
    return Audio(speech['audio'], rate=speech['sampling_rate'])
def audiosummary(sentiment_counts):
    """Synthesize a spoken summary of the per-class sentiment counts.

    Args:
        sentiment_counts: Mapping-like object (e.g. the pandas Series from
            ``value_counts()``) of sentiment label -> number of tweets;
            missing classes default to zero.

    Returns:
        IPython.display.Audio: speech reading out the positive, negative,
        and neutral tweet counts.
    """
    # Spell each count out in words so the TTS model reads it naturally.
    positive = num2words(sentiment_counts.get('positive', 0), lang='en')
    negative = num2words(sentiment_counts.get('negative', 0), lang='en')
    neutral = num2words(sentiment_counts.get('neutral', 0), lang='en')
    text = f"There are {positive} positive tweets, {negative} negative tweets, and {neutral} neutral tweets"
    # Cache the TTS pipeline and speaker embedding across calls; the
    # original reloaded the model and embeddings dataset every time.
    if not hasattr(audiosummary, "_pipe"):
        audiosummary._pipe = pipeline("text-to-speech", "microsoft/speecht5_tts")
        embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        audiosummary._speaker = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    speech = audiosummary._pipe(text, forward_params={"speaker_embeddings": audiosummary._speaker})
    return Audio(speech['audio'], rate=speech['sampling_rate'])
def process_tweet(tweet):
    """Run one tweet through the two-stage pipeline and package the result.

    Stage 1 classifies the tweet's sentiment; stage 2 synthesizes an audio
    readout of the tweet plus its predicted sentiment and confidence.

    Args:
        tweet: Raw tweet text.

    Returns:
        dict: keys ``'Input'``, ``'Sentiment'``, ``'Confidence'`` (percentage
        string), and ``'Audio'`` (playable audio object).
    """
    # Stage 1: text -> sentiment label + confidence percentage.
    text, sentiment, confidence = txt2sentiment(tweet)
    # Stage 2: sentiment -> spoken audio (confidence spelled out in words).
    spoken_confidence = num2words(confidence, lang='en')
    audio = sentiment2audio(text, sentiment, spoken_confidence)
    return {
        'Input': text,
        'Sentiment': sentiment,
        'Confidence': f'{confidence}%',
        'Audio': audio,
    }
def main():
    """Streamlit app entry point.

    Collects tweets via text input or CSV upload, runs each through the
    sentiment + text-to-speech pipeline, then renders audio players for the
    first five tweets, a results table, and a sentiment summary (spoken and
    as a pie chart).
    """
    st.header(":rainbow[United Airlines Twitter Sentiment Analysis]", divider='rainbow')
    # Fixed typo in the user-facing prompt ("seperated" -> "separated").
    st.write(":blue[Enter tweets to analyze its sentiment(separated by @united) or upload a file:]")
    # User input options
    option = st.selectbox("Input type", ["Text Input", "File Upload"])
    tweets = []
    if option == "Text Input":
        user_input = st.text_input("Enter tweets (separated by @united)")
        if user_input:
            # Split the user input into multiple tweets; the chunk before
            # the first "@united" mention is discarded.
            user_tweets = [tweet.strip() for tweet in user_input.split("@united")[1:]]
            for tweet in user_tweets:
                tweets.append(process_tweet(tweet))
    elif option == "File Upload":
        uploaded_file = st.file_uploader("Upload csv file", type="csv")
        if uploaded_file is not None:
            # Read the uploaded CSV file into a pandas DataFrame.
            df = pd.read_csv(uploaded_file)
            df["text"] = df["text"].str.replace("@united", "")
            # Process each tweet in the DataFrame
            for tweet in df["text"]:
                tweets.append(process_tweet(tweet))
    st.write(":red[Only the first 5 audio outputs will be displayed]")
    # Create a DataFrame from the list of tweet information
    df_tweets = pd.DataFrame(tweets)
    # pd.DataFrame never returns None, so only an emptiness check is needed.
    if not df_tweets.empty:
        # Play audio for at most the first five tweets.
        for _, row in df_tweets.head(5).iterrows():
            st.audio(row['Audio'].data, format="audio/wav", start_time=0)
        df_tweets = df_tweets[["Input", "Sentiment", "Confidence"]]
        st.table(df_tweets)
        # Add a summary section with a pie chart of sentiments
        st.subheader(":blue[Summary]")
        sentiment_counts = df_tweets['Sentiment'].value_counts()
        # Spoken summary of the sentiment counts.
        outputsummary = audiosummary(sentiment_counts)
        st.audio(outputsummary.data, format="audio/wav", start_time=0)
        # Pie chart of the sentiment distribution via Plotly Express.
        fig = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index)
        st.plotly_chart(fig)
# Launch the Streamlit app when this file is executed directly.
if __name__ == "__main__":
    main()