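# Streamlit app: United Airlines Twitter sentiment analysis with spoken output.
# Tweets (typed in or uploaded as a CSV) are classified as positive/neutral/negative
# with a fine-tuned text-classification model, and each result is read aloud with SpeechT5.
# Run locally with: streamlit run app.py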
import streamlit as st
import pandas as pd
from IPython.display import Audio
import torch
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
from num2words import num2words
import plotly.express as px
# Dataset loading/preprocessing helper
def preprocess_dataset(file_path, num_samples_each_class):
    dataset = pd.read_excel(file_path)
    # Keep only confidently labelled rows and the two columns we need
    dataset = dataset[dataset['airline_sentiment_confidence'] > 0.5]
    dataset = dataset[['text', 'airline_sentiment']]
    # Randomly select an equal number of samples for each sentiment class
    dataset = dataset.groupby('airline_sentiment').apply(lambda x: x.sample(n=num_samples_each_class)).reset_index(drop=True)
    text = dataset['text']
    sentiments = dataset['airline_sentiment']
    return text, sentiments
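# Illustrative usage (the file name and sample size below are hypothetical):
#   texts, labels = preprocess_dataset("Tweets.xlsx", num_samples_each_class=500)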
def txt2sentiment(text):
    # Classify a single tweet with the fine-tuned sentiment model
    pipe = pipeline("text-classification", model="Kayyyy27/fine-tuned-United_Airlines_Twitter_Sentiment_Analysis")
    result = pipe(text)
    predicted_label = result[0]['label']
    # Map the raw model labels to human-readable sentiments
    label_mapping = {"LABEL_2": "positive", "LABEL_1": "neutral", "LABEL_0": "negative"}
    predicted_label = label_mapping[predicted_label]
    predicted_confidence = "{:.2f}".format(result[0]['score'] * 100)
    return text, predicted_label, predicted_confidence
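# Illustrative example of the mapping above (the score shown is made up):
#   pipe("Great flight, thank you @united!") -> [{'label': 'LABEL_2', 'score': 0.97}]
#   label_mapping['LABEL_2'] -> 'positive', reported as 97.00 percent confidence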
def sentiment2audio(userinput, sentiment, confidence):
    # Speak the tweet and its predicted sentiment with SpeechT5
    pipe3 = pipeline("text-to-speech", "microsoft/speecht5_tts")
    # SpeechT5 needs a speaker embedding; use a fixed voice from the CMU Arctic x-vectors
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    text = f"{userinput}. The tweet is {sentiment}, with {confidence} percent confidence"
    speech = pipe3(text, forward_params={"speaker_embeddings": speaker_embedding})
    audio = Audio(speech['audio'], rate=speech['sampling_rate'])
    return audio
def audiosummary(sentiment_counts):
    # Spell out the counts so the text-to-speech model reads them naturally
    positive_sentiment_count = num2words(sentiment_counts.get('positive', 0), lang='en')
    negative_sentiment_count = num2words(sentiment_counts.get('negative', 0), lang='en')
    neutral_sentiment_count = num2words(sentiment_counts.get('neutral', 0), lang='en')
    text = f"There are {positive_sentiment_count} positive tweets, {negative_sentiment_count} negative tweets, and {neutral_sentiment_count} neutral tweets"
    # Generate the spoken summary with SpeechT5 and the same fixed speaker embedding
    pipe3 = pipeline("text-to-speech", "microsoft/speecht5_tts")
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    speech = pipe3(text, forward_params={"speaker_embeddings": speaker_embedding})
    audio = Audio(speech['audio'], rate=speech['sampling_rate'])
    return audio
def process_tweet(tweet):
    # Stage 1: Text to sentiment
    text, predicted_sentiment, predicted_confidence = txt2sentiment(tweet)
    # Stage 2: Sentiment to audio
    audio_output = sentiment2audio(text, predicted_sentiment, num2words(predicted_confidence, lang='en'))
    # Create a dictionary with the tweet information
    tweet_info = {
        'Input': text,
        'Sentiment': predicted_sentiment,
        'Confidence': f'{predicted_confidence}%',
        'Audio': audio_output
    }
    # Return the tweet information
    return tweet_info
def main():
    st.header(":rainbow[United Airlines Twitter Sentiment Analysis]", divider='rainbow')
    st.write(":blue[Enter tweets to analyze their sentiment (separated by @united) or upload a file:]")
    # User input options
    option = st.selectbox("Input type", ["Text Input", "File Upload"])
    tweets = []
    if option == "Text Input":
        user_input = st.text_input("Enter tweets (separated by @united)")
        if user_input:
            # Split the user input into multiple tweets
            user_tweets = [tweet.strip() for tweet in user_input.split("@united")[1:]]
            for tweet in user_tweets:
                tweet_info = process_tweet(tweet)
                tweets.append(tweet_info)
    elif option == "File Upload":
        uploaded_file = st.file_uploader("Upload csv file", type="csv")
        if uploaded_file is not None:
            # Read the uploaded CSV file into a pandas DataFrame
            df = pd.read_csv(uploaded_file)
            df["text"] = df["text"].str.replace("@united", "")
            # Process each tweet in the DataFrame
            for tweet in df["text"]:
                tweet_info = process_tweet(tweet)
                tweets.append(tweet_info)
st.write(":red[Only the first 5 audio outputs will be displayed]")
# Create a DataFrame from the list of tweet information
df_tweets = pd.DataFrame(tweets)
# Display the DataFrame as a table
if df_tweets is not None and not df_tweets.empty:
# Create a copy of the DataFrame to add audio buttons
df_with_buttons = df_tweets.head(5).copy()
# Iterate over the DataFrame rows
for index, row in df_with_buttons.iterrows():
audio = row['Audio']
st.audio(audio.data, format="audio/wav", start_time=0)
df_tweets = df_tweets[["Input","Sentiment", "Confidence"]]
# # Create an expander to show the data output
# with st.expander("View All Outputs"):
# st.table(df_tweets)
st.table(df_tweets)
# Add a summary section with a pie chart of sentiments
st.subheader(":blue[Summary]")
# Calculate sentiment counts
sentiment_counts = df_tweets['Sentiment'].value_counts()
#create an audio that read the summary output
outputsummary = audiosummary(sentiment_counts)
st.audio(outputsummary.data, format="audio/wav", start_time=0)
# Create a pie chart using Plotly Express
fig = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index)
st.plotly_chart(fig)
if __name__ == "__main__":
main()