|
|
|
"""FA20-BCS-OO1 final app.ipynb |
|
|
|
Automatically generated by Colab |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
import joblib
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import gradio as gr

from transformers import pipeline
from TweetNormalizer import normalizeTweet
|
|
|
|
|
|
|
# Task A model: binary sarcasm detection.
pipe = pipeline(
    model="seek007/taskA-DeBERTa-large-1.0.0",
    tokenizer="seek007/taskA-DeBERTa-large-1.0.0",
)
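# The classifier returns one dict per input, e.g. [{'label': 'LABEL_1', 'score': 0.93}]
# (an assumption about this model's label strings; predict() below maps LABEL_0/LABEL_1
# to readable names and also accepts labels that are already readable).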
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict(text=None, fil=None):
    """Task A: label the text box entry and/or an uploaded file as sarcastic / non-sarcastic."""
    sentiment = None
    df = None
    fig = None

    # Map the raw model output labels to readable class names.
    mapping = {
        'LABEL_0': 'non_sarcastic',
        'LABEL_1': 'sarcastic'
    }

    if fil:
        if fil.name.endswith('.csv'):
            df = pd.read_csv(fil.name)
        elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
            df = pd.read_excel(fil.name)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Normalize every tweet, classify it, and attach readable labels.
        normalized = [normalizeTweet(t) for t in list(df.tweet)]
        d = pd.DataFrame(pipe(normalized))
        df['label'] = d['label'].map(mapping).fillna(d['label'])

        # Drop the gold-standard column if the uploaded file contains one.
        if 'sarcastic' in df.columns:
            df.drop('sarcastic', axis=1, inplace=True)

        sarcastic_count = np.sum(df.label == 'sarcastic')
        non_sarcastic_count = np.sum(df.label == 'non_sarcastic')

        # Pie chart of the predicted class distribution.
        labels = ['Sarcastic', 'Non-Sarcastic']
        sizes = [sarcastic_count, non_sarcastic_count]
        colors = ['gold', 'lightblue']
        explode = (0.1, 0)
        sns.set_style("whitegrid")
        fig, ax = plt.subplots()
        ax.pie(sizes, explode=explode, labels=labels, colors=colors,
               autopct='%1.1f%%', shadow=True, startangle=140)
        ax.axis('equal')
        plt.title('Sarcastic vs Non-Sarcastic Tweets')

    if text:
        # Classify the single text box entry.
        prediction = pipe([normalizeTweet(text)])[0]
        print(prediction)
        sentiment = "Sarcastic" if prediction['label'] in ('LABEL_1', 'sarcastic') else "Non Sarcastic"
        if fil is None:
            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])
    elif fil is None:
        return "Either enter text or upload a .csv or .xlsx file!", df, fig

    return sentiment, df, fig
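# Example of calling the function directly (hypothetical input, bypassing the Gradio UI):
#   sentiment, table, chart = predict(text="Oh great, another Monday.")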
|
|
|
|
|
|
|
|
|
|
|
file_path =gr.File(label="Upload a File") |
|
output = gr.Label(num_top_classes=2, label="Predicted Labels") |
|
demo = gr.Interface(fn=predict, inputs=[gr.Text(label="Input"),file_path], outputs=[output, gr.DataFrame(headers =['Tweets', 'Labels'], wrap=True), gr.Plot(label="Sarcasm Predictor")], title="Sarcasm Predictor") |
|
|
|
|
|
|
|
file_path =gr.File(label="Upload a File") |
|
label = gr.Label(num_top_classes=3, label="Top 3 Labels") |
|
classification = gr.Interface(classify, inputs=[gr.Text(label="Input"),file_path], outputs= [label, gr.DataFrame(headers =['Tweets', 'Label', "Score"], wrap=True), gr.Plot(label="Sarcasm classifier")], title="Sarcasm Classifier") |
|
|
|
|
|
|
|
# Task B model: fine-grained sarcasm-type classification.
pipe2 = pipeline(
    model="seek007/taskB-bertweet-base-trainer-1.0.0",
    tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0",
)
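# pipe2 is assumed to return dicts such as {'label': 'irony', 'score': ...}; the exact
# label strings come from the model config and should match the class list in classifyB.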
|
|
|
def classifyB(text=None, fil=None):
    """Task B: classify the text box entry and/or an uploaded file by sarcasm type."""
    sentiment = None
    df = None
    fig = None
    # Classes the Task B model distinguishes.
    labels = ['sarcasm', 'irony', 'satire', 'understatement', 'overstatement', 'rhetorical question']

    if fil:
        if fil.name.endswith('.csv'):
            df = pd.read_csv(fil.name)
        elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
            df = pd.read_excel(fil.name)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Normalize and classify every tweet in the uploaded file.
        normalized = [normalizeTweet(t) for t in list(df.tweet)]
        d = pipe2(normalized)

        # One (label, score) row per tweet, appended to the uploaded frame.
        structured_data = [{"label": item['label'], "score": item['score']} for item in d]
        df = pd.concat([df, pd.DataFrame(structured_data)], axis=1)

        # Count plot of the predicted sarcasm types.
        fig = plt.figure()
        sns.countplot(x='label', data=df, palette='viridis')
        plt.title('Result: Count Plot')
        plt.xlabel('label')
        plt.ylabel('Count')

    if text:
        # Classify the single text box entry.
        prediction = pipe2([normalizeTweet(text)])[0]
        print(prediction["label"])
        sentiment = prediction['label']

    return sentiment, df, fig
|
|
|
file_path =gr.File(label="Upload a File") |
|
label = gr.Label( label="Labels") |
|
classificationB = gr.Interface(classifyB, inputs=[gr.Text(label="Input"),file_path], outputs= [label, gr.DataFrame(headers =['Tweets', 'Label', "Score"], wrap=True), gr.Plot(label="Sarcasm classifier")], title="Sarcasm Classifier",theme= 'dark') |
|
|
|
# Serve both interfaces as tabs; share=True also publishes a temporary public Gradio link.
main = gr.TabbedInterface(
    [demo, classificationB],
    ['Analyzer', 'Classifier'],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis System",
)

main.launch(share=True)