|
|
|
"""FA20-BCS-OO1 final app.ipynb |
|
|
|
Automatically generated by Colab |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
import joblib
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import gradio as gr

from transformers import pipeline
from TweetNormalizer import normalizeTweet
|
|
|
|
|
|
|
# Task A model: binary sarcasm detection.
pipe = pipeline(
    model="seek007/taskA-DeBERTa-large-1.0.0",
    tokenizer="seek007/taskA-DeBERTa-large-1.0.0",
)
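# The classifier returns one dict per input, e.g. [{'label': 'LABEL_1', 'score': 0.93}]
# (an assumption about this model's label strings; predict() below maps LABEL_0/LABEL_1
# to readable names and also accepts labels that are already readable).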
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict(text=None, fil=None):
    """Task A: label the text box entry and/or an uploaded file as sarcastic / non-sarcastic."""
    sentiment = None
    df = None
    fig = None

    # Map the raw model output labels to readable class names.
    mapping = {
        'LABEL_0': 'non_sarcastic',
        'LABEL_1': 'sarcastic'
    }

    if fil:
        if fil.name.endswith('.csv'):
            df = pd.read_csv(fil.name)
        elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
            df = pd.read_excel(fil.name)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Normalize every tweet, classify it, and attach readable labels.
        normalized = [normalizeTweet(t) for t in list(df.tweet)]
        d = pd.DataFrame(pipe(normalized))
        df['label'] = d['label'].map(mapping).fillna(d['label'])

        # Drop the gold-standard column if the uploaded file contains one.
        if 'sarcastic' in df.columns:
            df.drop('sarcastic', axis=1, inplace=True)

        sarcastic_count = np.sum(df.label == 'sarcastic')
        non_sarcastic_count = np.sum(df.label == 'non_sarcastic')

        # Pie chart of the predicted class distribution.
        labels = ['Sarcastic', 'Non-Sarcastic']
        sizes = [sarcastic_count, non_sarcastic_count]
        colors = ['gold', 'lightblue']
        explode = (0.1, 0)
        sns.set_style("whitegrid")
        fig, ax = plt.subplots()
        ax.pie(sizes, explode=explode, labels=labels, colors=colors,
               autopct='%1.1f%%', shadow=True, startangle=140)
        ax.axis('equal')
        plt.title('Sarcastic vs Non-Sarcastic Tweets')

    if text:
        # Classify the single text box entry.
        prediction = pipe([normalizeTweet(text)])[0]
        print(prediction)
        sentiment = "Sarcastic" if prediction['label'] in ('LABEL_1', 'sarcastic') else "Non Sarcastic"
        if fil is None:
            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])
    elif fil is None:
        return "Either enter text or upload a .csv or .xlsx file!", df, fig

    return sentiment, df, fig
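# Example of calling the function directly (hypothetical input, bypassing the Gradio UI):
#   sentiment, table, chart = predict(text="Oh great, another Monday.")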
|
|
|
|
|
|
|
|
|
|
|
file_path =gr.File(label="Upload a File") |
|
output = gr.Label(num_top_classes=2, label="Predicted Labels") |
|
demo = gr.Interface(fn=predict, inputs=[gr.Text(label="Input"),file_path], outputs=[output, gr.DataFrame(headers =['Tweets', 'Labels'], wrap=True), gr.Plot(label="Sarcasm Predictor")], title="Sarcasm Predictor") |
|
|
|
|
|
|
|
file_path =gr.File(label="Upload a File") |
|
label = gr.Label(num_top_classes=3, label="Top 3 Labels") |
|
classification = gr.Interface(classify, inputs=[gr.Text(label="Input"),file_path], outputs= [label, gr.DataFrame(headers =['Tweets', 'Label', "Score"], wrap=True), gr.Plot(label="Sarcasm classifier")], title="Sarcasm Classifier") |
|
|
|
|
|
|
|
# Task B model: fine-grained sarcasm-type classification.
pipe2 = pipeline(
    model="seek007/taskB-bertweet-base-trainer-1.0.0",
    tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0",
)
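# pipe2 is assumed to return dicts such as {'label': 'irony', 'score': ...}; the exact
# label strings come from the model config and should match the class list in classifyB.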
|
|
|
def classifyB(text=None, fil=None):
    """Task B: classify the text box entry and/or an uploaded file by sarcasm type."""
    sentiment = None
    df = None
    fig = None
    # Classes the Task B model distinguishes.
    labels = ['sarcasm', 'irony', 'satire', 'understatement', 'overstatement', 'rhetorical question']

    if fil:
        if fil.name.endswith('.csv'):
            df = pd.read_csv(fil.name)
        elif fil.name.endswith('.xlsx') or fil.name.endswith('.xls'):
            df = pd.read_excel(fil.name)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Normalize and classify every tweet in the uploaded file.
        normalized = [normalizeTweet(t) for t in list(df.tweet)]
        d = pipe2(normalized)

        # One (label, score) row per tweet, appended to the uploaded frame.
        structured_data = [{"label": item['label'], "score": item['score']} for item in d]
        df = pd.concat([df, pd.DataFrame(structured_data)], axis=1)

        # Count plot of the predicted sarcasm types.
        fig = plt.figure()
        sns.countplot(x='label', data=df, palette='viridis')
        plt.title('Result: Count Plot')
        plt.xlabel('label')
        plt.ylabel('Count')

    if text:
        # Classify the single text box entry.
        prediction = pipe2([normalizeTweet(text)])[0]
        print(prediction["label"])
        sentiment = prediction['label']

    return sentiment, df, fig
|
|
|
file_path =gr.File(label="Upload a File") |
|
label = gr.Label( label="Labels") |
|
classificationB = gr.Interface(classifyB, inputs=[gr.Text(label="Input"),file_path], outputs= [label, gr.DataFrame(headers =['Tweets', 'Label', "Score"], wrap=True), gr.Plot(label="Sarcasm classifier")], title="Sarcasm Classifier",theme= 'dark') |
|
|
|
# Serve both interfaces as tabs; share=True also publishes a temporary public Gradio link.
main = gr.TabbedInterface(
    [demo, classificationB],
    ['Analyzer', 'Classifier'],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis System",
)

main.launch(share=True)