# app.py -- uploaded by chinhon, commit efe580c ("Update app.py")
import gradio as gr
import pandas as pd
import numpy as np
import re
from transformers import pipeline
# One checkpoint name serves as both model and tokenizer, so it is spelled
# out once and reused for the two keyword arguments.
_SENTIMENT_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"

# Module-level sentiment classifier, built once at import time and shared by
# every call to sentiment_analysis.
sentiment = pipeline(
    "sentiment-analysis",
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
)
def clean_text(text: str) -> str:
    """Normalise a text fragment before it is passed to the sentiment model.

    The fragment is reduced to lowercase ASCII words separated by single
    spaces:

    1. characters outside ASCII (e.g. Chinese characters) are dropped;
    2. the text is lowercased;
    3. everything that is neither a word character nor whitespace is removed
       (note that ``\\w`` includes the underscore, so ``_`` is kept);
    4. every run of whitespace -- spaces, tabs, newlines, carriage returns --
       is collapsed to one space, and the ends are trimmed.

    Args:
        text: The raw fragment.

    Returns:
        The cleaned fragment; an empty string if nothing survives cleaning.
    """
    # Round-trip through ASCII with errors="ignore" to drop non-ASCII characters.
    text = text.encode("ascii", errors="ignore").decode("ascii")
    text = text.lower()
    # Remove punctuation and special characters.
    text = re.sub(r"[^\w\s]", "", text)
    # Collapse *all* whitespace in one pass. Handling only "\n", "\t" and " "
    # would leave "\r" (Windows line endings), "\f" and "\v" in the output.
    return re.sub(r"\s+", " ", text).strip()
# note that the sentiment-analysis pipeline returns 2 values - a label and a score
def sentiment_analysis(text):
    """Score the sentiment of each sentence in ``text`` and summarise it.

    The text is split on ``"."`` (a simple heuristic: decimals and
    abbreviations are split too, and ``?``/``!`` do not end a sentence).
    Each fragment is cleaned with ``clean_text`` and classified by the
    module-level ``sentiment`` pipeline. Scores of fragments labelled
    ``"NEGATIVE"`` are negated so that the sign carries the polarity.

    Args:
        text: The speech, or part of it, as one string.

    Returns:
        A 3-tuple ``(overall_score, label_counts, details)``:

        * ``overall_score`` -- sum of the signed per-fragment scores,
          rounded to 3 decimals;
        * ``label_counts`` -- string rendering of how many fragments received
          each label;
        * ``details`` -- DataFrame with the columns ``Paras`` (the raw
          fragment), ``Sentiment_Label`` and ``Sentiment_Score`` (signed,
          rounded to 6 decimals).
    """
    columns = ["Paras", "Sentiment_Label", "Sentiment_Score"]

    # Pair each raw fragment with its cleaned form and keep only those that
    # still have content after cleaning. This way the model is never asked to
    # classify an empty string (blank fragments, punctuation-only fragments,
    # fragments made entirely of non-ASCII characters).
    fragments = [(raw, clean_text(raw)) for raw in (text or "").split(".")]
    fragments = [(raw, cleaned) for raw, cleaned in fragments if cleaned]

    if not fragments:
        # Nothing to classify: skip the model call altogether.
        return 0.0, "No sentences to analyse", pd.DataFrame(columns=columns)

    # truncation=True keeps an over-long fragment (e.g. text pasted without
    # any full stops) within the model's maximum input length.
    predictions = sentiment(
        [cleaned for _, cleaned in fragments], truncation=True
    )

    # The pipeline returns one dict per input carrying a label and a score.
    df = pd.DataFrame(
        {
            "Paras": [raw for raw, _ in fragments],
            "Sentiment_Label": [p.get("label") for p in predictions],
            "Sentiment_Score": [p.get("score") for p in predictions],
        },
        columns=columns,
    )

    # Give negative fragments a negative score so the scores can be summed.
    df["Sentiment_Score"] = np.where(
        df["Sentiment_Label"] == "NEGATIVE",
        -df["Sentiment_Score"],
        df["Sentiment_Score"],
    ).round(6)

    overall_sentiment_score = round(float(df["Sentiment_Score"].sum()), 3)
    sentiment_count = df["Sentiment_Label"].value_counts().to_string()
    return overall_sentiment_score, sentiment_count, df
# Input widget: a 30-line text box for the speech.
_speech_input = gr.inputs.Textbox(lines=30, label="Paste Text Here")

# Output widgets, listed in the same order as the tuple returned by
# sentiment_analysis: overall score, label counts, per-sentence table.
_overall_score_output = gr.outputs.Textbox(
    type="number", label="Overall Sentiment Score"
)
_label_count_output = gr.outputs.Textbox(
    type="auto", label="How Many Positive & Negative Sentences?"
)
_detail_table_output = gr.outputs.Dataframe(
    headers=["Paras", "Sentiment_Label", "Sentiment_Score"],
    max_rows=None,
    max_cols=3,
    overflow_row_behaviour="paginate",
    type="auto",
    label="Detailed Assessment By Sentence",
)

# Wire the analysis function to the widgets above.
gradio_ui = gr.Interface(
    fn=sentiment_analysis,
    inputs=_speech_input,
    outputs=[
        _overall_score_output,
        _label_count_output,
        _detail_table_output,
    ],
    title="Analyse The Sentiment Structure Of A Speech",
    description="Upload a speech or parts of it for a detailed sentiment analysis",
)

# Start the web app as soon as the script runs.
# NOTE(review): enable_queue=True presumably routes requests through Gradio's
# request queue -- confirm against the installed Gradio version.
gradio_ui.launch(enable_queue=True)