"""Gradio demo that scores the sentiment of a speech, sentence by sentence.

The pasted text is split on full stops, every non-blank fragment is
normalised and classified with a DistilBERT SST-2 model, and the UI shows an
overall signed score, the POSITIVE/NEGATIVE counts and a per-fragment table.
"""

import re

import gradio as gr
import numpy as np
import pandas as pd
from transformers import pipeline

MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
RESULT_COLUMNS = ["Paras", "Sentiment_Label", "Sentiment_Score"]

# Compiled once at import time; clean_text runs once per fragment.
_PUNCTUATION_RE = re.compile(r"[^\w\s]")
_WHITESPACE_RE = re.compile(r"\s+")

# The sentiment-analysis pipeline returns, for each input string, a dict with
# a "label" and a "score".
sentiment = pipeline(
    "sentiment-analysis",
    model=MODEL_NAME,
    tokenizer=MODEL_NAME,
)


def clean_text(text: str) -> str:
    """Normalise a text fragment before it is passed to the classifier.

    Non-ASCII characters are dropped, the text is lower-cased, punctuation and
    other special characters are removed, and every run of whitespace
    (spaces, tabs, newlines, carriage returns, ...) is collapsed to a single
    space.

    Args:
        text: Raw fragment of the user's input.

    Returns:
        The cleaned fragment with no leading or trailing whitespace. May be
        an empty string if nothing survives the cleaning.
    """
    # Remove non-ASCII characters (e.g. Chinese characters, typographic quotes).
    text = text.encode("ascii", errors="ignore").decode("ascii")
    text = text.lower()
    # Remove punctuation and special characters; underscores count as \w and stay.
    text = _PUNCTUATION_RE.sub("", text)
    # A single whitespace pass replaces the separate \n, \t and space handling
    # and also normalises \r from Windows line endings.
    return _WHITESPACE_RE.sub(" ", text).strip()


def sentiment_analysis(text: str):
    """Score the sentiment of each full-stop-delimited fragment of ``text``.

    Args:
        text: The text pasted into the UI. ``None`` or blank input is allowed.

    Returns:
        A 3-tuple ``(overall_score, label_counts, details)``:

        * ``overall_score`` - sum of the signed per-fragment scores, rounded
          to 3 decimals. NEGATIVE fragments contribute a negative score.
        * ``label_counts`` - plain-text table of how many fragments received
          each label.
        * ``details`` - ``pandas.DataFrame`` with the columns ``Paras``,
          ``Sentiment_Label`` and ``Sentiment_Score`` (signed, 6 decimals).
    """
    # Drop blank fragments *before* inference so the model never runs on
    # pieces that would be discarded from the result anyway.
    fragments = [part for part in (text or "").split(".") if part.strip()]

    if not fragments:
        # Nothing to classify: skip the model call and return well-formed
        # empty outputs instead of the repr of an empty Series.
        return 0.0, "No sentences found", pd.DataFrame(columns=RESULT_COLUMNS)

    corpus = [clean_text(part) for part in fragments]
    # truncation=True keeps a very long, period-free passage from exceeding
    # the model's maximum sequence length and raising inside the pipeline.
    predictions = sentiment(corpus, truncation=True)

    df = pd.DataFrame(
        {
            "Paras": fragments,
            "Sentiment_Label": [pred.get("label") for pred in predictions],
            "Sentiment_Score": [pred.get("score") for pred in predictions],
        },
        columns=RESULT_COLUMNS,
    )

    # The model reports a confidence in [0, 1] for whichever label it chose;
    # flip the sign for NEGATIVE so the scores can be summed meaningfully.
    df["Sentiment_Score"] = np.where(
        df["Sentiment_Label"] == "NEGATIVE",
        -df["Sentiment_Score"],
        df["Sentiment_Score"],
    ).round(6)

    overall_sentiment_score = float(np.round(df["Sentiment_Score"].sum(), 3))
    sentiment_count = df["Sentiment_Label"].value_counts().to_string()

    return overall_sentiment_score, sentiment_count, df


# NOTE(review): gr.inputs / gr.outputs and launch(enable_queue=...) belong to
# the legacy Gradio API; confirm the pinned Gradio version still provides them
# before upgrading the dependency.
gradio_ui = gr.Interface(
    fn=sentiment_analysis,
    title="Analyse The Sentiment Structure Of A Speech",
    description="Upload a speech or parts of it for a detailed sentiment analysis",
    inputs=gr.inputs.Textbox(lines=30, label="Paste Text Here"),
    outputs=[
        gr.outputs.Textbox(type="number", label="Overall Sentiment Score"),
        gr.outputs.Textbox(
            type="auto", label="How Many Positive & Negative Sentences?"
        ),
        gr.outputs.Dataframe(
            headers=["Paras", "Sentiment_Label", "Sentiment_Score"],
            max_rows=None,
            max_cols=3,
            overflow_row_behaviour="paginate",
            type="auto",
            label="Detailed Assessment By Sentence",
        ),
    ],
)

gradio_ui.launch(enable_queue=True)