File size: 2,852 Bytes
47419e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efe580c
47419e1
 
efe580c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
import pandas as pd
import numpy as np
import re

from transformers import pipeline

sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    tokenizer="distilbert-base-uncased-finetuned-sst-2-english",
)

def clean_text(text):
    text = text.encode("ascii", errors="ignore").decode(
        "ascii"
    )  # remove non-ascii, Chinese characters
    text = text.lower()
    text = re.sub(r"\n", " ", text)
    text = re.sub(r"\n\n", " ", text)
    text = re.sub(r"\t", " ", text)
    text = text.strip(" ")
    text = re.sub(r"[^\w\s]", "", text)  # remove punctuation and special characters
    text = re.sub(
        " +", " ", text
    ).strip()  # get rid of multiple spaces and replace with a single
    return text


# note that the sentiment-analysis pipeline returns 2 values - a label and a score
def sentiment_analysis(text):
    input_text = (
        pd.DataFrame(text.split("."))
        .stack()
        .reset_index()
        .rename(columns={0: "Paras"})
        .drop("level_0", axis=1)
        .drop("level_1", axis=1)
        .dropna()
    )

    input_text["Clean_Text"] = input_text["Paras"].map(lambda text: clean_text(text))

    corpus = list(input_text["Clean_Text"].values)

    input_text["Sentiment"] = sentiment(corpus)

    input_text["Sentiment_Label"] = [x.get("label") for x in input_text["Sentiment"]]

    input_text["Sentiment_Score"] = [x.get("score") for x in input_text["Sentiment"]]

    cols = ["Paras", "Sentiment_Label", "Sentiment_Score"]
    df = input_text[cols].copy()

    df = df[df["Paras"].str.strip().astype(bool)]

    df["Sentiment_Score"] = np.where(
        df["Sentiment_Label"] == "NEGATIVE",
        -(df["Sentiment_Score"]),
        df["Sentiment_Score"],
    )

    df["Sentiment_Score"] = df["Sentiment_Score"].round(6)

    overall_sentiment_score = df["Sentiment_Score"].sum().round(3)

    sentiment_count = df["Sentiment_Label"].value_counts().to_string()

    return overall_sentiment_score, sentiment_count, df


gradio_ui = gr.Interface(
    fn=sentiment_analysis,
    title="Analyse The Sentiment Structure Of A Speech",
    description="Upload a speech or parts of it for a detailed sentiment analysis",
    inputs=gr.inputs.Textbox(lines=30, label="Paste Text Here"),
    outputs=[
        gr.outputs.Textbox(type="number", label="Overall Sentiment Score"),
        gr.outputs.Textbox(
            type="auto", label="How Many Positive & Negative Sentences?"
        ),
        gr.outputs.Dataframe(
            headers=["Paras", "Sentiment_Label", "Sentiment_Score"],
            max_rows=None,
            max_cols=3,
            overflow_row_behaviour="paginate",
            type="auto",
            label="Detailed Assessment By Sentence",
        ),
    ], 
)

gradio_ui.launch(enable_queue=True)