chinhon committed on
Commit
aa4d165
•
1 Parent(s): da83034

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import re
5
+
6
+ from transformers import pipeline
7
+
8
# The model weights and the tokenizer are loaded from the same checkpoint,
# so the identifier is spelled out once and reused for both arguments.
_checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

sentiment = pipeline("sentiment-analysis", model=_checkpoint, tokenizer=_checkpoint)
13
+
14
def clean_text(text):
    """Normalise a piece of text before it is passed to the sentiment model.

    Steps, in order:

    1. Drop every non-ASCII character (accented letters, CJK, emoji, ...).
    2. Lower-case the text.
    3. Delete punctuation and other special characters. They are removed,
       not replaced by a space, so ``"don't"`` becomes ``"dont"``.
       Underscores are kept because they count as word characters.
    4. Collapse every run of whitespace (spaces, tabs, newlines, carriage
       returns, ...) into one space and trim both ends.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lower-case, ASCII-only string. It may be empty.
    """
    ascii_only = text.encode("ascii", errors="ignore").decode("ascii")
    lowered = ascii_only.lower()
    without_punctuation = re.sub(r"[^\w\s]", "", lowered)
    # A single \s+ pass covers "\n", "\t", "\r" and repeated spaces alike, so
    # no separate per-character substitutions are needed.
    return re.sub(r"\s+", " ", without_punctuation).strip()
28
+
29
+
30
# note that the sentiment-analysis pipeline returns 2 values - a label and a score
def sentiment_analysis(text):
    """Score the sentiment of each sentence in ``text``.

    The text is split on ``"."``. Every non-blank piece is cleaned with
    ``clean_text`` and classified by the module-level ``sentiment`` pipeline,
    which is expected to return one dict with ``"label"`` and ``"score"``
    keys per input. Scores of pieces labelled ``"NEGATIVE"`` are negated so
    that the sum of all scores reflects the overall direction of the text.

    Args:
        text: The raw text to analyse. ``None`` is treated as empty text.

    Returns:
        A tuple ``(overall_sentiment_score, sentiment_count, df)``:

        * ``overall_sentiment_score`` - sum of the signed scores, rounded to
          3 decimals (``0.0`` when there is nothing to score).
        * ``sentiment_count`` - string rendering of the per-label counts, or
          ``"No sentences found"`` when there is nothing to score.
        * ``df`` - DataFrame with the columns ``Paras`` (original piece),
          ``Sentiment_Label`` and ``Sentiment_Score`` (signed, rounded to
          6 decimals).
    """
    cols = ["Paras", "Sentiment_Label", "Sentiment_Score"]

    # Discard blank pieces (e.g. the empty string after the final full stop)
    # before inference, so the model is never run on rows that would be
    # thrown away afterwards.
    sentences = [piece for piece in (text or "").split(".") if piece.strip()]
    if not sentences:
        return 0.0, "No sentences found", pd.DataFrame(columns=cols)

    predictions = sentiment([clean_text(piece) for piece in sentences])

    # Build the result frame in one go; nothing is filtered afterwards, so
    # the column assignment below never operates on a slice of another frame.
    df = pd.DataFrame(
        {
            "Paras": sentences,
            "Sentiment_Label": [p.get("label") for p in predictions],
            "Sentiment_Score": [p.get("score") for p in predictions],
        },
        columns=cols,
    )

    # The pipeline reports a confidence for whichever label it picked; flip
    # the sign for negative sentences so scores can be summed meaningfully.
    df["Sentiment_Score"] = np.where(
        df["Sentiment_Label"] == "NEGATIVE",
        -df["Sentiment_Score"],
        df["Sentiment_Score"],
    ).round(6)

    overall_sentiment_score = df["Sentiment_Score"].sum().round(3)
    sentiment_count = df["Sentiment_Label"].value_counts().to_string()

    return overall_sentiment_score, sentiment_count, df
70
+
71
+
72
# Input component: a tall free-text box for the speech to analyse.
_speech_input = gr.inputs.Textbox(lines=30, label="Paste Text Here")

# Output components, in the same order as the values returned by
# sentiment_analysis: overall score, per-label counts, per-sentence table.
_overall_score_output = gr.outputs.Textbox(
    type="number", label="Overall Sentiment Score"
)
_label_counts_output = gr.outputs.Textbox(
    type="auto", label="How Many Positive & Negative Sentences?"
)
_sentence_table_output = gr.outputs.Dataframe(
    headers=["Paras", "Sentiment_Label", "Sentiment_Score"],
    max_rows=None,
    max_cols=3,
    overflow_row_behaviour="paginate",
    type="auto",
    label="Detailed Assessment By Sentence",
)

gradio_ui = gr.Interface(
    fn=sentiment_analysis,
    title="Analyse The Sentiment Structure Of A Speech",
    description="Upload a speech or parts of it for a detailed sentiment analysis",
    inputs=_speech_input,
    outputs=[
        _overall_score_output,
        _label_counts_output,
        _sentence_table_output,
    ],
    enable_queue=True,
)


# Start serving the interface.
gradio_ui.launch()