Spaces:
Sleeping
Sleeping
Minh Q. Le
committed on
Commit
•
ed47213
1
Parent(s):
40cd566
Added Label Analysis Plot
Browse files- app/cosmic_view.py +34 -4
- app/deberta_view.py +34 -3
- app/gpt_view.py +34 -5
- app/utils.py +32 -1
- requirements.txt +1 -0
app/cosmic_view.py
CHANGED
@@ -7,12 +7,11 @@ from app.utils import (
|
|
7 |
create_input_instruction,
|
8 |
format_prediction_ouptut,
|
9 |
remove_temp_dir,
|
10 |
-
decode_numeric_label,
|
11 |
-
decode_speaker_role,
|
12 |
display_sentiment_score_table,
|
13 |
sentiment_flow_plot,
|
14 |
sentiment_intensity_analysis,
|
15 |
EXAMPLE_CONVERSATIONS,
|
|
|
16 |
)
|
17 |
from fairseq.data.data_utils import collate_tokens
|
18 |
|
@@ -22,6 +21,7 @@ sys.path.insert(0, "../") # neccesary to load modules outside of app
|
|
22 |
|
23 |
from app import roberta, comet, COSMIC_MODEL, cosmic_args
|
24 |
from preprocessing import preprocess
|
|
|
25 |
from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader
|
26 |
|
27 |
|
@@ -227,6 +227,36 @@ def cosmic_ui():
|
|
227 |
|
228 |
# reset the output whenever a change in the input is detected
|
229 |
conversation_input.change(lambda x: "", conversation_input, output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
|
231 |
gr.Markdown("# Sentiment Flow Plot")
|
232 |
with gr.Row():
|
@@ -275,8 +305,8 @@ def cosmic_ui():
|
|
275 |
|
276 |
# reset all outputs whenever a change in the input is detected
|
277 |
conversation_input.change(
|
278 |
-
lambda x: ("", None, None),
|
279 |
conversation_input,
|
280 |
-
outputs=[output, plot_box, intensity_plot],
|
281 |
)
|
282 |
return cosmic_model
|
|
|
7 |
create_input_instruction,
|
8 |
format_prediction_ouptut,
|
9 |
remove_temp_dir,
|
|
|
|
|
10 |
display_sentiment_score_table,
|
11 |
sentiment_flow_plot,
|
12 |
sentiment_intensity_analysis,
|
13 |
EXAMPLE_CONVERSATIONS,
|
14 |
+
label_analysis,
|
15 |
)
|
16 |
from fairseq.data.data_utils import collate_tokens
|
17 |
|
|
|
21 |
|
22 |
from app import roberta, comet, COSMIC_MODEL, cosmic_args
|
23 |
from preprocessing import preprocess
|
24 |
+
from preprocessing.preprocess import decode_numeric_label, decode_speaker_role
|
25 |
from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader
|
26 |
|
27 |
|
|
|
227 |
|
228 |
# reset the output whenever a change in the input is detected
|
229 |
conversation_input.change(lambda x: "", conversation_input, output)
|
230 |
+
|
231 |
+
gr.Markdown("# Analysis of Labels")
|
232 |
+
with gr.Row():
|
233 |
+
with gr.Column(scale=1):
|
234 |
+
gr.Markdown(
|
235 |
+
"""
|
236 |
+
<b>Frequency Analysis of Labels</b>
|
237 |
+
One key aspect of our analysis involves examining the
|
238 |
+
frequency distribution of labels assigned to different
|
239 |
+
parts of the conversation. This includes tracking the
|
240 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
241 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
242 |
+
distribution provides insights into the prevalence of
|
243 |
+
various sentiments during the interaction.
|
244 |
+
|
245 |
+
<b>Word Cloud Visualization</b>
|
246 |
+
In addition to label frequency, we employ word cloud
|
247 |
+
visualization to depict the prominent terms in the input
|
248 |
+
conversations. This visual representation highlights the
|
249 |
+
most frequently used words, shedding light on the key
|
250 |
+
themes and topics discussed.
|
251 |
+
"""
|
252 |
+
)
|
253 |
+
with gr.Column(scale=3):
|
254 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
255 |
+
with gr.Column(scale=3):
|
256 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
257 |
+
|
258 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
259 |
+
labels_btn.click(label_analysis, inputs=[output], outputs=[labels_plot,wordcloud_plot])
|
260 |
|
261 |
gr.Markdown("# Sentiment Flow Plot")
|
262 |
with gr.Row():
|
|
|
305 |
|
306 |
# reset all outputs whenever a change in the input is detected
|
307 |
conversation_input.change(
|
308 |
+
lambda x: ("", None, None, None, None),
|
309 |
conversation_input,
|
310 |
+
outputs=[output, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
311 |
)
|
312 |
return cosmic_model
|
app/deberta_view.py
CHANGED
@@ -6,6 +6,7 @@ from app.utils import (
|
|
6 |
sentiment_flow_plot,
|
7 |
sentiment_intensity_analysis,
|
8 |
EXAMPLE_CONVERSATIONS,
|
|
|
9 |
)
|
10 |
|
11 |
import sys
|
@@ -106,6 +107,36 @@ def deberta_ui():
|
|
106 |
# reset the output whenever a change in the input is detected
|
107 |
conversation_input.change(lambda x: "", conversation_input, output)
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
gr.Markdown("# Sentiment Flow Plot")
|
110 |
with gr.Row():
|
111 |
with gr.Column(scale=1):
|
@@ -153,8 +184,8 @@ def deberta_ui():
|
|
153 |
|
154 |
# reset all outputs whenever a change in the input is detected
|
155 |
conversation_input.change(
|
156 |
-
lambda x: ("", None, None),
|
157 |
conversation_input,
|
158 |
-
outputs=[output, plot_box, intensity_plot],
|
159 |
)
|
160 |
-
return deberta_model
|
|
|
6 |
sentiment_flow_plot,
|
7 |
sentiment_intensity_analysis,
|
8 |
EXAMPLE_CONVERSATIONS,
|
9 |
+
label_analysis,
|
10 |
)
|
11 |
|
12 |
import sys
|
|
|
107 |
# reset the output whenever a change in the input is detected
|
108 |
conversation_input.change(lambda x: "", conversation_input, output)
|
109 |
|
110 |
+
gr.Markdown("# Analysis of Labels")
|
111 |
+
with gr.Row():
|
112 |
+
with gr.Column(scale=1):
|
113 |
+
gr.Markdown(
|
114 |
+
"""
|
115 |
+
<b>Frequency Analysis of Labels</b>
|
116 |
+
One key aspect of our analysis involves examining the
|
117 |
+
frequency distribution of labels assigned to different
|
118 |
+
parts of the conversation. This includes tracking the
|
119 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
120 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
121 |
+
distribution provides insights into the prevalence of
|
122 |
+
various sentiments during the interaction.
|
123 |
+
|
124 |
+
<b>Word Cloud Visualization</b>
|
125 |
+
In addition to label frequency, we employ word cloud
|
126 |
+
visualization to depict the prominent terms in the input
|
127 |
+
conversations. This visual representation highlights the
|
128 |
+
most frequently used words, shedding light on the key
|
129 |
+
themes and topics discussed.
|
130 |
+
"""
|
131 |
+
)
|
132 |
+
with gr.Column(scale=3):
|
133 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
134 |
+
with gr.Column(scale=3):
|
135 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
136 |
+
|
137 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
138 |
+
labels_btn.click(label_analysis, inputs=[output], outputs=[labels_plot,wordcloud_plot])
|
139 |
+
|
140 |
gr.Markdown("# Sentiment Flow Plot")
|
141 |
with gr.Row():
|
142 |
with gr.Column(scale=1):
|
|
|
184 |
|
185 |
# reset all outputs whenever a change in the input is detected
|
186 |
conversation_input.change(
|
187 |
+
lambda x: ("", None, None, None, None),
|
188 |
conversation_input,
|
189 |
+
outputs=[output, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
190 |
)
|
191 |
+
return deberta_model
|
app/gpt_view.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
import os
|
2 |
-
import seaborn as sns
|
3 |
from openai import OpenAI
|
4 |
import gradio as gr
|
5 |
import re
|
@@ -11,6 +9,7 @@ from app.utils import (
|
|
11 |
display_sentiment_score_table,
|
12 |
sentiment_intensity_analysis,
|
13 |
EXAMPLE_CONVERSATIONS,
|
|
|
14 |
)
|
15 |
|
16 |
openai_args = {"api_key": ""}
|
@@ -332,6 +331,36 @@ def gpt_ui():
|
|
332 |
outputs=[output_box, report_md],
|
333 |
)
|
334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
gr.Markdown("# Sentiment Flow Plot")
|
336 |
with gr.Row():
|
337 |
with gr.Column(scale=1):
|
@@ -379,9 +408,9 @@ def gpt_ui():
|
|
379 |
|
380 |
# reset all outputs whenever a change in the input is detected
|
381 |
conversation_input.change(
|
382 |
-
lambda x: ("", "", None, None),
|
383 |
conversation_input,
|
384 |
-
outputs=[output_box, report_md, plot_box, intensity_plot],
|
385 |
)
|
386 |
|
387 |
-
return gpt_model
|
|
|
|
|
|
|
1 |
from openai import OpenAI
|
2 |
import gradio as gr
|
3 |
import re
|
|
|
9 |
display_sentiment_score_table,
|
10 |
sentiment_intensity_analysis,
|
11 |
EXAMPLE_CONVERSATIONS,
|
12 |
+
label_analysis,
|
13 |
)
|
14 |
|
15 |
openai_args = {"api_key": ""}
|
|
|
331 |
outputs=[output_box, report_md],
|
332 |
)
|
333 |
|
334 |
+
gr.Markdown("# Analysis of Labels")
|
335 |
+
with gr.Row():
|
336 |
+
with gr.Column(scale=1):
|
337 |
+
gr.Markdown(
|
338 |
+
"""
|
339 |
+
<b>Frequency Analysis of Labels</b>
|
340 |
+
One key aspect of our analysis involves examining the
|
341 |
+
frequency distribution of labels assigned to different
|
342 |
+
parts of the conversation. This includes tracking the
|
343 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
344 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
345 |
+
distribution provides insights into the prevalence of
|
346 |
+
various sentiments during the interaction.
|
347 |
+
|
348 |
+
<b>Word Cloud Visualization</b>
|
349 |
+
In addition to label frequency, we employ word cloud
|
350 |
+
visualization to depict the prominent terms in the input
|
351 |
+
conversations. This visual representation highlights the
|
352 |
+
most frequently used words, shedding light on the key
|
353 |
+
themes and topics discussed.
|
354 |
+
"""
|
355 |
+
)
|
356 |
+
with gr.Column(scale=3):
|
357 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
358 |
+
with gr.Column(scale=3):
|
359 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
360 |
+
|
361 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
362 |
+
labels_btn.click(label_analysis, inputs=[output_box], outputs=[labels_plot,wordcloud_plot])
|
363 |
+
|
364 |
gr.Markdown("# Sentiment Flow Plot")
|
365 |
with gr.Row():
|
366 |
with gr.Column(scale=1):
|
|
|
408 |
|
409 |
# reset all outputs whenever a change in the input is detected
|
410 |
conversation_input.change(
|
411 |
+
lambda x: ("", "", None, None, None, None),
|
412 |
conversation_input,
|
413 |
+
outputs=[output_box, report_md, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
414 |
)
|
415 |
|
416 |
+
return gpt_model
|
app/utils.py
CHANGED
@@ -6,9 +6,10 @@ import pandas as pd
|
|
6 |
import seaborn as sns
|
7 |
from statistics import mean
|
8 |
import matplotlib.pyplot as plt
|
|
|
|
|
9 |
from nltk.sentiment import SentimentIntensityAnalyzer
|
10 |
from preprocessing.preprocess import process_user_input
|
11 |
-
from preprocessing.preprocess import decode_numeric_label, decode_speaker_role
|
12 |
|
13 |
|
14 |
def create_input_instruction():
|
@@ -74,6 +75,36 @@ SENTIMENT_GROUP_MAPPING = {
|
|
74 |
"Obscene": -3,
|
75 |
}
|
76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
def sentiment_flow_plot(labeled_conv):
|
79 |
"""Generate the sentiment flow plot using the output from the label predecting
|
|
|
6 |
import seaborn as sns
|
7 |
from statistics import mean
|
8 |
import matplotlib.pyplot as plt
|
9 |
+
from collections import Counter
|
10 |
+
from wordcloud import WordCloud
|
11 |
from nltk.sentiment import SentimentIntensityAnalyzer
|
12 |
from preprocessing.preprocess import process_user_input
|
|
|
13 |
|
14 |
|
15 |
def create_input_instruction():
|
|
|
75 |
"Obscene": -3,
|
76 |
}
|
77 |
|
78 |
+
def label_analysis(labeled_conv):
    """Plot how often each label occurs, plus a word cloud of the labels.

    Args:
        labeled_conv: Labelled conversation text. Each message is expected to
            be a line of the form ``"Agent: <text>"`` or ``"Visitor: <text>"``
            immediately followed by a line of the form ``"[<label>]"``.

    Returns:
        A ``(bar_figure, wordcloud_figure)`` tuple of matplotlib figures, or
        ``(None, None)`` when no labelled message is found in the input.
    """
    msg_pattern = r"(Agent|Visitor): (.*)\n\[(.*)\]"
    # each match is a (speaker, message, label) tuple; only the label is used
    labels = [label for _, _, label in re.findall(msg_pattern, labeled_conv or "")]

    if not labels:
        # Nothing to plot yet (e.g. the button was clicked before a prediction
        # was produced). WordCloud.generate() raises ValueError when the text
        # contains no words, so hand back empty plots instead of crashing.
        return None, None

    label_counts = Counter(labels)

    # Bar plot of the label frequencies. Titles and tick labels are set on the
    # axes object directly so they cannot land on some other "current" figure.
    fig1, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x=list(label_counts.keys()), y=list(label_counts.values()), ax=ax)
    ax.set_title("Label Frequency Distribution")
    ax.set_xlabel("Labels")
    ax.set_ylabel("Frequency")
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

    # Word cloud built from the space-joined labels
    labels_text = " ".join(labels)
    wordcloud = WordCloud(
        width=800, height=400, background_color="white"
    ).generate(labels_text)

    # Separate figure for the word cloud
    fig2, ax2 = plt.subplots(figsize=(10, 5))
    ax2.imshow(wordcloud, interpolation="bilinear")
    ax2.axis("off")  # the axes carry no meaning for a word cloud

    return fig1, fig2
|
108 |
|
109 |
def sentiment_flow_plot(labeled_conv):
|
110 |
"""Generate the sentiment flow plot using the output from the label predecting
|
requirements.txt
CHANGED
@@ -167,4 +167,5 @@ wcwidth==0.2.12
|
|
167 |
weasel==0.3.4
|
168 |
websockets==11.0.3
|
169 |
Werkzeug==3.0.1
|
|
|
170 |
wrapt==1.14.1
|
|
|
167 |
weasel==0.3.4
|
168 |
websockets==11.0.3
|
169 |
Werkzeug==3.0.1
|
170 |
+
wordcloud==1.9.3
|
171 |
wrapt==1.14.1
|