Minh Q. Le committed on
Commit
ed47213
1 Parent(s): 40cd566

Added Label Analysis Plot

Browse files
Files changed (5) hide show
  1. app/cosmic_view.py +34 -4
  2. app/deberta_view.py +34 -3
  3. app/gpt_view.py +34 -5
  4. app/utils.py +32 -1
  5. requirements.txt +1 -0
app/cosmic_view.py CHANGED
@@ -7,12 +7,11 @@ from app.utils import (
7
  create_input_instruction,
8
  format_prediction_ouptut,
9
  remove_temp_dir,
10
- decode_numeric_label,
11
- decode_speaker_role,
12
  display_sentiment_score_table,
13
  sentiment_flow_plot,
14
  sentiment_intensity_analysis,
15
  EXAMPLE_CONVERSATIONS,
 
16
  )
17
  from fairseq.data.data_utils import collate_tokens
18
 
@@ -22,6 +21,7 @@ sys.path.insert(0, "../") # neccesary to load modules outside of app
22
 
23
  from app import roberta, comet, COSMIC_MODEL, cosmic_args
24
  from preprocessing import preprocess
 
25
  from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader
26
 
27
 
@@ -227,6 +227,36 @@ def cosmic_ui():
227
 
228
  # reset the output whenever a change in the input is detected
229
  conversation_input.change(lambda x: "", conversation_input, output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  gr.Markdown("# Sentiment Flow Plot")
232
  with gr.Row():
@@ -275,8 +305,8 @@ def cosmic_ui():
275
 
276
  # reset all outputs whenever a change in the input is detected
277
  conversation_input.change(
278
- lambda x: ("", None, None),
279
  conversation_input,
280
- outputs=[output, plot_box, intensity_plot],
281
  )
282
  return cosmic_model
 
7
  create_input_instruction,
8
  format_prediction_ouptut,
9
  remove_temp_dir,
 
 
10
  display_sentiment_score_table,
11
  sentiment_flow_plot,
12
  sentiment_intensity_analysis,
13
  EXAMPLE_CONVERSATIONS,
14
+ label_analysis,
15
  )
16
  from fairseq.data.data_utils import collate_tokens
17
 
 
21
 
22
  from app import roberta, comet, COSMIC_MODEL, cosmic_args
23
  from preprocessing import preprocess
24
+ from preprocessing.preprocess import decode_numeric_label, decode_speaker_role
25
  from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader
26
 
27
 
 
227
 
228
  # reset the output whenever a change in the input is detected
229
  conversation_input.change(lambda x: "", conversation_input, output)
230
+
231
+ gr.Markdown("# Analysis of Labels")
232
+ with gr.Row():
233
+ with gr.Column(scale=1):
234
+ gr.Markdown(
235
+ """
236
+ <b>Frequency Analysis of Labels</b>
237
+ One key aspect of our analysis involves examining the
238
+ frequency distribution of labels assigned to different
239
+ parts of the conversation. This includes tracking the
240
+ occurrence of labels such as "Interest," "Curiosity,"
241
+ "Confused," "Openness," and "Acceptance." The resulting
242
+ distribution provides insights into the prevalence of
243
+ various sentiments during the interaction.
244
+
245
+ <b>Word Cloud Visualization</b>
246
+ In addition to label frequency, we employ word cloud
247
+ visualization to depict the prominent terms in the input
248
+ conversations. This visual representation highlights the
249
+ most frequently used words, shedding light on the key
250
+ themes and topics discussed.
251
+ """
252
+ )
253
+ with gr.Column(scale=3):
254
+ labels_plot = gr.Plot(label="Analysis of Labels Plot")
255
+ with gr.Column(scale=3):
256
+ wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
257
+
258
+ labels_btn = gr.Button(value="Plot Label Analysis")
259
+ labels_btn.click(label_analysis, inputs=[output], outputs=[labels_plot,wordcloud_plot])
260
 
261
  gr.Markdown("# Sentiment Flow Plot")
262
  with gr.Row():
 
305
 
306
  # reset all outputs whenever a change in the input is detected
307
  conversation_input.change(
308
+ lambda x: ("", None, None, None, None),
309
  conversation_input,
310
+ outputs=[output, labels_plot, wordcloud_plot, plot_box, intensity_plot],
311
  )
312
  return cosmic_model
app/deberta_view.py CHANGED
@@ -6,6 +6,7 @@ from app.utils import (
6
  sentiment_flow_plot,
7
  sentiment_intensity_analysis,
8
  EXAMPLE_CONVERSATIONS,
 
9
  )
10
 
11
  import sys
@@ -106,6 +107,36 @@ def deberta_ui():
106
  # reset the output whenever a change in the input is detected
107
  conversation_input.change(lambda x: "", conversation_input, output)
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  gr.Markdown("# Sentiment Flow Plot")
110
  with gr.Row():
111
  with gr.Column(scale=1):
@@ -153,8 +184,8 @@ def deberta_ui():
153
 
154
  # reset all outputs whenever a change in the input is detected
155
  conversation_input.change(
156
- lambda x: ("", None, None),
157
  conversation_input,
158
- outputs=[output, plot_box, intensity_plot],
159
  )
160
- return deberta_model
 
6
  sentiment_flow_plot,
7
  sentiment_intensity_analysis,
8
  EXAMPLE_CONVERSATIONS,
9
+ label_analysis,
10
  )
11
 
12
  import sys
 
107
  # reset the output whenever a change in the input is detected
108
  conversation_input.change(lambda x: "", conversation_input, output)
109
 
110
+ gr.Markdown("# Analysis of Labels")
111
+ with gr.Row():
112
+ with gr.Column(scale=1):
113
+ gr.Markdown(
114
+ """
115
+ <b>Frequency Analysis of Labels</b>
116
+ One key aspect of our analysis involves examining the
117
+ frequency distribution of labels assigned to different
118
+ parts of the conversation. This includes tracking the
119
+ occurrence of labels such as "Interest," "Curiosity,"
120
+ "Confused," "Openness," and "Acceptance." The resulting
121
+ distribution provides insights into the prevalence of
122
+ various sentiments during the interaction.
123
+
124
+ <b>Word Cloud Visualization</b>
125
+ In addition to label frequency, we employ word cloud
126
+ visualization to depict the prominent terms in the input
127
+ conversations. This visual representation highlights the
128
+ most frequently used words, shedding light on the key
129
+ themes and topics discussed.
130
+ """
131
+ )
132
+ with gr.Column(scale=3):
133
+ labels_plot = gr.Plot(label="Analysis of Labels Plot")
134
+ with gr.Column(scale=3):
135
+ wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
136
+
137
+ labels_btn = gr.Button(value="Plot Label Analysis")
138
+ labels_btn.click(label_analysis, inputs=[output], outputs=[labels_plot,wordcloud_plot])
139
+
140
  gr.Markdown("# Sentiment Flow Plot")
141
  with gr.Row():
142
  with gr.Column(scale=1):
 
184
 
185
  # reset all outputs whenever a change in the input is detected
186
  conversation_input.change(
187
+ lambda x: ("", None, None, None, None),
188
  conversation_input,
189
+ outputs=[output, labels_plot, wordcloud_plot, plot_box, intensity_plot],
190
  )
191
+ return deberta_model
app/gpt_view.py CHANGED
@@ -1,5 +1,3 @@
1
- import os
2
- import seaborn as sns
3
  from openai import OpenAI
4
  import gradio as gr
5
  import re
@@ -11,6 +9,7 @@ from app.utils import (
11
  display_sentiment_score_table,
12
  sentiment_intensity_analysis,
13
  EXAMPLE_CONVERSATIONS,
 
14
  )
15
 
16
  openai_args = {"api_key": ""}
@@ -332,6 +331,36 @@ def gpt_ui():
332
  outputs=[output_box, report_md],
333
  )
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  gr.Markdown("# Sentiment Flow Plot")
336
  with gr.Row():
337
  with gr.Column(scale=1):
@@ -379,9 +408,9 @@ def gpt_ui():
379
 
380
  # reset all outputs whenever a change in the input is detected
381
  conversation_input.change(
382
- lambda x: ("", "", None, None),
383
  conversation_input,
384
- outputs=[output_box, report_md, plot_box, intensity_plot],
385
  )
386
 
387
- return gpt_model
 
 
 
1
  from openai import OpenAI
2
  import gradio as gr
3
  import re
 
9
  display_sentiment_score_table,
10
  sentiment_intensity_analysis,
11
  EXAMPLE_CONVERSATIONS,
12
+ label_analysis,
13
  )
14
 
15
  openai_args = {"api_key": ""}
 
331
  outputs=[output_box, report_md],
332
  )
333
 
334
+ gr.Markdown("# Analysis of Labels")
335
+ with gr.Row():
336
+ with gr.Column(scale=1):
337
+ gr.Markdown(
338
+ """
339
+ <b>Frequency Analysis of Labels</b>
340
+ One key aspect of our analysis involves examining the
341
+ frequency distribution of labels assigned to different
342
+ parts of the conversation. This includes tracking the
343
+ occurrence of labels such as "Interest," "Curiosity,"
344
+ "Confused," "Openness," and "Acceptance." The resulting
345
+ distribution provides insights into the prevalence of
346
+ various sentiments during the interaction.
347
+
348
+ <b>Word Cloud Visualization</b>
349
+ In addition to label frequency, we employ word cloud
350
+ visualization to depict the prominent terms in the input
351
+ conversations. This visual representation highlights the
352
+ most frequently used words, shedding light on the key
353
+ themes and topics discussed.
354
+ """
355
+ )
356
+ with gr.Column(scale=3):
357
+ labels_plot = gr.Plot(label="Analysis of Labels Plot")
358
+ with gr.Column(scale=3):
359
+ wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
360
+
361
+ labels_btn = gr.Button(value="Plot Label Analysis")
362
+ labels_btn.click(label_analysis, inputs=[output_box], outputs=[labels_plot,wordcloud_plot])
363
+
364
  gr.Markdown("# Sentiment Flow Plot")
365
  with gr.Row():
366
  with gr.Column(scale=1):
 
408
 
409
  # reset all outputs whenever a change in the input is detected
410
  conversation_input.change(
411
+ lambda x: ("", "", None, None, None, None),
412
  conversation_input,
413
+ outputs=[output_box, report_md, labels_plot, wordcloud_plot, plot_box, intensity_plot],
414
  )
415
 
416
+ return gpt_model
app/utils.py CHANGED
@@ -6,9 +6,10 @@ import pandas as pd
6
  import seaborn as sns
7
  from statistics import mean
8
  import matplotlib.pyplot as plt
 
 
9
  from nltk.sentiment import SentimentIntensityAnalyzer
10
  from preprocessing.preprocess import process_user_input
11
- from preprocessing.preprocess import decode_numeric_label, decode_speaker_role
12
 
13
 
14
  def create_input_instruction():
@@ -74,6 +75,36 @@ SENTIMENT_GROUP_MAPPING = {
74
  "Obscene": -3,
75
  }
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  def sentiment_flow_plot(labeled_conv):
79
  """Generate the sentiment flow plot using the output from the label predecting
 
6
  import seaborn as sns
7
  from statistics import mean
8
  import matplotlib.pyplot as plt
9
+ from collections import Counter
10
+ from wordcloud import WordCloud
11
  from nltk.sentiment import SentimentIntensityAnalyzer
12
  from preprocessing.preprocess import process_user_input
 
13
 
14
 
15
  def create_input_instruction():
 
75
  "Obscene": -3,
76
  }
77
 
78
def label_analysis(labeled_conv):
    """Analyze the distribution of predicted labels in a conversation.

    Builds two matplotlib figures from the labeled conversation text:
    a bar chart of label frequencies and a word cloud of the labels.

    Args:
        labeled_conv: Prediction output string in which each message is
            formatted as "<Speaker>: <message>\\n[<label>]", where the
            speaker is either "Agent" or "Visitor".

    Returns:
        tuple: ``(fig1, fig2)`` where ``fig1`` is the label-frequency
        bar plot and ``fig2`` is the word-cloud figure.
    """
    msg_pattern = r"(Agent|Visitor): (.*)\n\[(.*)\]"
    # find the components of each message: (speaker, message, label)
    component_lst = re.findall(msg_pattern, labeled_conv)

    labels = [label for _, _, label in component_lst]
    label_counts = Counter(labels)

    # Bar plot of label frequencies. Use the explicit Axes API rather
    # than the implicit pyplot state so the styling calls cannot end up
    # targeting the wrong figure once fig2 is created below.
    fig1, ax1 = plt.subplots(figsize=(12, 6))
    sns.barplot(x=list(label_counts.keys()), y=list(label_counts.values()), ax=ax1)
    ax1.set_title('Label Frequency Distribution')
    ax1.set_xlabel('Labels')
    ax1.set_ylabel('Frequency')
    for tick_label in ax1.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_ha('right')

    # Word cloud of the labels. WordCloud.generate raises ValueError on
    # empty text, so guard against conversations with no parsed labels
    # and return an empty (axis-less) figure in that case.
    fig2, ax2 = plt.subplots(figsize=(10, 5))
    if labels:
        labels_text = " ".join(labels)
        wordcloud = WordCloud(
            width=800, height=400, background_color='white'
        ).generate(labels_text)
        ax2.imshow(wordcloud, interpolation='bilinear')
    ax2.axis('off')  # image plot; axis ticks carry no meaning

    return fig1, fig2
108
 
109
  def sentiment_flow_plot(labeled_conv):
110
  """Generate the sentiment flow plot using the output from the label predecting
requirements.txt CHANGED
@@ -167,4 +167,5 @@ wcwidth==0.2.12
167
  weasel==0.3.4
168
  websockets==11.0.3
169
  Werkzeug==3.0.1
 
170
  wrapt==1.14.1
 
167
  weasel==0.3.4
168
  websockets==11.0.3
169
  Werkzeug==3.0.1
170
+ wordcloud==1.9.3
171
  wrapt==1.14.1