molokhovdmitry committed on
Commit
5000d19
1 Parent(s): 44c255c

Update t-SNE plots to use a selectable color column

Browse files
Files changed (1) hide show
  1. src/app.py +40 -10
src/app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from dotenv import load_dotenv
3
  from transformers import pipeline
4
  from sentence_transformers import SentenceTransformer
@@ -134,6 +135,9 @@ def nmf_plots(df,
134
  for i, col in enumerate(topic_cols):
135
  df[col] = nmf_embeddings[i]
136
 
 
 
 
137
  # Get word values for every topic
138
  word_df = pd.DataFrame(
139
  nmf.components_.T,
@@ -171,7 +175,7 @@ def nmf_plots(df,
171
  return df, [topic_words_fig, contributions_fig]
172
 
173
 
174
- def tsne_plots(df, encoder, emotion_cols, color_emotion, tsne_perplexity):
175
  """
176
  Encodes all `text_original` values of `df` DataFrame with `encoder`,
177
  uses t-SNE algorithm for visualization on these embeddings and on
@@ -193,12 +197,21 @@ def tsne_plots(df, encoder, emotion_cols, color_emotion, tsne_perplexity):
193
  # Also use predicted emotions
194
  if emotion_cols:
195
  tsne_cols = embedding_cols + emotion_cols
196
- color = color_emotion
197
  hover_data = ['first_emotion', 'second_emotion', 'text_original']
198
  else:
199
  tsne_cols = embedding_cols
200
  color = None
201
- hover_data = 'text_original'
 
 
 
 
 
 
 
 
 
202
 
203
  tsne_results = tsne.fit_transform(df[tsne_cols])
204
  tsne_results = pd.DataFrame(
@@ -230,7 +243,8 @@ def tsne_plots(df, encoder, emotion_cols, color_emotion, tsne_perplexity):
230
  hover_data=hover_data
231
  )
232
  fig3d.update_layout(
233
- title_text="t-SNE Visualization Over Time"
 
234
  )
235
 
236
  return df, [fig2d, fig3d]
@@ -285,7 +299,15 @@ yt_api = YouTubeAPI(
285
 
286
  # Input form
287
  with st.form(key='input'):
288
- video_id = st.text_input("Video ID")
 
 
 
 
 
 
 
 
289
 
290
  # Emotions
291
  emotions_checkbox = st.checkbox(
@@ -302,7 +324,7 @@ with st.form(key='input'):
302
  nmf_components = st.slider(
303
  "Topics (NMF Components)",
304
  min_value=2,
305
- max_value=20,
306
  value=8,
307
  step=1,
308
  )
@@ -335,9 +357,9 @@ with st.form(key='input'):
335
  step=1,
336
  )
337
 
338
- tsne_color_emotion = st.selectbox(
339
- "Emotion For The Plot Color",
340
- options=['first_emotion', 'second_emotion']
341
  )
342
 
343
  # Language Map
@@ -356,6 +378,9 @@ if submit:
356
  comments = yt_api.get_comments(video_id)
357
  except KeyError:
358
  st.write("Video not found.")
 
 
 
359
  bad_id = True
360
 
361
  if not bad_id:
@@ -387,10 +412,15 @@ if submit:
387
 
388
  if tsne_checkbox:
389
  # t-SNE visualization
 
 
 
 
 
390
  df, tsne_figs = tsne_plots(df,
391
  sentence_encoder,
392
  emotion_cols,
393
- tsne_color_emotion,
394
  tsne_perplexity)
395
  plots.extend(tsne_figs)
396
 
 
1
  import os
2
+ import urllib.parse as urlparse
3
  from dotenv import load_dotenv
4
  from transformers import pipeline
5
  from sentence_transformers import SentenceTransformer
 
135
  for i, col in enumerate(topic_cols):
136
  df[col] = nmf_embeddings[i]
137
 
138
+ # Create `main_topic` column with the highest value topic name
139
+ df['main_topic'] = df[topic_cols].apply(lambda row: row.idxmax(), axis=1)
140
+
141
  # Get word values for every topic
142
  word_df = pd.DataFrame(
143
  nmf.components_.T,
 
175
  return df, [topic_words_fig, contributions_fig]
176
 
177
 
178
+ def tsne_plots(df, encoder, emotion_cols, tsne_color, tsne_perplexity):
179
  """
180
  Encodes all `text_original` values of `df` DataFrame with `encoder`,
181
  uses t-SNE algorithm for visualization on these embeddings and on
 
197
  # Also use predicted emotions
198
  if emotion_cols:
199
  tsne_cols = embedding_cols + emotion_cols
200
+ color = tsne_color
201
  hover_data = ['first_emotion', 'second_emotion', 'text_original']
202
  else:
203
  tsne_cols = embedding_cols
204
  color = None
205
+ hover_data = ['text_original']
206
+
207
+ if 'main_topic' in df.columns:
208
+ hover_data.append('main_topic')
209
+
210
+ # Color column
211
+ if 'main_topic' in df.columns or emotion_cols:
212
+ color = tsne_color
213
+ else:
214
+ color = None
215
 
216
  tsne_results = tsne.fit_transform(df[tsne_cols])
217
  tsne_results = pd.DataFrame(
 
243
  hover_data=hover_data
244
  )
245
  fig3d.update_layout(
246
+ title_text="t-SNE Visualization Over Time",
247
+ height=800
248
  )
249
 
250
  return df, [fig2d, fig3d]
 
299
 
300
  # Input form
301
  with st.form(key='input'):
302
+ # Input
303
+ url_input = st.text_input("URL or ID")
304
+ # Get ID from URL
305
+ url_data = urlparse.urlparse(url_input)
306
+ query = urlparse.parse_qs(url_data.query)
307
+ if 'v' in query:
308
+ video_id = query['v'][0]
309
+ else:
310
+ video_id = url_input
311
 
312
  # Emotions
313
  emotions_checkbox = st.checkbox(
 
324
  nmf_components = st.slider(
325
  "Topics (NMF Components)",
326
  min_value=2,
327
+ max_value=12,
328
  value=8,
329
  step=1,
330
  )
 
357
  step=1,
358
  )
359
 
360
+ tsne_color = st.selectbox(
361
+ "Plot Color",
362
+ options=['main_topic', 'first_emotion', 'second_emotion']
363
  )
364
 
365
  # Language Map
 
378
  comments = yt_api.get_comments(video_id)
379
  except KeyError:
380
  st.write("Video not found.")
381
+ st.write(query)
382
+ st.write('v' in query)
383
+ st.write(video_id)
384
  bad_id = True
385
 
386
  if not bad_id:
 
412
 
413
  if tsne_checkbox:
414
  # t-SNE visualization
415
+ if not nmf_checkbox:
416
+ tsne_color = 'first_emotion'
417
+ if not emotions_checkbox:
418
+ tsne_color = 'main_topic'
419
+
420
  df, tsne_figs = tsne_plots(df,
421
  sentence_encoder,
422
  emotion_cols,
423
+ tsne_color,
424
  tsne_perplexity)
425
  plots.extend(tsne_figs)
426