MarMont committed on
Commit
49e4936
1 Parent(s): 95dd02a

more debugging

Browse files
Files changed (2) hide show
  1. app.py +6 -0
  2. appv1.py +2 -1
app.py CHANGED
@@ -185,6 +185,8 @@ def get_topic_value(row, i):
185
  print(e)
186
 
187
  def full_lda(df):
 
 
188
  df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
189
 
190
  # Apply the function above and get tweets free of emoji's
@@ -243,6 +245,7 @@ def full_lda(df):
243
  # Apply tokenizer
244
  df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
245
 
 
246
  # Create a id2word dictionary
247
  global id2word
248
  id2word = Dictionary(df['lemma_tokens'])
@@ -289,6 +292,7 @@ def full_lda(df):
289
  global num_topics
290
  num_topics = coherence_averages.index(k_max) + 2
291
 
 
292
  grid = {}
293
  grid['Validation_Set'] = {}
294
 
@@ -360,6 +364,7 @@ def full_lda(df):
360
 
361
  lda_topics = lda_model_final.show_topics(num_words=10)
362
 
 
363
  topics = []
364
  filters = [lambda x: x.lower(), strip_punctuation, strip_numeric]
365
 
@@ -377,6 +382,7 @@ def full_lda(df):
377
  topic_clusters.append(df[df['max_topic'].isin(([i]))])
378
  topic_clusters[i] = topic_clusters[i]['original_tweets'].tolist()
379
 
 
380
  global top_tweets
381
  top_tweets = []
382
  for i in range(len(topic_clusters)):
 
185
  print(e)
186
 
187
  def full_lda(df):
188
+
189
+ print('cleaning')
190
  df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
191
 
192
  # Apply the function above and get tweets free of emoji's
 
245
  # Apply tokenizer
246
  df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
247
 
248
+ print('base model setup')
249
  # Create a id2word dictionary
250
  global id2word
251
  id2word = Dictionary(df['lemma_tokens'])
 
292
  global num_topics
293
  num_topics = coherence_averages.index(k_max) + 2
294
 
295
+ print('hyperparameter opt')
296
  grid = {}
297
  grid['Validation_Set'] = {}
298
 
 
364
 
365
  lda_topics = lda_model_final.show_topics(num_words=10)
366
 
367
+ print('assign topics')
368
  topics = []
369
  filters = [lambda x: x.lower(), strip_punctuation, strip_numeric]
370
 
 
382
  topic_clusters.append(df[df['max_topic'].isin(([i]))])
383
  topic_clusters[i] = topic_clusters[i]['original_tweets'].tolist()
384
 
385
+ print('rep topics')
386
  global top_tweets
387
  top_tweets = []
388
  for i in range(len(topic_clusters)):
appv1.py CHANGED
@@ -555,5 +555,6 @@ iface = gr.Interface(fn=main,
555
  ],
556
  # examples=examples,
557
  outputs=["text",
558
- "text"])
 
559
  iface.launch()
 
555
  ],
556
  # examples=examples,
557
  outputs=["text",
558
+ "text"]
559
+ )
560
  iface.launch()