Spaces:

wilmerags
/

tweet-snest

Build error

wilmerags committed on Nov 25, 2021

Commit

4b206d5

1 Parent(s): 1ef5823

test: Experiment with reusing initial embeddings to avoid recalculation and speed up the process

Files changed (1) hide show

app.py CHANGED Viewed

@@ -150,12 +150,15 @@ def generate_plot(
                 continue
             cluster_keyword[label] = []
             cluster_tws = []
             for ix, obs in enumerate(encoded_labels):
                 if obs == label:
                     cluster_tws.append(tws_cleaned[ix])
             cluster_words = [tw.split(' ') for tw in cluster_tws]
             cluster_words = list(set(itertools.chain.from_iterable(cluster_words)))
-            cluster_embeddings = embed_text(cluster_tws, model)
             cluster_embeddings_avg = np.mean(cluster_embeddings, axis=0)
             cluster_words_embeddings = embed_text(cluster_words, model)
             cluster_to_words_similarities = util.dot_score(cluster_embeddings_avg, cluster_words_embeddings)

                 continue
             cluster_keyword[label] = []
             cluster_tws = []
+            cluster_ixs = []
             for ix, obs in enumerate(encoded_labels):
                 if obs == label:
                     cluster_tws.append(tws_cleaned[ix])
+                    cluster_ixs.append(ix)
             cluster_words = [tw.split(' ') for tw in cluster_tws]
             cluster_words = list(set(itertools.chain.from_iterable(cluster_words)))
+            # cluster_embeddings = embed_text(cluster_tws, model)
+            cluster_embeddings = [embeddings[i] for i in ixs]
             cluster_embeddings_avg = np.mean(cluster_embeddings, axis=0)
             cluster_words_embeddings = embed_text(cluster_words, model)
             cluster_to_words_similarities = util.dot_score(cluster_embeddings_avg, cluster_words_embeddings)