Spaces:
Build error
Build error
test: Experiment with reusing initial embeddings to avoid recalculation and speed up the process
Browse files
app.py
CHANGED
@@ -150,12 +150,15 @@ def generate_plot(
|
|
150 |
continue
|
151 |
cluster_keyword[label] = []
|
152 |
cluster_tws = []
|
|
|
153 |
for ix, obs in enumerate(encoded_labels):
|
154 |
if obs == label:
|
155 |
cluster_tws.append(tws_cleaned[ix])
|
|
|
156 |
cluster_words = [tw.split(' ') for tw in cluster_tws]
|
157 |
cluster_words = list(set(itertools.chain.from_iterable(cluster_words)))
|
158 |
-
cluster_embeddings = embed_text(cluster_tws, model)
|
|
|
159 |
cluster_embeddings_avg = np.mean(cluster_embeddings, axis=0)
|
160 |
cluster_words_embeddings = embed_text(cluster_words, model)
|
161 |
cluster_to_words_similarities = util.dot_score(cluster_embeddings_avg, cluster_words_embeddings)
|
|
|
150 |
continue
|
151 |
cluster_keyword[label] = []
|
152 |
cluster_tws = []
|
153 |
+
cluster_ixs = []
|
154 |
for ix, obs in enumerate(encoded_labels):
|
155 |
if obs == label:
|
156 |
cluster_tws.append(tws_cleaned[ix])
|
157 |
+
cluster_ixs.append(ix)
|
158 |
cluster_words = [tw.split(' ') for tw in cluster_tws]
|
159 |
cluster_words = list(set(itertools.chain.from_iterable(cluster_words)))
|
160 |
+
# cluster_embeddings = embed_text(cluster_tws, model)
|
161 |
+
cluster_embeddings = [embeddings[i] for i in cluster_ixs]
|
162 |
cluster_embeddings_avg = np.mean(cluster_embeddings, axis=0)
|
163 |
cluster_words_embeddings = embed_text(cluster_words, model)
|
164 |
cluster_to_words_similarities = util.dot_score(cluster_embeddings_avg, cluster_words_embeddings)
|