Spaces:
Sleeping
Sleeping
Update model.py
Browse files
model.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
|
| 3 |
os.environ["HF_HOME"] = "/tmp/hf-home"
|
|
@@ -42,10 +43,9 @@ def smart_summarize(text, n_clusters=1):
|
|
| 42 |
if not len(idx):
|
| 43 |
continue
|
| 44 |
# Average vector from cluster, converted to ndarray
|
| 45 |
-
avg_vector =
|
| 46 |
-
#
|
| 47 |
sim = cosine_similarity(avg_vector, tfidf_matrix[idx].toarray())
|
| 48 |
-
# Select sentence with highest similarity to avg
|
| 49 |
most_representative = sentences[idx[np.argmax(sim)]]
|
| 50 |
summary_sentences.append(most_representative)
|
| 51 |
|
|
|
|
| 1 |
+
|
| 2 |
import os
|
| 3 |
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
|
| 4 |
os.environ["HF_HOME"] = "/tmp/hf-home"
|
|
|
|
| 43 |
if not len(idx):
|
| 44 |
continue
|
| 45 |
# Average vector from cluster, converted to ndarray
|
| 46 |
+
avg_vector = tfidf_matrix[idx].mean(axis=0).A # Convert matrix to ndarray
|
| 47 |
+
# Compute cosine similarity between avg_vector and tfidf vectors in cluster
|
| 48 |
sim = cosine_similarity(avg_vector, tfidf_matrix[idx].toarray())
|
|
|
|
| 49 |
most_representative = sentences[idx[np.argmax(sim)]]
|
| 50 |
summary_sentences.append(most_representative)
|
| 51 |
|