04-trends / analyze.py
Leeps's picture
Upload folder using huggingface_hub
68e42b1 verified
raw
history blame contribute delete
899 Bytes
from sentence_transformers import SentenceTransformer
import hdbscan
# Shared sentence-embedding model, constructed once when this module is
# imported; cluster_items() calls model.encode() on it.
# NOTE(review): construction presumably loads (and may download) the
# "all-MiniLM-L6-v2" weights, making import slow — confirm.
model = SentenceTransformer("all-MiniLM-L6-v2")
def cluster_items(items, min_cluster_size=2):
    """Cluster item titles by embedding similarity and render a text report.

    Every item's ``"title"`` is embedded with the module-level ``model`` and
    the embeddings are clustered with HDBSCAN using the euclidean metric.
    Titles that receive the label ``-1`` (HDBSCAN's noise label) are left
    out of the report.

    Args:
        items: Iterable of mappings, each providing a ``"title"`` key.
        min_cluster_size: Forwarded to ``hdbscan.HDBSCAN``; the smallest
            group of titles that may form a cluster.

    Returns:
        ``[]`` when ``items`` yields no titles (kept as-is for backward
        compatibility with existing callers). Otherwise a string: either
        the formatted cluster listing, or
        ``"No meaningful clusters found."`` when no cluster was formed.

    Raises:
        KeyError: If an item has no ``"title"`` key.
    """
    texts = [item["title"] for item in items]
    if not texts:
        return []

    # Fewer titles than min_cluster_size can never form a cluster, so skip
    # the embedding model and HDBSCAN entirely; this also avoids handing
    # the clusterer a degenerate, too-small sample set.
    if len(texts) < min_cluster_size:
        return "No meaningful clusters found."

    embeddings = model.encode(texts)
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=min_cluster_size, metric="euclidean"
    )
    labels = clusterer.fit_predict(embeddings)

    # Group titles by cluster label, keeping clusters in the order in which
    # their first member appears in the input.
    clusters = {}
    for label, text in zip(labels, texts):
        if label == -1:  # noise: not assigned to any cluster
            continue
        clusters.setdefault(label, []).append(text)

    cluster_text = []
    for label, titles in clusters.items():
        cluster_text.append(f"🔸 Cluster {label} ({len(titles)} items):")
        cluster_text.extend(f"- {title}" for title in titles)
        cluster_text.append("")  # blank line between clusters

    # An empty join result is falsy, which selects the fallback message.
    return "\n".join(cluster_text) or "No meaningful clusters found."