radames's picture
radames HF staff
add transformers emmbeddings and UMAP
9663a4b
import umap
import hdbscan
import copy
class UMAPReducer:
def __init__(self, umap_options={}, cluster_options={}):
# set options with defaults
self.umap_options = {'n_components': 2, 'spread': 1, 'min_dist': 0.1, 'n_neighbors': 15,
'metric': 'cosine', "verbose": True, **umap_options}
self.cluster_options = {'allow_single_cluster': True, 'min_cluster_size': 500, 'min_samples': 10, **cluster_options}
def setParams(self, umap_options={}, cluster_options={}):
# update params
self.umap_options = {**self.umap_options, **umap_options}
self.cluster_options = {**self.cluster_options, **cluster_options}
def clusterAnalysis(self, data):
print("Cluster params:", self.cluster_options)
clusters = hdbscan.HDBSCAN().fit(data) # **self.cluster_options
return clusters
def embed(self, data):
print("UMAP params:", self.umap_options)
result = umap.UMAP(**self.umap_options).fit_transform(data)
return result