File size: 1,046 Bytes
57535ba
 
 
 
9663a4b
57535ba
9663a4b
57535ba
 
9663a4b
 
 
57535ba
9663a4b
57535ba
9663a4b
 
57535ba
 
9663a4b
 
57535ba
 
 
9663a4b
 
57535ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import umap
import hdbscan
import copy


class UMAPReducer:
    """Hold option sets for UMAP embedding and HDBSCAN clustering and run both.

    Two dictionaries are kept on the instance:

    * ``umap_options`` -- keyword arguments unpacked into ``umap.UMAP(...)``.
    * ``cluster_options`` -- keyword arguments unpacked into
      ``hdbscan.HDBSCAN(...)``.

    Both start from the class-level defaults below; caller-supplied entries
    override the defaults key by key.
    """

    # Defaults are class-level so they are defined once; every instance gets
    # its own merged copy, so these dictionaries are never mutated.
    _DEFAULT_UMAP_OPTIONS = {'n_components': 2, 'spread': 1, 'min_dist': 0.1, 'n_neighbors': 15,
                             'metric': 'cosine', "verbose": True}
    _DEFAULT_CLUSTER_OPTIONS = {'allow_single_cluster': True, 'min_cluster_size': 500, 'min_samples': 10}

    def __init__(self, umap_options=None, cluster_options=None):
        """Create a reducer, overriding the defaults with any given options.

        Args:
            umap_options: Optional dict of overrides for the UMAP defaults.
                ``None`` (or an empty dict) keeps the defaults.
            cluster_options: Optional dict of overrides for the HDBSCAN
                defaults. ``None`` (or an empty dict) keeps the defaults.
        """
        # ``None`` replaces the former ``{}`` default to avoid a shared
        # mutable default argument; ``or {}`` keeps the merge unchanged.
        self.umap_options = {**self._DEFAULT_UMAP_OPTIONS, **(umap_options or {})}
        self.cluster_options = {**self._DEFAULT_CLUSTER_OPTIONS, **(cluster_options or {})}

    def setParams(self, umap_options=None, cluster_options=None):
        """Update the stored options in place of the current values.

        Only the keys present in the given dicts are changed; every other
        previously stored option is kept.

        Args:
            umap_options: Optional dict of UMAP options to add or override.
            cluster_options: Optional dict of HDBSCAN options to add or
                override.
        """
        self.umap_options = {**self.umap_options, **(umap_options or {})}
        self.cluster_options = {**self.cluster_options, **(cluster_options or {})}

    def clusterAnalysis(self, data):
        """Fit HDBSCAN on ``data`` using the stored cluster options.

        Args:
            data: Input passed unchanged to ``HDBSCAN.fit``.

        Returns:
            Whatever ``hdbscan.HDBSCAN(...).fit(data)`` returns.
        """
        print("Cluster params:", self.cluster_options)
        # The configured options must actually reach the estimator; building
        # HDBSCAN() without them silently ignored everything set through
        # __init__/setParams while still printing them as if they applied.
        clusters = hdbscan.HDBSCAN(**self.cluster_options).fit(data)
        return clusters

    def embed(self, data):
        """Fit UMAP on ``data`` with the stored options and return the result.

        Args:
            data: Input passed unchanged to ``UMAP.fit_transform``.

        Returns:
            Whatever ``umap.UMAP(...).fit_transform(data)`` returns.
        """
        print("UMAP params:", self.umap_options)
        result = umap.UMAP(**self.umap_options).fit_transform(data)
        return result