from dataclasses import dataclass
import numpy as np

from cluster.clusterer import Clusterer


@dataclass
class Kmeans(Clusterer):
    """K-means clustering via Lloyd's algorithm.

    Centroids are seeded with ``k`` distinct rows of the input (drawn with
    NumPy's global random state), then the assignment and centroid-update
    steps alternate until the assignments stop changing or ``max_iter``
    update passes have been performed.
    """

    # Number of clusters to form.
    k: int
    # Upper bound on the number of centroid-update passes run by ``build``.
    # Values below 1 are treated as 1 so at least one pass always happens.
    max_iter: int

    def build(
        self,
        X: np.ndarray,
    ) -> dict[str, np.ndarray]:
        """Cluster the rows of ``X`` into ``k`` groups.

        Args:
            X: 2-D array of shape ``(n_samples, n_features)``.

        Returns:
            A dict with two entries: ``"clusters"``, an integer array of
            shape ``(n_samples,)`` holding the centroid index of each row,
            and ``"centroids"``, a float array of shape
            ``(k, n_features)``. The returned labels are always the
            nearest-centroid assignment for the returned centroids.

        Raises:
            ValueError: If ``k`` is not between 1 and ``n_samples``.
        """
        n_samples = X.shape[0]
        if not 1 <= self.k <= n_samples:
            raise ValueError(
                f"k must be between 1 and the number of samples "
                f"({n_samples}), got {self.k}"
            )

        # Seed the centroids with k distinct rows of X.
        centroids = X[np.random.choice(
            n_samples,
            self.k,
            replace=False,
        )]

        clusters = self.assign_clusters(X, centroids)

        # Alternate update / reassign. Stop as soon as a reassignment leaves
        # every label unchanged, or once the iteration budget is spent.
        for _ in range(max(self.max_iter, 1)):
            centroids = self.update_centroids(
                self.k,
                X,
                clusters,
                previous=centroids,
            )
            reassigned = self.assign_clusters(X, centroids)
            converged = np.array_equal(reassigned, clusters)
            # Keep the newest labels either way, so the labels returned on
            # an early stop still match the returned centroids.
            clusters = reassigned
            if converged:
                break

        return {
            "clusters": clusters,
            "centroids": centroids,
        }

    @staticmethod
    def assign_clusters(
        X: np.ndarray,
        centroids: np.ndarray,
    ) -> np.ndarray:
        """Return, for every row of ``X``, the index of its nearest centroid.

        Args:
            X: 2-D array of shape ``(n_samples, n_features)``.
            centroids: 2-D array of shape ``(k, n_features)``.

        Returns:
            Integer array of shape ``(n_samples,)``; ties go to the
            lowest centroid index.
        """
        # Broadcasting (k, 1, d) against (n, d) gives a (k, n, d) array of
        # per-feature differences; reducing over the last axis leaves the
        # (k, n) matrix of Euclidean distances.
        deltas = X - centroids[:, np.newaxis]
        distances = np.sqrt((deltas ** 2).sum(axis=2))
        return np.argmin(distances, axis=0)

    @staticmethod
    def update_centroids(
        k: int,
        X: np.ndarray,
        clusters: np.ndarray,
        previous: np.ndarray | None = None,
    ) -> np.ndarray:
        """Recompute each centroid as the mean of the rows assigned to it.

        Args:
            k: Number of clusters.
            X: 2-D array of shape ``(n_samples, n_features)``.
            clusters: Array of shape ``(n_samples,)`` with the cluster
                index of each row.
            previous: Optional ``(k, n_features)`` array of the current
                centroids. A cluster with no members keeps its row from
                this array instead of becoming NaN.

        Returns:
            Float array of shape ``(k, n_features)``. Without ``previous``
            the row of an empty cluster is NaN.
        """
        centroids = np.full((k, X.shape[1]), np.nan)
        for i in range(k):
            members = X[clusters == i]
            if len(members):
                centroids[i] = members.mean(axis=0)
            elif previous is not None:
                # A NaN centroid would win every later argmin and swallow
                # all points, so an empty cluster stays where it was.
                centroids[i] = previous[i]
        return centroids

    @staticmethod
    def label():
        """Placeholder that is not implemented yet; returns ``None``."""
        # Declared static because the original signature takes no ``self``;
        # this keeps ``Kmeans.label()`` working and makes instance calls
        # valid as well.
        ...

    def main(self):
        """Return the result of ``self.from_dict()``.

        NOTE(review): ``from_dict`` is not defined in this class; it is
        presumably inherited from ``Clusterer`` -- confirm.
        """
        return self.from_dict()