# src: https://gist.github.com/iamaziz/ff570a6826b6d56c32b9d497a73e688c # src: https://gist.github.com/iamaziz/0786e3de174c79839e42a5926f25acb2 def distance(u, v): """ Calculates Euclidean distance between two point distance = square_root( sum(u_i - v_i)^2 ) u: [float, float], point1 v: [float, float], point2 """ sum_ = sum((u[i] - v[i]) ** 2 for i in range(len(u))) return sum_ ** (1 / 2) def get_closer(target, *args): """ Return the closest point (from points in `args`) to target target: [float], target point *args: [[float]], list of points """ min_distance = float("inf") closer = target for point in args: d = distance(point, target) if d < min_distance: min_distance = d closer = point return closer def get_center(cluster): """ Calculates the centroid point for `cluster` cluster: [[float]], list of the points in cluster """ center = [] n = len(cluster) for i in range(len(cluster[0])): c = sum(p[i] for p in cluster) / n center.append(round(c, 1)) return center def k_means(data, k=2, *centers): """ Recursive k_means algorithm data: [[float]], data points to consider for clustering k: int, number of clusters centers: [[float]], optiona - initial centroids """ centers = list(centers) if centers else [data[i] for i in range(k)] clusters = [[] for _ in range(k)] for point in data: nearest = get_closer(point, *centers) nearest_cluster_index = centers.index(nearest) clusters[nearest_cluster_index].append(point) new_centers = [get_center(cluster) for cluster in clusters] if centers == new_centers: return clusters, centers return k_means(data, k, *new_centers)