Spaces:
Runtime error
Runtime error
# src: https://gist.github.com/iamaziz/ff570a6826b6d56c32b9d497a73e688c | |
# src: https://gist.github.com/iamaziz/0786e3de174c79839e42a5926f25acb2 | |
def distance(u, v): | |
""" | |
Calculates Euclidean distance between two point | |
distance = square_root( sum(u_i - v_i)^2 ) | |
u: [float, float], point1 | |
v: [float, float], point2 | |
""" | |
sum_ = sum((u[i] - v[i]) ** 2 for i in range(len(u))) | |
return sum_ ** (1 / 2) | |
def get_closer(target, *args): | |
""" | |
Return the closest point (from points in `args`) to target | |
target: [float], target point | |
*args: [[float]], list of points | |
""" | |
min_distance = float("inf") | |
closer = target | |
for point in args: | |
d = distance(point, target) | |
if d < min_distance: | |
min_distance = d | |
closer = point | |
return closer | |
def get_center(cluster): | |
""" | |
Calculates the centroid point for `cluster` | |
cluster: [[float]], list of the points in cluster | |
""" | |
center = [] | |
n = len(cluster) | |
for i in range(len(cluster[0])): | |
c = sum(p[i] for p in cluster) / n | |
center.append(round(c, 1)) | |
return center | |
def k_means(data, k=2, *centers): | |
""" | |
Recursive k_means algorithm | |
data: [[float]], data points to consider for clustering | |
k: int, number of clusters | |
centers: [[float]], optiona - initial centroids | |
""" | |
centers = list(centers) if centers else [data[i] for i in range(k)] | |
clusters = [[] for _ in range(k)] | |
for point in data: | |
nearest = get_closer(point, *centers) | |
nearest_cluster_index = centers.index(nearest) | |
clusters[nearest_cluster_index].append(point) | |
new_centers = [get_center(cluster) for cluster in clusters] | |
if centers == new_centers: | |
return clusters, centers | |
return k_means(data, k, *new_centers) | |