azizalto's picture
init
c5301d0
raw
history blame
1.84 kB
# src: https://gist.github.com/iamaziz/ff570a6826b6d56c32b9d497a73e688c
# src: https://gist.github.com/iamaziz/0786e3de174c79839e42a5926f25acb2
def distance(u, v):
"""
Calculates Euclidean distance between two point
distance = square_root( sum(u_i - v_i)^2 )
u: [float, float], point1
v: [float, float], point2
"""
sum_ = sum((u[i] - v[i]) ** 2 for i in range(len(u)))
return sum_ ** (1 / 2)
def get_closer(target, *args):
"""
Return the closest point (from points in `args`) to target
target: [float], target point
*args: [[float]], list of points
"""
min_distance = float("inf")
closer = target
for point in args:
d = distance(point, target)
if d < min_distance:
min_distance = d
closer = point
return closer
def get_center(cluster):
"""
Calculates the centroid point for `cluster`
cluster: [[float]], list of the points in cluster
"""
center = []
n = len(cluster)
for i in range(len(cluster[0])):
c = sum(p[i] for p in cluster) / n
center.append(round(c, 1))
return center
def k_means(data, k=2, *centers):
"""
Recursive k_means algorithm
data: [[float]], data points to consider for clustering
k: int, number of clusters
centers: [[float]], optiona - initial centroids
"""
centers = list(centers) if centers else [data[i] for i in range(k)]
clusters = [[] for _ in range(k)]
for point in data:
nearest = get_closer(point, *centers)
nearest_cluster_index = centers.index(nearest)
clusters[nearest_cluster_index].append(point)
new_centers = [get_center(cluster) for cluster in clusters]
if centers == new_centers:
return clusters, centers
return k_means(data, k, *new_centers)