File size: 1,355 Bytes
c138d1f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri May 20 15:18:56 2022
@author: dinesh
"""
import numpy as np
import math
def kMedoids(D, k, tmax=100):
# determine dimensions of distance matrix D
m, n = D.shape
np.fill_diagonal(D, math.inf)
if k > n:
raise Exception('too many medoids')
# randomly initialize an array of k medoid indices
M = np.arange(n)
np.random.shuffle(M)
M = np.sort(M[:k])
# create a copy of the array of medoid indices
Mnew = np.copy(M)
# initialize a dictionary to represent clusters
C = {}
for t in range(tmax):
# determine clusters, i. e. arrays of data indices
J = np.argmin(D[:,M], axis=1)
for kappa in range(k):
C[kappa] = np.where(J==kappa)[0]
# update cluster medoids
for kappa in range(k):
J = np.mean(D[np.ix_(C[kappa],C[kappa])],axis=1)
j = np.argmin(J)
Mnew[kappa] = C[kappa][j]
np.sort(Mnew)
# check for convergence
if np.array_equal(M, Mnew):
break
M = np.copy(Mnew)
else:
# final update of cluster memberships
J = np.argmin(D[:,M], axis=1)
for kappa in range(k):
C[kappa] = np.where(J==kappa)[0]
np.fill_diagonal(D, 0)
# return results
return M, C |