#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Fri May 20 15:18:56 2022 @author: dinesh """ import numpy as np import math def kMedoids(D, k, tmax=100): # determine dimensions of distance matrix D m, n = D.shape np.fill_diagonal(D, math.inf) if k > n: raise Exception('too many medoids') # randomly initialize an array of k medoid indices M = np.arange(n) np.random.shuffle(M) M = np.sort(M[:k]) # create a copy of the array of medoid indices Mnew = np.copy(M) # initialize a dictionary to represent clusters C = {} for t in range(tmax): # determine clusters, i. e. arrays of data indices J = np.argmin(D[:,M], axis=1) for kappa in range(k): C[kappa] = np.where(J==kappa)[0] # update cluster medoids for kappa in range(k): J = np.mean(D[np.ix_(C[kappa],C[kappa])],axis=1) j = np.argmin(J) Mnew[kappa] = C[kappa][j] np.sort(Mnew) # check for convergence if np.array_equal(M, Mnew): break M = np.copy(Mnew) else: # final update of cluster memberships J = np.argmin(D[:,M], axis=1) for kappa in range(k): C[kappa] = np.where(J==kappa)[0] np.fill_diagonal(D, 0) # return results return M, C