Amit Kumar
fit data using kmeans
e7b83d9
raw
history blame
477 Bytes
def get_scaler(data_path="data.csv", n_clusters=20, random_state=None):
    """Fit a scale-then-cluster pipeline on a CSV file and label every row.

    Despite its name, this function returns more than a scaler: it returns
    the loaded data (with cluster labels attached) together with the whole
    fitted pipeline, whose first step is the scaler.

    Args:
        data_path: Path of the CSV file to load. Defaults to ``"data.csv"``.
        n_clusters: Number of k-means clusters to fit. Defaults to 20.
        random_state: Forwarded to ``KMeans``. The default ``None`` keeps
            KMeans' own default seeding; pass an int to request
            reproducible labels.

    Returns:
        A ``(data, song_cluster_pipeline)`` tuple: the DataFrame read from
        ``data_path`` with an added ``"cluster_label"`` column, and the
        fitted ``Pipeline`` with steps ``"scaler"`` and ``"kmeans"``.
    """
    data = pd.read_csv(data_path)

    song_cluster_pipeline = Pipeline(
        [
            ("scaler", StandardScaler()),
            (
                "kmeans",
                KMeans(
                    n_clusters=n_clusters,
                    verbose=False,
                    random_state=random_state,
                ),
            ),
        ],
        verbose=False,
    )

    # Only numeric columns are fed to the pipeline. They are selected before
    # "cluster_label" is added, so the label never becomes a feature.
    X = data.select_dtypes(np.number)
    data["cluster_label"] = song_cluster_pipeline.fit_predict(X)

    return data, song_cluster_pipeline