def get_scaler(
    data_path: str = "data.csv",
    *,
    n_clusters: int = 20,
    random_state: "int | None" = None,
) -> "tuple[pd.DataFrame, Pipeline]":
    """Load a CSV file and cluster its rows on their numeric columns.

    Despite the name, this returns more than a scaler: it builds a
    two-step pipeline (``StandardScaler`` followed by ``KMeans``), fits it
    on every numeric column of the loaded table, and stores the predicted
    cluster index of each row in a new ``"cluster_label"`` column.

    Args:
        data_path: Path of the CSV file to read. Defaults to ``"data.csv"``,
            the path the function originally had hard-coded.
        n_clusters: Number of KMeans clusters. Defaults to 20.
        random_state: Seed forwarded to ``KMeans``. The default ``None``
            leaves the clustering unseeded; pass an int for reproducible
            labels.

    Returns:
        A ``(data, pipeline)`` tuple: the loaded DataFrame with the added
        ``"cluster_label"`` column, and the fitted pipeline. The pipeline's
        ``"scaler"`` and ``"kmeans"`` steps can be looked up by those names.
    """
    data = pd.read_csv(data_path)

    song_cluster_pipeline = Pipeline(
        [
            ("scaler", StandardScaler()),
            (
                "kmeans",
                KMeans(
                    n_clusters=n_clusters,
                    random_state=random_state,
                    verbose=False,
                ),
            ),
        ],
        verbose=False,
    )

    # Only numeric columns are fed to the pipeline; any non-numeric columns
    # stay in ``data`` untouched.
    X = data.select_dtypes(np.number)

    # X is selected before the label column is added, so the labels are
    # never part of the features they were derived from.
    data["cluster_label"] = song_cluster_pipeline.fit_predict(X)
    return data, song_cluster_pipeline