from typing import Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


def get_scaler(
    path: str = "data.csv",
    n_clusters: int = 20,
    *,
    random_state: Optional[int] = None,
) -> Tuple[pd.DataFrame, Pipeline]:
    """Load a CSV, cluster its numeric columns, and return both results.

    The file at *path* is read into a DataFrame, every numeric column is
    standardized and clustered with a ``StandardScaler`` -> ``KMeans``
    pipeline, and the resulting cluster index is stored in a new
    ``"cluster_label"`` column of the DataFrame.

    NOTE(review): despite the name, this returns the whole fitted pipeline
    (scaler + k-means) together with the labelled data, not just a scaler.

    Args:
        path: Location of the CSV file to load. Defaults to ``"data.csv"``,
            resolved relative to the current working directory.
        n_clusters: Number of k-means clusters to fit. Defaults to 20.
        random_state: Seed forwarded to ``KMeans``. The default ``None``
            leaves the centroid initialization unseeded, so labels may
            differ between runs; pass an int for reproducible clusters.

    Returns:
        A ``(data, pipeline)`` tuple: the loaded DataFrame with the added
        ``"cluster_label"`` column, and the fitted pipeline whose steps are
        named ``"scaler"`` and ``"kmeans"``.
    """
    data = pd.read_csv(path)

    song_cluster_pipeline = Pipeline(
        [
            ("scaler", StandardScaler()),
            (
                "kmeans",
                KMeans(
                    n_clusters=n_clusters,
                    verbose=False,
                    random_state=random_state,
                ),
            ),
        ],
        verbose=False,
    )

    # Select the features BEFORE adding "cluster_label", so the label column
    # is never part of the matrix the pipeline is fitted on.
    features = data.select_dtypes(np.number)
    data["cluster_label"] = song_cluster_pipeline.fit_predict(features)

    return data, song_cluster_pipeline