File size: 639 Bytes
63a8664 ddd74cb 63a8664 e7b83d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def get_scaler(csv_path="data.csv", n_clusters=20, random_state=None):
    """Fit a standardize-then-KMeans pipeline on the numeric columns of a CSV.

    The CSV is loaded with pandas, every numeric column is selected as the
    feature matrix, and a ``StandardScaler`` -> ``KMeans`` pipeline is fitted
    on it. The resulting cluster index of each row is stored in a new
    ``"cluster_label"`` column of the loaded frame.

    Args:
        csv_path: Path of the CSV file to load. Defaults to ``"data.csv"``,
            the location the function has always read from.
        n_clusters: Number of KMeans clusters. Defaults to 20.
        random_state: Seed forwarded to ``KMeans``. ``None`` (the default)
            keeps KMeans' own unseeded behaviour; pass an int to obtain
            reproducible cluster labels.

    Returns:
        A ``(data, pipeline)`` tuple: the loaded DataFrame with the added
        ``"cluster_label"`` column, and the fitted pipeline (despite the
        function's name, the whole pipeline is returned, not only the scaler).
    """
    data = pd.read_csv(csv_path)

    song_cluster_pipeline = Pipeline(
        [
            ("scaler", StandardScaler()),
            (
                "kmeans",
                KMeans(
                    n_clusters=n_clusters,
                    random_state=random_state,
                    verbose=False,
                ),
            ),
        ],
        verbose=False,
    )

    # Select the features before adding "cluster_label" so the label column
    # never becomes part of the clustering input.
    X = data.select_dtypes(np.number)
    data["cluster_label"] = song_cluster_pipeline.fit_predict(X)
    return data, song_cluster_pipeline
|