File size: 639 Bytes
63a8664
ddd74cb
63a8664
 
 
 
e7b83d9
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import pandas as pd
import numpy as np

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def get_scaler(data_path="data.csv", n_clusters=20, random_state=None):
    """Load a CSV, cluster its rows on the numeric columns, and label them.

    Despite the name, this returns more than a scaler: it fits a
    ``StandardScaler`` -> ``KMeans`` pipeline on every numeric column of the
    loaded table and returns both the labelled table and the fitted pipeline.

    Args:
        data_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV
            file to load. Defaults to ``"data.csv"``, resolved relative to
            the current working directory.
        n_clusters: Number of KMeans clusters to fit. Defaults to 20.
        random_state: Optional seed forwarded to ``KMeans`` so that cluster
            assignments are reproducible between runs. ``None`` (the
            default) leaves KMeans unseeded.

    Returns:
        A ``(data, pipeline)`` tuple where ``data`` is the loaded DataFrame
        with an added ``"cluster_label"`` column and ``pipeline`` is the
        fitted scikit-learn ``Pipeline`` with steps ``"scaler"`` and
        ``"kmeans"``.

    Raises:
        ValueError: If the CSV contains no numeric columns to cluster on.
    """
    data = pd.read_csv(data_path)

    # Only numeric columns can be scaled and clustered; everything else
    # (e.g. text columns) is ignored for fitting but kept in the output.
    numeric_features = data.select_dtypes(np.number)
    if numeric_features.shape[1] == 0:
        raise ValueError(
            f"No numeric columns found in {data_path!r}; nothing to cluster on."
        )

    song_cluster_pipeline = Pipeline(
        [
            ("scaler", StandardScaler()),
            (
                "kmeans",
                KMeans(
                    n_clusters=n_clusters,
                    verbose=False,
                    random_state=random_state,
                ),
            ),
        ],
        verbose=False,
    )

    # fit_predict fits the scaler and KMeans in one pass and returns the
    # cluster index assigned to each row.
    data["cluster_label"] = song_cluster_pipeline.fit_predict(numeric_features)
    return data, song_cluster_pipeline