Amit Kumar
add numpy import
ddd74cb
raw
history blame contribute delete
No virus
639 Bytes
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def get_scaler(data_path="data.csv", *, n_clusters=20, random_state=None):
    """Load a CSV dataset and fit a scale-then-cluster pipeline on it.

    The CSV at ``data_path`` is read into a DataFrame, its numeric columns
    are standardized with ``StandardScaler`` and clustered with ``KMeans``,
    and the resulting cluster index of every row is stored in a new
    ``"cluster_label"`` column.

    Args:
        data_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV
            file to load. Defaults to ``"data.csv"``.
        n_clusters: Number of KMeans clusters. Defaults to 20.
        random_state: Seed forwarded to ``KMeans``. The default ``None``
            leaves centroid initialization unseeded, so labels may differ
            between runs; pass an int for reproducible clustering.

    Returns:
        A ``(data, pipeline)`` tuple: the loaded DataFrame with the added
        ``"cluster_label"`` column, and the fitted ``Pipeline`` whose steps
        are named ``"scaler"`` and ``"kmeans"``.
    """
    data = pd.read_csv(data_path)

    song_cluster_pipeline = Pipeline(
        [
            ("scaler", StandardScaler()),
            (
                "kmeans",
                KMeans(
                    n_clusters=n_clusters,
                    verbose=False,
                    random_state=random_state,
                ),
            ),
        ],
        verbose=False,
    )

    # Only numeric columns can be scaled and clustered; the selection is made
    # before "cluster_label" is added, so the label never feeds the model.
    X = data.select_dtypes(np.number)
    data["cluster_label"] = song_cluster_pipeline.fit_predict(X)
    return data, song_cluster_pipeline