import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import pickle


# Train a TF-IDF + k-means clustering model on product descriptions and
# persist both fitted objects to disk.

# Load the training data; any row with a missing value in any column is
# dropped before vectorizing.
product_descriptions = pd.read_csv("./train.csv")
product_descriptions = product_descriptions.dropna()

# Convert the description text into a TF-IDF matrix, ignoring common English
# stop words.
vectorizer = TfidfVectorizer(stop_words='english')
X1 = vectorizer.fit_transform(product_descriptions["product_descriptions"])

# Cluster the vectorized descriptions into `true_k` groups using a single
# k-means++ initialisation (n_init=1) capped at 100 iterations.
# NOTE(review): no random_state is passed, so cluster assignments can differ
# from run to run -- confirm whether reproducibility is required.
true_k = 10
model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1)
model.fit(X1)

# Persist the fitted model and vectorizer. The context managers make sure
# each file is flushed and closed even if pickling raises, instead of leaving
# the handle open until garbage collection.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)