"""Train a TF-IDF + K-Means text clustering model and persist it to disk.

Reads ``./train.csv``, vectorizes its ``product_descriptions`` column with
TF-IDF, clusters the resulting matrix with K-Means, and pickles both the
fitted model and the fitted vectorizer into the current working directory
(``model.pkl`` and ``vectorizer.pkl``).
"""
import pickle

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the training data. dropna() with default arguments discards every row
# that has a missing value in any column, not only the text column.
product_descriptions = pd.read_csv("./train.csv")
product_descriptions = product_descriptions.dropna()

# Learn the TF-IDF vocabulary (English stop words removed) and build the
# document-term matrix in one pass.
vectorizer = TfidfVectorizer(stop_words='english')
X1 = vectorizer.fit_transform(product_descriptions["product_descriptions"])

# Number of clusters to fit.
true_k = 10
# NOTE(review): no random_state is given and n_init=1, so cluster assignments
# may differ between runs -- confirm whether reproducibility is required.
model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1)
model.fit(X1)

# Persist both artifacts. Context managers guarantee the files are flushed
# and closed even if pickling raises, instead of relying on garbage
# collection to close an anonymous file handle.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)