Ishaan Shah committed on
Commit 267e3a7 • 1 Parent(s): 29af37a
Files changed (8)
  1. Dockerfile +9 -0
  2. README.md +6 -4
  3. api.py +25 -0
  4. app.py +46 -0
  5. model.pkl +3 -0
  6. requirements.txt +0 -0
  7. train.py +41 -0
  8. vectorizer.pkl +3 -0
Dockerfile ADDED
@@ -0,0 +1,9 @@
+ FROM python:3.11-slim
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
README.md CHANGED
@@ -1,9 +1,11 @@
  ---
- title: Prodrec
- emoji: 😻
- colorFrom: green
+ title: Prodrectest
+ emoji: 📉
+ colorFrom: pink
  colorTo: blue
- sdk: docker
+ sdk: streamlit
+ sdk_version: 1.35.0
+ app_file: app.py
  pinned: false
  license: mit
  ---
api.py ADDED
@@ -0,0 +1,25 @@
+ from fastapi import FastAPI
+ import joblib
+
+ def show_recommendations(product):
+     Y = vectorizer.transform([product])
+     prediction = model.predict(Y)
+     return prediction
+
+ def get_cluster_terms(cluster_index):
+     cluster_terms = [terms[ind] for ind in order_centroids[cluster_index, :10]]
+     return cluster_terms
+
+ model = joblib.load("./model.pkl")
+ vectorizer = joblib.load("./vectorizer.pkl")
+
+ order_centroids = model.cluster_centers_.argsort()[:, ::-1]
+ terms = vectorizer.get_feature_names_out()
+
+ app = FastAPI()
+
+ @app.post("/inference")
+ def get_recommendations(product: str):
+     cluster_index = int(show_recommendations(product)[0])
+     cluster_terms = get_cluster_terms(cluster_index)
+     return {"cluster": cluster_index, "top_terms": cluster_terms}
app.py ADDED
@@ -0,0 +1,46 @@
+ import streamlit as st
+ import pandas as pd
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.cluster import KMeans
+ from fastapi import FastAPI
+ import joblib
+
+ def show_recommendations(product):
+     Y = vectorizer.transform([product])
+     prediction = model.predict(Y)
+     return prediction
+
+ def print_cluster(i):
+     for ind in order_centroids[i, :10]:
+         print(' %s' % terms[ind])
+
+ def get_cluster_terms(cluster_index):
+     cluster_terms = [terms[ind] for ind in order_centroids[cluster_index, :10]]
+     return cluster_terms
+
+ model = joblib.load("./model.pkl")
+ vectorizer = joblib.load("./vectorizer.pkl")
+
+ order_centroids = model.cluster_centers_.argsort()[:, ::-1]
+ terms = vectorizer.get_feature_names_out()
+
+ st.title("Product Recommendation System")
+
+ # Input for product description
+ product_input = st.text_input("Enter a product description:", "")
+
+ # Button to trigger recommendation
+ if st.button("Get Recommendations"):
+     if product_input:
+         # Get cluster for the input product
+         cluster_index = show_recommendations(product_input)[0]
+
+         # Display the cluster number
+         st.write(f"The product belongs to cluster: {cluster_index}")
+
+         # Display the top terms in the cluster
+         cluster_terms = get_cluster_terms(cluster_index)
+         st.write("Top terms in this cluster:")
+         st.write(", ".join(cluster_terms))
+     else:
+         st.write("Please enter a product description.")
model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb889cc791652561f2c91b22cee7216ef634479ed86a5a7602de6f21f5f24ad6
+ size 717173
requirements.txt ADDED
Binary file (286 Bytes)
train.py ADDED
@@ -0,0 +1,41 @@
+ import pandas as pd
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.cluster import KMeans
+ import pickle
+
+ product_descriptions = pd.read_csv("./train.csv")
+ product_descriptions = product_descriptions.dropna()
+
+ vectorizer = TfidfVectorizer(stop_words='english')
+ X1 = vectorizer.fit_transform(product_descriptions["value"])
+
+ true_k = 10
+ model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1)
+ model.fit(X1)
+
+ def show_recommendations(product):
+     Y = vectorizer.transform([product])
+     prediction = model.predict(Y)
+     return prediction
+
+ def print_cluster(i):
+     for ind in order_centroids[i, :10]:
+         print(' %s' % terms[ind])
+
+ def get_cluster_terms(cluster_index):
+     cluster_terms = [terms[ind] for ind in order_centroids[cluster_index, :10]]
+     return cluster_terms
+
+ order_centroids = model.cluster_centers_.argsort()[:, ::-1]
+ terms = vectorizer.get_feature_names_out()
+
+ print_cluster(show_recommendations("red dress")[0])
+ print_cluster(show_recommendations("water")[0])
+ print_cluster(show_recommendations("shoes")[0])
+ print_cluster(show_recommendations("cutting tool")[0])
+
+ pickle.dump(model, open("model.pkl", "wb"))
+ pickle.dump(vectorizer, open("vectorizer.pkl", "wb"))
+
+
+
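The cluster_centers_.argsort()[:, ::-1] line is what makes the clusters readable: each row of cluster_centers_ holds the TF-IDF weight of every vocabulary term for that cluster, and sorting a row in descending order yields the indices of its most characteristic terms. A self-contained toy illustration of the same pattern; the four sample descriptions are invented for the example and are not taken from train.csv:

# toy_clusters.py (illustrative only; corpus invented for the example)
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

docs = [
    "red summer dress",
    "blue evening dress",
    "steel cutting tool",
    "sharp cutting blade",
]
vectorizer = TfidfVectorizer(stop_words="english")
X = vectorizer.fit_transform(docs)

model = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X)

# Rank vocabulary indices by descending centroid weight, per cluster.
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
terms = vectorizer.get_feature_names_out()
for i in range(2):
    print(f"cluster {i}:", [terms[ind] for ind in order_centroids[i, :3]])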
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7ba96577981c278c57616ebfe977663c2e82e0be3e32282a517a50baaa99b35
+ size 272049