Spaces:
Sleeping
Sleeping
juanmartip95
committed on
Commit
•
9112b55
1
Parent(s):
8d44559
Update recommender_system.py
Browse files- recommender_system.py +70 -65
recommender_system.py
CHANGED
@@ -16,11 +16,10 @@ import plotly.graph_objects as go
|
|
16 |
|
17 |
|
18 |
COLUMN_NOT_DISPLAY = [
|
19 |
-
"
|
20 |
-
"
|
21 |
-
"
|
22 |
-
"
|
23 |
-
"ProductIndex",
|
24 |
]
|
25 |
|
26 |
|
@@ -92,15 +91,15 @@ def explain_recommendation(
|
|
92 |
suggested_items_id = [id[0] for id in explained]
|
93 |
|
94 |
suggested_description = (
|
95 |
-
df.loc[df.
|
96 |
-
.drop_duplicates(subset=["
|
97 |
.unique()[0]
|
98 |
)
|
99 |
similar_items_description = (
|
100 |
-
df.loc[df["
|
101 |
-
["
|
102 |
]
|
103 |
-
.drop_duplicates(subset=["
|
104 |
.unique()
|
105 |
)
|
106 |
|
@@ -120,8 +119,8 @@ def explain_recommendation(
|
|
120 |
|
121 |
def print_suggestions(suggestions: List[int], df: pd.DataFrame):
|
122 |
similar_items_description = (
|
123 |
-
df.loc[df["
|
124 |
-
.drop_duplicates(subset=["
|
125 |
.unique()
|
126 |
)
|
127 |
|
@@ -131,35 +130,39 @@ def print_suggestions(suggestions: List[int], df: pd.DataFrame):
|
|
131 |
|
132 |
st.write("\n".join(output))
|
133 |
|
134 |
-
|
135 |
-
|
136 |
-
subset = data[data.CustomerIndex == user]
|
137 |
-
# products = subset.groupby("ProductIndex").agg(
|
138 |
-
# {"Description": lambda x: x.iloc[0], "Quantity": sum}
|
139 |
-
# )
|
140 |
|
141 |
st.write(
|
142 |
-
"The user {}
|
143 |
-
user, subset["
|
144 |
)
|
145 |
)
|
|
|
|
|
146 |
st.dataframe(
|
147 |
-
subset.sort_values("
|
148 |
# Do not show the customer since we are displaying the
|
149 |
# information for a specific customer.
|
150 |
-
COLUMN_NOT_DISPLAY
|
151 |
axis=1,
|
152 |
)
|
153 |
)
|
|
|
154 |
st.write("-----")
|
155 |
|
156 |
|
157 |
-
def _extract_description(df, products):
|
158 |
-
desc = df[df["ProductIndex"].isin(products)].drop_duplicates(
|
159 |
-
"ProductIndex", ignore_index=True
|
160 |
-
)[["ProductIndex", "Description"]]
|
161 |
-
return desc.set_index("ProductIndex")
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
def display_recommendation_plots(
|
165 |
user_id: int,
|
@@ -186,32 +189,32 @@ def display_recommendation_plots(
|
|
186 |
print("=" * 80)
|
187 |
|
188 |
# Find the purchases of similar users
|
189 |
-
bought_by_similar_users = []
|
190 |
|
191 |
-
sim_users, _ = model.similar_users(user_id)
|
192 |
|
193 |
-
for u in sim_users:
|
194 |
-
|
195 |
-
|
196 |
|
197 |
-
bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))
|
198 |
|
199 |
-
print("Similar bought computed")
|
200 |
-
print(bought_by_similar_users)
|
201 |
-
print("=" * 80)
|
202 |
|
203 |
# Compute the t-sne
|
204 |
|
205 |
# Concatenate all the vectors so the decomposition is computed only once
|
206 |
-
to_decompose = np.concatenate(
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
)
|
213 |
|
214 |
-
print(f"Shape to decompose: {to_decompose.shape}")
|
215 |
|
216 |
with st.spinner("Computing plots (this might take around 60 seconds)..."):
|
217 |
elapsed = time.time()
|
@@ -238,9 +241,9 @@ def display_recommendation_plots(
|
|
238 |
# Also, extract the description to create a nice hover in
|
239 |
# the final plot.
|
240 |
|
241 |
-
contribution_description = _extract_description(
|
242 |
-
items_other_description = _extract_description(
|
243 |
-
suggestion_description = _extract_description(
|
244 |
|
245 |
# Plot the scatterplot
|
246 |
|
@@ -260,21 +263,21 @@ def display_recommendation_plots(
|
|
260 |
)
|
261 |
)
|
262 |
|
263 |
-
fig.add_trace(
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
)
|
278 |
|
279 |
fig.add_trace(
|
280 |
go.Scatter(
|
@@ -310,9 +313,11 @@ def _tsne_decomposition(data: np.ndarray, tsne_args: Dict[str, Any]):
|
|
310 |
def main():
|
311 |
# Load and process data
|
312 |
data, users, products = load_and_preprocess_data()
|
|
|
|
|
313 |
recommender = create_and_fit_recommender(
|
314 |
"als",
|
315 |
-
data["
|
316 |
users,
|
317 |
products,
|
318 |
)
|
@@ -332,7 +337,7 @@ The dataset used for these computations is the following:
|
|
332 |
# Convert to int just to display the column without trailing decimals.
|
333 |
# @note: I know I can use the "format" function of pandas, but I found out
|
334 |
# it is super slow when formatting large tables.
|
335 |
-
to_display["
|
336 |
|
337 |
# Show the data
|
338 |
st.dataframe(
|
@@ -353,7 +358,7 @@ The dataset used for these computations is the following:
|
|
353 |
submitted = st.form_submit_button("Recommend!")
|
354 |
if submitted:
|
355 |
# show_purhcase_history(user, data)
|
356 |
-
|
357 |
suggestions_and_score = recommender.recommend_products(
|
358 |
user, items_to_recommend
|
359 |
)
|
|
|
16 |
|
17 |
|
18 |
COLUMN_NOT_DISPLAY = [
|
19 |
+
"ISBN",
|
20 |
+
"Location",
|
21 |
+
"Age",
|
22 |
+
"User-ID",
|
|
|
23 |
]
|
24 |
|
25 |
|
|
|
91 |
suggested_items_id = [id[0] for id in explained]
|
92 |
|
93 |
suggested_description = (
|
94 |
+
df.loc[df.ISBN == suggestion][["Book-Title", "ISBN"]]
|
95 |
+
.drop_duplicates(subset=["ISBN"])["Book-Title"]
|
96 |
.unique()[0]
|
97 |
)
|
98 |
similar_items_description = (
|
99 |
+
df.loc[df["ISBN"].isin(suggested_items_id)][
|
100 |
+
["Book-Title", "ISBN"]
|
101 |
]
|
102 |
+
.drop_duplicates(subset=["ISBN"])["Book-Title"]
|
103 |
.unique()
|
104 |
)
|
105 |
|
|
|
119 |
|
120 |
def print_suggestions(suggestions: List[int], df: pd.DataFrame):
|
121 |
similar_items_description = (
|
122 |
+
df.loc[df["ISBN"].isin(suggestions)][["Book-Title", "ISBN"]]
|
123 |
+
.drop_duplicates(subset=["ISBN"])["Book-Title"]
|
124 |
.unique()
|
125 |
)
|
126 |
|
|
|
130 |
|
131 |
st.write("\n".join(output))
|
132 |
|
133 |
+
def display_user_rat(user: int, data: pd.DataFrame):
|
134 |
+
subset = data[data["User-ID"] == user]
|
|
|
|
|
|
|
|
|
135 |
|
136 |
st.write(
|
137 |
+
"The user {} rated {} distinct books. Here is the rating history: ".format(
|
138 |
+
user, subset["Book-Title"].nunique()
|
139 |
)
|
140 |
)
|
141 |
+
|
142 |
+
# Displaying the subset of books rated by the user
|
143 |
st.dataframe(
|
144 |
+
subset.sort_values("User-ID").drop(
|
145 |
# Do not show the customer since we are displaying the
|
146 |
# information for a specific customer.
|
147 |
+
COLUMN_NOT_DISPLAY,
|
148 |
axis=1,
|
149 |
)
|
150 |
)
|
151 |
+
|
152 |
st.write("-----")
|
153 |
|
154 |
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
+
def _extract_author(df, products):
|
157 |
+
desc = merged_df[merged_df["ISBN"].isin(products)].drop_duplicates(
|
158 |
+
"ISBN", ignore_index=True
|
159 |
+
)[["ISBN", "Book-Author"]]
|
160 |
+
return desc.set_index("ISBN")
|
161 |
+
def _extract_title(df, products):
|
162 |
+
desc = merged_df[merged_df["ISBN"].isin(products)].drop_duplicates(
|
163 |
+
"ISBN", ignore_index=True
|
164 |
+
)[["ISBN", "Book-Title"]]
|
165 |
+
return desc.set_index("ProductIndex")
|
166 |
|
167 |
def display_recommendation_plots(
|
168 |
user_id: int,
|
|
|
189 |
print("=" * 80)
|
190 |
|
191 |
# Find the purchases of similar users
|
192 |
+
#bought_by_similar_users = []
|
193 |
|
194 |
+
#sim_users, _ = model.similar_users(user_id)
|
195 |
|
196 |
+
#for u in sim_users:
|
197 |
+
# _, sim_purchases = model.user_product_matrix[u].nonzero()
|
198 |
+
# bought_by_similar_users.append(sim_purchases)
|
199 |
|
200 |
+
#bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))
|
201 |
|
202 |
+
#print("Similar bought computed")
|
203 |
+
#print(bought_by_similar_users)
|
204 |
+
#print("=" * 80)
|
205 |
|
206 |
# Compute the t-sne
|
207 |
|
208 |
# Concatenate all the vectors so the decomposition is computed only once
|
209 |
+
#to_decompose = np.concatenate(
|
210 |
+
# (
|
211 |
+
# model.item_factors[suggestions],
|
212 |
+
# model.item_factors[contributions],
|
213 |
+
# model.item_factors[bought_by_similar_users],
|
214 |
+
# )
|
215 |
+
#)
|
216 |
|
217 |
+
#print(f"Shape to decompose: {to_decompose.shape}")
|
218 |
|
219 |
with st.spinner("Computing plots (this might take around 60 seconds)..."):
|
220 |
elapsed = time.time()
|
|
|
241 |
# Also, extract the description to create a nice hover in
|
242 |
# the final plot.
|
243 |
|
244 |
+
contribution_description = _extract_description(merged_df, contributions)
|
245 |
+
#items_other_description = _extract_description(merged_df, bought_by_similar_users)
|
246 |
+
suggestion_description = _extract_description(merged_df, suggestions)
|
247 |
|
248 |
# Plot the scatterplot
|
249 |
|
|
|
263 |
)
|
264 |
)
|
265 |
|
266 |
+
#fig.add_trace(
|
267 |
+
# go.Scatter(
|
268 |
+
# x=items_others_dec[:, 0],
|
269 |
+
# y=items_others_dec[:, 1],
|
270 |
+
# mode="markers",
|
271 |
+
# name="Product bought by similar users",
|
272 |
+
# opacity=0.7,
|
273 |
+
# marker_symbol="circle-open",
|
274 |
+
# marker_color="#FA5F19",
|
275 |
+
# marker_size=10,
|
276 |
+
# hovertext=items_other_description.loc[
|
277 |
+
# bought_by_similar_users
|
278 |
+
# ].values.squeeze(),
|
279 |
+
# )
|
280 |
+
#)
|
281 |
|
282 |
fig.add_trace(
|
283 |
go.Scatter(
|
|
|
313 |
def main():
|
314 |
# Load and process data
|
315 |
data, users, products = load_and_preprocess_data()
|
316 |
+
users=data['User-ID']
|
317 |
+
products=data['ISBN']
|
318 |
recommender = create_and_fit_recommender(
|
319 |
"als",
|
320 |
+
data["Book-Rating"],
|
321 |
users,
|
322 |
products,
|
323 |
)
|
|
|
337 |
# Convert to int just to display the column without trailing decimals.
|
338 |
# @note: I know I can use the "format" function of pandas, but I found out
|
339 |
# it is super slow when formatting large tables.
|
340 |
+
to_display["Book-Rating"] = to_display["Book-Rating"].astype(int)
|
341 |
|
342 |
# Show the data
|
343 |
st.dataframe(
|
|
|
358 |
submitted = st.form_submit_button("Recommend!")
|
359 |
if submitted:
|
360 |
# show_purhcase_history(user, data)
|
361 |
+
display_user_rat(user, data)
|
362 |
suggestions_and_score = recommender.recommend_products(
|
363 |
user, items_to_recommend
|
364 |
)
|