juanmartip95 committed on
Commit
9112b55
1 Parent(s): 8d44559

Update recommender_system.py

Browse files
Files changed (1) hide show
  1. recommender_system.py +70 -65
recommender_system.py CHANGED
@@ -16,11 +16,10 @@ import plotly.graph_objects as go
16
 
17
 
18
  COLUMN_NOT_DISPLAY = [
19
- "StockCode",
20
- "UnitPrice",
21
- "Country",
22
- "CustomerIndex",
23
- "ProductIndex",
24
  ]
25
 
26
 
@@ -92,15 +91,15 @@ def explain_recommendation(
92
  suggested_items_id = [id[0] for id in explained]
93
 
94
  suggested_description = (
95
- df.loc[df.ProductIndex == suggestion][["Description", "ProductIndex"]]
96
- .drop_duplicates(subset=["ProductIndex"])["Description"]
97
  .unique()[0]
98
  )
99
  similar_items_description = (
100
- df.loc[df["ProductIndex"].isin(suggested_items_id)][
101
- ["Description", "ProductIndex"]
102
  ]
103
- .drop_duplicates(subset=["ProductIndex"])["Description"]
104
  .unique()
105
  )
106
 
@@ -120,8 +119,8 @@ def explain_recommendation(
120
 
121
  def print_suggestions(suggestions: List[int], df: pd.DataFrame):
122
  similar_items_description = (
123
- df.loc[df["ProductIndex"].isin(suggestions)][["Description", "ProductIndex"]]
124
- .drop_duplicates(subset=["ProductIndex"])["Description"]
125
  .unique()
126
  )
127
 
@@ -131,35 +130,39 @@ def print_suggestions(suggestions: List[int], df: pd.DataFrame):
131
 
132
  st.write("\n".join(output))
133
 
134
-
135
- def display_user_char(user: int, data: pd.DataFrame):
136
- subset = data[data.CustomerIndex == user]
137
- # products = subset.groupby("ProductIndex").agg(
138
- # {"Description": lambda x: x.iloc[0], "Quantity": sum}
139
- # )
140
 
141
  st.write(
142
- "The user {} bought {} distinct products. Here is the purchase history: ".format(
143
- user, subset["Description"].nunique()
144
  )
145
  )
 
 
146
  st.dataframe(
147
- subset.sort_values("InvoiceDate").drop(
148
  # Do not show the customer since we are displaying the
149
  # information for a specific customer.
150
- COLUMN_NOT_DISPLAY + ["CustomerID"],
151
  axis=1,
152
  )
153
  )
 
154
  st.write("-----")
155
 
156
 
157
- def _extract_description(df, products):
158
- desc = df[df["ProductIndex"].isin(products)].drop_duplicates(
159
- "ProductIndex", ignore_index=True
160
- )[["ProductIndex", "Description"]]
161
- return desc.set_index("ProductIndex")
162
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  def display_recommendation_plots(
165
  user_id: int,
@@ -186,32 +189,32 @@ def display_recommendation_plots(
186
  print("=" * 80)
187
 
188
  # Find the purchases of similar users
189
- bought_by_similar_users = []
190
 
191
- sim_users, _ = model.similar_users(user_id)
192
 
193
- for u in sim_users:
194
- _, sim_purchases = model.user_product_matrix[u].nonzero()
195
- bought_by_similar_users.append(sim_purchases)
196
 
197
- bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))
198
 
199
- print("Similar bought computed")
200
- print(bought_by_similar_users)
201
- print("=" * 80)
202
 
203
  # Compute the t-sne
204
 
205
  # Concatenate all the vectors so the decomposition is computed only once
206
- to_decompose = np.concatenate(
207
- (
208
- model.item_factors[suggestions],
209
- model.item_factors[contributions],
210
- model.item_factors[bought_by_similar_users],
211
- )
212
- )
213
 
214
- print(f"Shape to decompose: {to_decompose.shape}")
215
 
216
  with st.spinner("Computing plots (this might take around 60 seconds)..."):
217
  elapsed = time.time()
@@ -238,9 +241,9 @@ def display_recommendation_plots(
238
  # Also, extract the description to create a nice hover in
239
  # the final plot.
240
 
241
- contribution_description = _extract_description(df, contributions)
242
- items_other_description = _extract_description(df, bought_by_similar_users)
243
- suggestion_description = _extract_description(df, suggestions)
244
 
245
  # Plot the scatterplot
246
 
@@ -260,21 +263,21 @@ def display_recommendation_plots(
260
  )
261
  )
262
 
263
- fig.add_trace(
264
- go.Scatter(
265
- x=items_others_dec[:, 0],
266
- y=items_others_dec[:, 1],
267
- mode="markers",
268
- name="Product bought by similar users",
269
- opacity=0.7,
270
- marker_symbol="circle-open",
271
- marker_color="#FA5F19",
272
- marker_size=10,
273
- hovertext=items_other_description.loc[
274
- bought_by_similar_users
275
- ].values.squeeze(),
276
- )
277
- )
278
 
279
  fig.add_trace(
280
  go.Scatter(
@@ -310,9 +313,11 @@ def _tsne_decomposition(data: np.ndarray, tsne_args: Dict[str, Any]):
310
  def main():
311
  # Load and process data
312
  data, users, products = load_and_preprocess_data()
 
 
313
  recommender = create_and_fit_recommender(
314
  "als",
315
- data["Quantity"],
316
  users,
317
  products,
318
  )
@@ -332,7 +337,7 @@ The dataset used for these computations is the following:
332
  # Convert to int just to display the column without trailing decimals.
333
  # @note: I know I can use the "format" function of pandas, but I found out
334
  # it is super slow when formatting large tables.
335
- to_display["Price"] = to_display["Price"].astype(int)
336
 
337
  # Show the data
338
  st.dataframe(
@@ -353,7 +358,7 @@ The dataset used for these computations is the following:
353
  submitted = st.form_submit_button("Recommend!")
354
  if submitted:
355
  # show_purhcase_history(user, data)
356
- display_user_char(user, data)
357
  suggestions_and_score = recommender.recommend_products(
358
  user, items_to_recommend
359
  )
 
16
 
17
 
18
  COLUMN_NOT_DISPLAY = [
19
+ "ISBN",
20
+ "Location",
21
+ "Age",
22
+ "User-ID",
 
23
  ]
24
 
25
 
 
91
  suggested_items_id = [id[0] for id in explained]
92
 
93
  suggested_description = (
94
+ df.loc[df.ISBN == suggestion][["Book-Title", "ISBN"]]
95
+ .drop_duplicates(subset=["ISBN"])["Book-Title"]
96
  .unique()[0]
97
  )
98
  similar_items_description = (
99
+ df.loc[df["ISBN"].isin(suggested_items_id)][
100
+ ["Book-Title", "ISBN"]
101
  ]
102
+ .drop_duplicates(subset=["ISBN"])["Book-Title"]
103
  .unique()
104
  )
105
 
 
119
 
120
  def print_suggestions(suggestions: List[int], df: pd.DataFrame):
121
  similar_items_description = (
122
+ df.loc[df["ISBN"].isin(suggestions)][["Book-Title", "ISBN"]]
123
+ .drop_duplicates(subset=["ISBN"])["Book-Title"]
124
  .unique()
125
  )
126
 
 
130
 
131
  st.write("\n".join(output))
132
 
133
def display_user_rat(user: int, data: pd.DataFrame):
    """Render the rating history of a single user in the Streamlit page.

    Writes a one-line summary (how many distinct book titles the user
    rated), then a table with the user's rows, then a horizontal separator.

    Args:
        user: Value matched against the "User-ID" column of ``data``.
        data: Ratings table; must contain the "User-ID" and "Book-Title"
            columns plus every column named in ``COLUMN_NOT_DISPLAY``
            (``DataFrame.drop`` raises ``KeyError`` otherwise).
    """
    user_rows = data[data["User-ID"] == user]
    distinct_titles = user_rows["Book-Title"].nunique()

    st.write(
        "The user {} rated {} distinct books. Here is the rating history: ".format(
            user, distinct_titles
        )
    )

    # Displaying the subset of books rated by the user.
    # The columns listed in the module-level COLUMN_NOT_DISPLAY are hidden:
    # we are displaying the information for one specific user, so the
    # identifying columns add nothing to the table.
    history = user_rows.sort_values("User-ID").drop(COLUMN_NOT_DISPLAY, axis=1)
    st.dataframe(history)

    st.write("-----")
153
 
154
 
 
 
 
 
 
155
 
156
def _extract_author(df, products):
    """Return the author of each requested book, indexed by ISBN.

    Args:
        df: DataFrame with at least the "ISBN" and "Book-Author" columns.
        products: Collection of ISBN values to look up (anything accepted
            by ``Series.isin``).

    Returns:
        A DataFrame indexed by "ISBN" with a single "Book-Author" column,
        holding one row per distinct ISBN found in ``df`` (the first
        occurrence wins). ISBNs absent from ``df`` are simply missing.
    """
    # Use the frame passed by the caller: the previous body ignored ``df``
    # and read a global ``merged_df`` that this function never receives.
    matches = df[df["ISBN"].isin(products)]
    unique_books = matches.drop_duplicates(subset="ISBN", ignore_index=True)
    return unique_books[["ISBN", "Book-Author"]].set_index("ISBN")
161
def _extract_title(df, products):
    """Return the title of each requested book, indexed by ISBN.

    Args:
        df: DataFrame with at least the "ISBN" and "Book-Title" columns.
        products: Collection of ISBN values to look up (anything accepted
            by ``Series.isin``).

    Returns:
        A DataFrame indexed by "ISBN" with a single "Book-Title" column,
        holding one row per distinct ISBN found in ``df`` (the first
        occurrence wins). ISBNs absent from ``df`` are simply missing.
    """
    # Use the frame passed by the caller: the previous body ignored ``df``
    # and read a global ``merged_df`` that this function never receives.
    matches = df[df["ISBN"].isin(products)]
    unique_books = matches.drop_duplicates(subset="ISBN", ignore_index=True)
    # Index by "ISBN": the selected frame has no "ProductIndex" column, so
    # the previous ``set_index("ProductIndex")`` always raised KeyError.
    return unique_books[["ISBN", "Book-Title"]].set_index("ISBN")
166
 
167
  def display_recommendation_plots(
168
  user_id: int,
 
189
  print("=" * 80)
190
 
191
  # Find the purchases of similar users
192
+ #bought_by_similar_users = []
193
 
194
+ #sim_users, _ = model.similar_users(user_id)
195
 
196
+ #for u in sim_users:
197
+ # _, sim_purchases = model.user_product_matrix[u].nonzero()
198
+ # bought_by_similar_users.append(sim_purchases)
199
 
200
+ #bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))
201
 
202
+ #print("Similar bought computed")
203
+ #print(bought_by_similar_users)
204
+ #print("=" * 80)
205
 
206
  # Compute the t-sne
207
 
208
  # Concatenate all the vectors so the decomposition is computed only once
209
+ #to_decompose = np.concatenate(
210
+ # (
211
+ # model.item_factors[suggestions],
212
+ # model.item_factors[contributions],
213
+ # model.item_factors[bought_by_similar_users],
214
+ # )
215
+ #)
216
 
217
+ #print(f"Shape to decompose: {to_decompose.shape}")
218
 
219
  with st.spinner("Computing plots (this might take around 60 seconds)..."):
220
  elapsed = time.time()
 
241
  # Also, extract the description to create a nice hover in
242
  # the final plot.
243
 
244
+ contribution_description = _extract_description(merged_df, contributions)
245
+ #items_other_description = _extract_description(merged_df, bought_by_similar_users)
246
+ suggestion_description = _extract_description(merged_df, suggestions)
247
 
248
  # Plot the scatterplot
249
 
 
263
  )
264
  )
265
 
266
+ #fig.add_trace(
267
+ # go.Scatter(
268
+ # x=items_others_dec[:, 0],
269
+ # y=items_others_dec[:, 1],
270
+ # mode="markers",
271
+ # name="Product bought by similar users",
272
+ # opacity=0.7,
273
+ # marker_symbol="circle-open",
274
+ # marker_color="#FA5F19",
275
+ # marker_size=10,
276
+ # hovertext=items_other_description.loc[
277
+ # bought_by_similar_users
278
+ # ].values.squeeze(),
279
+ # )
280
+ #)
281
 
282
  fig.add_trace(
283
  go.Scatter(
 
313
  def main():
314
  # Load and process data
315
  data, users, products = load_and_preprocess_data()
316
+ users=data['User-ID']
317
+ products=data['ISBN']
318
  recommender = create_and_fit_recommender(
319
  "als",
320
+ data["Book-Rating"],
321
  users,
322
  products,
323
  )
 
337
  # Convert to int just to display the column without trailing decimals.
338
  # @note: I know I can use the "format" function of pandas, but I found out
339
  # it is super slow when formatting large tables.
340
+ to_display["Book-Rating"] = to_display["Book-Rating"].astype(int)
341
 
342
  # Show the data
343
  st.dataframe(
 
358
  submitted = st.form_submit_button("Recommend!")
359
  if submitted:
360
  # show_purhcase_history(user, data)
361
+ display_user_rat(user, data)
362
  suggestions_and_score = recommender.recommend_products(
363
  user, items_to_recommend
364
  )