juanmartip95 committed on
Commit
e6fd8db
1 Parent(s): eec1ef5

Update recommender_system.py

Browse files
Files changed (1) hide show
  1. recommender_system.py +54 -53
recommender_system.py CHANGED
@@ -19,7 +19,8 @@ COLUMN_NOT_DISPLAY = [
19
  "ISBN",
20
  "Location",
21
  "Age",
22
- "User-ID",
 
23
  ]
24
 
25
 
@@ -91,22 +92,22 @@ def explain_recommendation(
91
  suggested_items_id = [id[0] for id in explained]
92
 
93
  suggested_description = (
94
- df.loc[df.ISBN == suggestion][["Book-Title", "ISBN"]]
95
- .drop_duplicates(subset=["ISBN"])["Book-Title"]
96
  .unique()[0]
97
  )
98
  similar_items_description = (
99
- df.loc[df["ISBN"].isin(suggested_items_id)][
100
- ["Book-Title", "ISBN"]
101
  ]
102
- .drop_duplicates(subset=["ISBN"])["Book-Title"]
103
  .unique()
104
  )
105
 
106
  output.append(
107
  f"The item **{suggested_description.strip()}** "
108
  "has been suggested because it is similar to the following products"
109
- " bought by the user:"
110
  )
111
  for description in similar_items_description:
112
  output.append(f"- {description.strip()}")
@@ -119,8 +120,8 @@ def explain_recommendation(
119
 
120
  def print_suggestions(suggestions: List[int], df: pd.DataFrame):
121
  similar_items_description = (
122
- df.loc[df["ISBN"].isin(suggestions)][["Book-Title", "ISBN"]]
123
- .drop_duplicates(subset=["ISBN"])["Book-Title"]
124
  .unique()
125
  )
126
 
@@ -131,7 +132,7 @@ def print_suggestions(suggestions: List[int], df: pd.DataFrame):
131
  st.write("\n".join(output))
132
 
133
  def display_user_rat(user: int, data: pd.DataFrame):
134
- subset = data[data["User-ID"] == user]
135
 
136
  st.write(
137
  "The user {} rated {} distinct books. Here is the rating history: ".format(
@@ -141,10 +142,10 @@ def display_user_rat(user: int, data: pd.DataFrame):
141
 
142
  # Displaying the subset of books rated by the user
143
  st.dataframe(
144
- subset.sort_values("User-ID").drop(
145
  # Do not show the customer since we are displaying the
146
  # information for a specific customer.
147
- COLUMN_NOT_DISPLAY,
148
  axis=1,
149
  )
150
  )
@@ -154,14 +155,14 @@ def display_user_rat(user: int, data: pd.DataFrame):
154
 
155
 
156
  def _extract_author(df, products):
157
- desc = merged_df[merged_df["ISBN"].isin(products)].drop_duplicates(
158
- "ISBN", ignore_index=True
159
  )[["ISBN", "Book-Author"]]
160
- return desc.set_index("ISBN")
161
  def _extract_title(df, products):
162
- desc = merged_df[merged_df["ISBN"].isin(products)].drop_duplicates(
163
- "ISBN", ignore_index=True
164
- )[["ISBN", "Book-Title"]]
165
  return desc.set_index("ProductIndex")
166
 
167
  def display_recommendation_plots(
@@ -189,32 +190,32 @@ def display_recommendation_plots(
189
  print("=" * 80)
190
 
191
  # Find the purchases of similar users
192
- #bought_by_similar_users = []
193
 
194
- #sim_users, _ = model.similar_users(user_id)
195
 
196
- #for u in sim_users:
197
- # _, sim_purchases = model.user_product_matrix[u].nonzero()
198
- # bought_by_similar_users.append(sim_purchases)
199
 
200
- #bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))
201
 
202
- #print("Similar bought computed")
203
- #print(bought_by_similar_users)
204
- #print("=" * 80)
205
 
206
  # Compute the t-sne
207
 
208
  # Concatenate all the vectors to compute the decomposition a single time
209
- #to_decompose = np.concatenate(
210
- # (
211
- # model.item_factors[suggestions],
212
- # model.item_factors[contributions],
213
- # model.item_factors[bought_by_similar_users],
214
- # )
215
- #)
216
 
217
- #print(f"Shape to decompose: {to_decompose.shape}")
218
 
219
  with st.spinner("Computing plots (this might take around 60 seconds)..."):
220
  elapsed = time.time()
@@ -236,13 +237,13 @@ def display_recommendation_plots(
236
  contribution_dec = decomposed[
237
  len(suggestions) : len(suggestions) + len(contributions), :
238
  ]
239
- items_others_dec = decomposed[-len(bought_by_similar_users) :, :]
240
 
241
  # Also, extract the description to create a nice hover in
242
  # the final plot.
243
 
244
  contribution_description = _extract_title(merged_df, contributions)
245
- #items_other_description = _extract_description(merged_df, bought_by_similar_users)
246
  suggestion_description = _extract_title(merged_df, suggestions)
247
 
248
  # Plot the scatterplot
@@ -255,7 +256,7 @@ def display_recommendation_plots(
255
  y=contribution_dec[:, 1],
256
  mode="markers",
257
  opacity=0.8,
258
- name="Similar bought by user",
259
  marker_symbol="square-open",
260
  marker_color="#010CFA",
261
  marker_size=10,
@@ -263,21 +264,21 @@ def display_recommendation_plots(
263
  )
264
  )
265
 
266
- #fig.add_trace(
267
- # go.Scatter(
268
- # x=items_others_dec[:, 0],
269
- # y=items_others_dec[:, 1],
270
- # mode="markers",
271
- # name="Product bought by similar users",
272
- # opacity=0.7,
273
- # marker_symbol="circle-open",
274
- # marker_color="#FA5F19",
275
- # marker_size=10,
276
- # hovertext=items_other_description.loc[
277
- # bought_by_similar_users
278
- # ].values.squeeze(),
279
- # )
280
- #)
281
 
282
  fig.add_trace(
283
  go.Scatter(
 
19
  "ISBN",
20
  "Location",
21
  "Age",
22
+ "CustomerIndex",
23
+ "ProductIndex",
24
  ]
25
 
26
 
 
92
  suggested_items_id = [id[0] for id in explained]
93
 
94
  suggested_description = (
95
+ df.loc[df.ISBN == suggestion][["Book-Title", "ProductIndex"]]
96
+ .drop_duplicates(subset=["ProductIndex"])["Book-Title"]
97
  .unique()[0]
98
  )
99
  similar_items_description = (
100
+ df.loc[df["ProductIndex"].isin(suggested_items_id)][
101
+ ["Book-Title", "ProductIndex"]
102
  ]
103
+ .drop_duplicates(subset=["ProductIndex"])["Book-Title"]
104
  .unique()
105
  )
106
 
107
  output.append(
108
  f"The item **{suggested_description.strip()}** "
109
  "has been suggested because it is similar to the following products"
110
+ " rated by the user:"
111
  )
112
  for description in similar_items_description:
113
  output.append(f"- {description.strip()}")
 
120
 
121
  def print_suggestions(suggestions: List[int], df: pd.DataFrame):
122
  similar_items_description = (
123
+ df.loc[df["ProductIndex"].isin(suggestions)][["Book-Title", "ProductIndex"]]
124
+ .drop_duplicates(subset=["ProductIndex"])["Book-Title"]
125
  .unique()
126
  )
127
 
 
132
  st.write("\n".join(output))
133
 
134
  def display_user_rat(user: int, data: pd.DataFrame):
135
+ subset = data[data.CustomerIndex == user]
136
 
137
  st.write(
138
  "The user {} rated {} distinct books. Here is the rating history: ".format(
 
142
 
143
  # Displaying the subset of books rated by the user
144
  st.dataframe(
145
+ subset.sort_values("CustomerIndex").drop(
146
  # Do not show the customer since we are displaying the
147
  # information for a specific customer.
148
+ COLUMN_NOT_DISPLAY+ ["CustomerID"],
149
  axis=1,
150
  )
151
  )
 
155
 
156
 
157
  def _extract_author(df, products):
158
+ desc = merged_df[merged_df["ProductIndex"].isin(products)].drop_duplicates(
159
+ "ProductIndex", ignore_index=True
160
  )[["ISBN", "Book-Author"]]
161
+ return desc.set_index("ProductIndex")
162
  def _extract_title(df, products):
163
+ desc = merged_df[merged_df["ProductIndex"].isin(products)].drop_duplicates(
164
+ "ProductIndex", ignore_index=True
165
+ )[["ProductIndex", "Book-Title"]]
166
  return desc.set_index("ProductIndex")
167
 
168
  def display_recommendation_plots(
 
190
  print("=" * 80)
191
 
192
  # Find the purchases of similar users
193
+ rated_by_similar_users = []
194
 
195
+ sim_users, _ = model.similar_users(user_id)
196
 
197
+ for u in sim_users:
198
+ _, sim_purchases = model.user_product_matrix[u].nonzero()
199
+ rated_by_similar_users.append(sim_purchases)
200
 
201
+ rated_by_similar_users = np.unique(np.concatenate(rated_by_similar_users))
202
 
203
+ print("Similar rated computed")
204
+ print(rated_by_similar_users)
205
+ print("=" * 80)
206
 
207
  # Compute the t-sne
208
 
209
  # Concatenate all the vectors to compute the decomposition a single time
210
+ to_decompose = np.concatenate(
211
+ (
212
+ model.item_factors[suggestions],
213
+ model.item_factors[contributions],
214
+ model.item_factors[rated_by_similar_users_by_similar_users],
215
+ )
216
+ )
217
 
218
+ print(f"Shape to decompose: {to_decompose.shape}")
219
 
220
  with st.spinner("Computing plots (this might take around 60 seconds)..."):
221
  elapsed = time.time()
 
237
  contribution_dec = decomposed[
238
  len(suggestions) : len(suggestions) + len(contributions), :
239
  ]
240
+ items_others_dec = decomposed[-len(rated_by_similar_users) :, :]
241
 
242
  # Also, extract the description to create a nice hover in
243
  # the final plot.
244
 
245
  contribution_description = _extract_title(merged_df, contributions)
246
+ items_other_description = _extract_title(merged_df, rated_by_similar_users)
247
  suggestion_description = _extract_title(merged_df, suggestions)
248
 
249
  # Plot the scatterplot
 
256
  y=contribution_dec[:, 1],
257
  mode="markers",
258
  opacity=0.8,
259
+ name="Similar rated by user",
260
  marker_symbol="square-open",
261
  marker_color="#010CFA",
262
  marker_size=10,
 
264
  )
265
  )
266
 
267
+ fig.add_trace(
268
+ go.Scatter(
269
+ x=items_others_dec[:, 0],
270
+ y=items_others_dec[:, 1],
271
+ mode="markers",
272
+ name="Product rated by similar users",
273
+ opacity=0.7,
274
+ marker_symbol="circle-open",
275
+ marker_color="#FA5F19",
276
+ marker_size=10,
277
+ hovertext=items_other_description.loc[
278
+ rated_by_similar_users
279
+ ].values.squeeze(),
280
+ )
281
+ )
282
 
283
  fig.add_trace(
284
  go.Scatter(