Spaces:
Sleeping
Sleeping
juanmartip95
committed on
Commit
•
e6fd8db
1
Parent(s):
eec1ef5
Update recommender_system.py
Browse files- recommender_system.py +54 -53
recommender_system.py
CHANGED
@@ -19,7 +19,8 @@ COLUMN_NOT_DISPLAY = [
|
|
19 |
"ISBN",
|
20 |
"Location",
|
21 |
"Age",
|
22 |
-
"
|
|
|
23 |
]
|
24 |
|
25 |
|
@@ -91,22 +92,22 @@ def explain_recommendation(
|
|
91 |
suggested_items_id = [id[0] for id in explained]
|
92 |
|
93 |
suggested_description = (
|
94 |
-
df.loc[df.ISBN == suggestion][["Book-Title", "
|
95 |
-
.drop_duplicates(subset=["
|
96 |
.unique()[0]
|
97 |
)
|
98 |
similar_items_description = (
|
99 |
-
df.loc[df["
|
100 |
-
["Book-Title", "
|
101 |
]
|
102 |
-
.drop_duplicates(subset=["
|
103 |
.unique()
|
104 |
)
|
105 |
|
106 |
output.append(
|
107 |
f"The item **{suggested_description.strip()}** "
|
108 |
"has been suggested because it is similar to the following products"
|
109 |
-
"
|
110 |
)
|
111 |
for description in similar_items_description:
|
112 |
output.append(f"- {description.strip()}")
|
@@ -119,8 +120,8 @@ def explain_recommendation(
|
|
119 |
|
120 |
def print_suggestions(suggestions: List[int], df: pd.DataFrame):
|
121 |
similar_items_description = (
|
122 |
-
df.loc[df["
|
123 |
-
.drop_duplicates(subset=["
|
124 |
.unique()
|
125 |
)
|
126 |
|
@@ -131,7 +132,7 @@ def print_suggestions(suggestions: List[int], df: pd.DataFrame):
|
|
131 |
st.write("\n".join(output))
|
132 |
|
133 |
def display_user_rat(user: int, data: pd.DataFrame):
|
134 |
-
subset = data[data
|
135 |
|
136 |
st.write(
|
137 |
"The user {} rated {} distinct books. Here is the rating history: ".format(
|
@@ -141,10 +142,10 @@ def display_user_rat(user: int, data: pd.DataFrame):
|
|
141 |
|
142 |
# Displaying the subset of books rated by the user
|
143 |
st.dataframe(
|
144 |
-
subset.sort_values("
|
145 |
# Do not show the customer since we are displaying the
|
146 |
# information for a specific customer.
|
147 |
-
COLUMN_NOT_DISPLAY,
|
148 |
axis=1,
|
149 |
)
|
150 |
)
|
@@ -154,14 +155,14 @@ def display_user_rat(user: int, data: pd.DataFrame):
|
|
154 |
|
155 |
|
156 |
def _extract_author(df, products):
|
157 |
-
desc = merged_df[merged_df["
|
158 |
-
"
|
159 |
)[["ISBN", "Book-Author"]]
|
160 |
-
return desc.set_index("
|
161 |
def _extract_title(df, products):
|
162 |
-
desc = merged_df[merged_df["
|
163 |
-
"
|
164 |
-
)[["
|
165 |
return desc.set_index("ProductIndex")
|
166 |
|
167 |
def display_recommendation_plots(
|
@@ -189,32 +190,32 @@ def display_recommendation_plots(
|
|
189 |
print("=" * 80)
|
190 |
|
191 |
# Find the purchases of similar users
|
192 |
-
|
193 |
|
194 |
-
|
195 |
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
|
200 |
-
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
|
206 |
# Compute the t-sne
|
207 |
|
208 |
# Concatenate all the vectors to compute the decomposition a single time
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
|
217 |
-
|
218 |
|
219 |
with st.spinner("Computing plots (this might take around 60 seconds)..."):
|
220 |
elapsed = time.time()
|
@@ -236,13 +237,13 @@ def display_recommendation_plots(
|
|
236 |
contribution_dec = decomposed[
|
237 |
len(suggestions) : len(suggestions) + len(contributions), :
|
238 |
]
|
239 |
-
items_others_dec = decomposed[-len(
|
240 |
|
241 |
# Also, extract the description to create a nice hover in
|
242 |
# the final plot.
|
243 |
|
244 |
contribution_description = _extract_title(merged_df, contributions)
|
245 |
-
|
246 |
suggestion_description = _extract_title(merged_df, suggestions)
|
247 |
|
248 |
# Plot the scatterplot
|
@@ -255,7 +256,7 @@ def display_recommendation_plots(
|
|
255 |
y=contribution_dec[:, 1],
|
256 |
mode="markers",
|
257 |
opacity=0.8,
|
258 |
-
name="Similar
|
259 |
marker_symbol="square-open",
|
260 |
marker_color="#010CFA",
|
261 |
marker_size=10,
|
@@ -263,21 +264,21 @@ def display_recommendation_plots(
|
|
263 |
)
|
264 |
)
|
265 |
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
|
282 |
fig.add_trace(
|
283 |
go.Scatter(
|
|
|
19 |
"ISBN",
|
20 |
"Location",
|
21 |
"Age",
|
22 |
+
"CustomerIndex",
|
23 |
+
"ProductIndex",
|
24 |
]
|
25 |
|
26 |
|
|
|
92 |
suggested_items_id = [id[0] for id in explained]
|
93 |
|
94 |
suggested_description = (
|
95 |
+
df.loc[df.ISBN == suggestion][["Book-Title", "ProductIndex"]]
|
96 |
+
.drop_duplicates(subset=["ProductIndex"])["Book-Title"]
|
97 |
.unique()[0]
|
98 |
)
|
99 |
similar_items_description = (
|
100 |
+
df.loc[df["ProductIndex"].isin(suggested_items_id)][
|
101 |
+
["Book-Title", "ProductIndex"]
|
102 |
]
|
103 |
+
.drop_duplicates(subset=["ProductIndex"])["Book-Title"]
|
104 |
.unique()
|
105 |
)
|
106 |
|
107 |
output.append(
|
108 |
f"The item **{suggested_description.strip()}** "
|
109 |
"has been suggested because it is similar to the following products"
|
110 |
+
" rated by the user:"
|
111 |
)
|
112 |
for description in similar_items_description:
|
113 |
output.append(f"- {description.strip()}")
|
|
|
120 |
|
121 |
def print_suggestions(suggestions: List[int], df: pd.DataFrame):
|
122 |
similar_items_description = (
|
123 |
+
df.loc[df["ProductIndex"].isin(suggestions)][["Book-Title", "ProductIndex"]]
|
124 |
+
.drop_duplicates(subset=["ProductIndex"])["Book-Title"]
|
125 |
.unique()
|
126 |
)
|
127 |
|
|
|
132 |
st.write("\n".join(output))
|
133 |
|
134 |
def display_user_rat(user: int, data: pd.DataFrame):
|
135 |
+
subset = data[data.CustomerIndex == user]
|
136 |
|
137 |
st.write(
|
138 |
"The user {} rated {} distinct books. Here is the rating history: ".format(
|
|
|
142 |
|
143 |
# Displaying the subset of books rated by the user
|
144 |
st.dataframe(
|
145 |
+
subset.sort_values("CustomerIndex").drop(
|
146 |
# Do not show the customer since we are displaying the
|
147 |
# information for a specific customer.
|
148 |
+
COLUMN_NOT_DISPLAY+ ["CustomerID"],
|
149 |
axis=1,
|
150 |
)
|
151 |
)
|
|
|
155 |
|
156 |
|
157 |
def _extract_author(df, products):
|
158 |
+
desc = merged_df[merged_df["ProductIndex"].isin(products)].drop_duplicates(
|
159 |
+
"ProductIndex", ignore_index=True
|
160 |
)[["ISBN", "Book-Author"]]
|
161 |
+
return desc.set_index("ProductIndex")
|
162 |
def _extract_title(df, products):
|
163 |
+
desc = merged_df[merged_df["ProductIndex"].isin(products)].drop_duplicates(
|
164 |
+
"ProductIndex", ignore_index=True
|
165 |
+
)[["ProductIndex", "Book-Title"]]
|
166 |
return desc.set_index("ProductIndex")
|
167 |
|
168 |
def display_recommendation_plots(
|
|
|
190 |
print("=" * 80)
|
191 |
|
192 |
# Find the purchases of similar users
|
193 |
+
rated_by_similar_users = []
|
194 |
|
195 |
+
sim_users, _ = model.similar_users(user_id)
|
196 |
|
197 |
+
for u in sim_users:
|
198 |
+
_, sim_purchases = model.user_product_matrix[u].nonzero()
|
199 |
+
rated_by_similar_users.append(sim_purchases)
|
200 |
|
201 |
+
rated_by_similar_users = np.unique(np.concatenate(rated_by_similar_users))
|
202 |
|
203 |
+
print("Similar rated computed")
|
204 |
+
print(rated_by_similar_users)
|
205 |
+
print("=" * 80)
|
206 |
|
207 |
# Compute the t-sne
|
208 |
|
209 |
# Concatenate all the vectors to compute the decomposition a single time
|
210 |
+
to_decompose = np.concatenate(
|
211 |
+
(
|
212 |
+
model.item_factors[suggestions],
|
213 |
+
model.item_factors[contributions],
|
214 |
+
model.item_factors[rated_by_similar_users_by_similar_users],
|
215 |
+
)
|
216 |
+
)
|
217 |
|
218 |
+
print(f"Shape to decompose: {to_decompose.shape}")
|
219 |
|
220 |
with st.spinner("Computing plots (this might take around 60 seconds)..."):
|
221 |
elapsed = time.time()
|
|
|
237 |
contribution_dec = decomposed[
|
238 |
len(suggestions) : len(suggestions) + len(contributions), :
|
239 |
]
|
240 |
+
items_others_dec = decomposed[-len(rated_by_similar_users) :, :]
|
241 |
|
242 |
# Also, extract the description to create a nice hover in
|
243 |
# the final plot.
|
244 |
|
245 |
contribution_description = _extract_title(merged_df, contributions)
|
246 |
+
items_other_description = _extract_title(merged_df, rated_by_similar_users)
|
247 |
suggestion_description = _extract_title(merged_df, suggestions)
|
248 |
|
249 |
# Plot the scatterplot
|
|
|
256 |
y=contribution_dec[:, 1],
|
257 |
mode="markers",
|
258 |
opacity=0.8,
|
259 |
+
name="Similar rated by user",
|
260 |
marker_symbol="square-open",
|
261 |
marker_color="#010CFA",
|
262 |
marker_size=10,
|
|
|
264 |
)
|
265 |
)
|
266 |
|
267 |
+
fig.add_trace(
|
268 |
+
go.Scatter(
|
269 |
+
x=items_others_dec[:, 0],
|
270 |
+
y=items_others_dec[:, 1],
|
271 |
+
mode="markers",
|
272 |
+
name="Product rated by similar users",
|
273 |
+
opacity=0.7,
|
274 |
+
marker_symbol="circle-open",
|
275 |
+
marker_color="#FA5F19",
|
276 |
+
marker_size=10,
|
277 |
+
hovertext=items_other_description.loc[
|
278 |
+
rated_by_similar_users
|
279 |
+
].values.squeeze(),
|
280 |
+
)
|
281 |
+
)
|
282 |
|
283 |
fig.add_trace(
|
284 |
go.Scatter(
|