juanmartip95 commited on
Commit
172edb9
1 Parent(s): a4e0069

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +61 -7
  2. recommender.py +126 -0
  3. recommender_system.py +372 -0
  4. requirements (1).txt +12 -0
  5. utils.py +47 -0
README.md CHANGED
@@ -1,13 +1,67 @@
1
  ---
2
- title: Recomenderlacocreadora
3
- emoji: 🔥
4
- colorFrom: green
5
- colorTo: gray
6
  sdk: streamlit
7
- sdk_version: 1.29.0
8
- app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Recommender system and customer segmentation
3
+ emoji: 🐨
4
+ colorFrom: purple
5
+ colorTo: blue
6
  sdk: streamlit
7
+ sdk_version: 1.10.0
8
+ app_file: recommender_system.py
9
  pinned: false
10
  license: mit
11
  ---
12
 
13
+
14
+ # Recommender system and customer segmentation
15
+
16
+ Demo with recsys and clustering for the [online retail](https://www.kaggle.com/datasets/vijayuv/onlineretail?select=OnlineRetail.csv) dataset.
17
+
18
+ ## Objective
19
+
20
+ Recommender system:
21
+
22
+ 1. interactively select a user
23
+ 2. show all the recommendations for the user
24
+ 3. explain why we get these suggestions (which purchased object influences the most)
25
+ 4. plot the purchases and suggested articles
26
+
27
+ Clustering:
28
+
29
+ 1. compute the user clustering
30
+ 2. plot users and their clusters
31
+ 3. explain the meaning of the clusters (compute the mean metrics or literally explain them)
32
+
33
+ ## Setup
34
+
35
+ In your terminal run:
36
+
37
+ ```bash
38
+ # Enable the env
39
+ source .venv/bin/activate
40
+
41
+ # Install the dependencies
42
+
43
+ pip install -r requirements.txt
44
+
45
+ # Or install the frozen dependencies from requirements_freezed.txt
46
+
47
+ # You are ready to rock!
48
+ ```
49
+
50
+ ## Run
51
+
52
+ In your terminal run:
53
+
54
+ ```bash
55
+ streamlit run recommender_system.py
56
+
57
+ # Now the default browser will be opened with
58
+ # the streamlit page. If you want to customize the
59
+ # execution of streamlit, refer to its documentation.
60
+ ```
61
+
62
+ ## Resources
63
+
64
+ - [streamlit](https://streamlit.io/)
65
+ - [implicit](https://github.com/benfred/implicit), recsys library
66
+ - [t-sne guide](https://distill.pub/2016/misread-tsne/)
67
+ - [RFM segmentation](https://www.omniconvert.com/blog/rfm-score/)
recommender.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Any, Dict, Optional

from implicit.als import AlternatingLeastSquares
from implicit.bpr import BayesianPersonalizedRanking
from implicit.lmf import LogisticMatrixFactorization
from implicit.nearest_neighbours import bm25_weight
from scipy.sparse import csr_matrix
7
+
8
# Registry mapping the short model name accepted by `_get_model` to the
# `implicit` model class that is instantiated for it.
MODEL = {
    "lmf": LogisticMatrixFactorization,
    "als": AlternatingLeastSquares,
    "bpr": BayesianPersonalizedRanking,
}
13
+
14
+
15
+ def _get_sparse_matrix(values, user_idx, product_idx):
16
+ return csr_matrix(
17
+ (values, (user_idx, product_idx)),
18
+ shape=(len(user_idx.unique()), len(product_idx.unique())),
19
+ )
20
+
21
+
22
def _get_model(name: str, **params):
    """Instantiate the model registered under ``name`` in ``MODEL``.

    Args:
        name: Key of the model class in the ``MODEL`` registry.
        **params: Keyword arguments forwarded verbatim to the model
            constructor.

    Raises:
        ValueError: If ``name`` is not a registered model.
    """
    if name not in MODEL:
        raise ValueError("No model with name {}".format(name))
    return MODEL[name](**params)
27
+
28
+
29
class InternalStatusError(Exception):
    """Raised when an operation is requested in a state that forbids it.

    In this module it signals that a recommendation was requested before the
    model was fitted.
    """
31
+
32
+
33
class Recommender:
    """Wrap an `implicit` model together with its user x product matrix.

    The matrix is built once in the constructor. `create_and_fit` weights it,
    instantiates the requested model and trains it. Every query method raises
    `InternalStatusError` until a model has been fitted successfully.
    """

    def __init__(
        self,
        values,
        user_idx,
        product_idx,
    ):
        self.user_product_matrix = _get_sparse_matrix(values, user_idx, product_idx)
        self.user_idx = user_idx
        self.product_idx = product_idx

        # Both attributes are set by `create_and_fit` once training succeeds.
        self.model = None
        self.fitted = False

    def _ensure_fitted(self, message: str) -> None:
        """Raise `InternalStatusError(message)` unless a model is fitted."""
        if not self.fitted:
            raise InternalStatusError(message)

    def create_and_fit(
        self,
        model_name: str,
        weight_strategy: str = "bm25",
        model_params: Optional[Dict[str, Any]] = None,
    ):
        """Weight the interaction matrix, then create and train the model.

        Args:
            model_name: Key of the model in the ``MODEL`` registry.
            weight_strategy: One of ``"bm25"``, ``"balanced"`` or ``"same"``
                (case-insensitive).
            model_params: Keyword arguments for the model constructor.
                ``None`` means no extra arguments.

        Returns:
            ``self``, so that calls can be chained.

        Raises:
            ValueError: If the weight strategy or the model name is unknown.
        """
        weight_strategy = weight_strategy.lower()
        if weight_strategy == "bm25":
            data = bm25_weight(
                self.user_product_matrix,
                K1=1.2,
                B=0.75,
            )
        elif weight_strategy == "balanced":
            # Balance the positive and negative (nan) entries
            # http://stanford.edu/~rezab/nips2014workshop/submits/logmat.pdf
            n_rows, n_cols = self.user_product_matrix.shape
            total_size = n_rows * n_cols
            total = self.user_product_matrix.sum()
            num_zeros = total_size - self.user_product_matrix.count_nonzero()
            data = self.user_product_matrix.multiply(num_zeros / total)
        elif weight_strategy == "same":
            data = self.user_product_matrix
        else:
            raise ValueError("Weight strategy not supported")

        model = _get_model(model_name, **(model_params or {}))
        model.fit(data)

        # Publish the model only after training succeeded, so a failed fit
        # never leaves the object claiming to be fitted.
        self.model = model
        self.fitted = True

        return self

    def recommend_products(
        self,
        user_id,
        items_to_recommend=5,
    ):
        """Find the recommended items for the user.

        Args:
            user_id: Row index of the user in ``user_product_matrix``.
            items_to_recommend: Number of items requested from the model.

        Returns:
            Whatever ``self.model.recommend`` returns. Callers in this project
            treat the first element as product indexes (not names) and
            ignore the rest.

        Raises:
            InternalStatusError: If no model has been fitted yet.
        """
        self._ensure_fitted(
            "Cannot recommend products without previously fitting the model."
            " Please, consider fitting the model before recommening products."
        )

        return self.model.recommend(
            user_id,
            self.user_product_matrix[user_id],
            filter_already_liked_items=True,
            N=items_to_recommend,
        )

    def explain_recommendation(
        self,
        user_id,
        suggested_item_id,
        recommended_items,
    ):
        """Return the per-item score contributions behind one suggestion.

        Args:
            user_id: Row index of the user in ``user_product_matrix``.
            suggested_item_id: Index of the suggested item to explain.
            recommended_items: Passed to the model as ``N``.

        Returns:
            The second element of ``self.model.explain(...)``. Callers read
            the first entry of each element as an item index.

        Raises:
            InternalStatusError: If no model has been fitted yet.
        """
        self._ensure_fitted(
            "Cannot explain a recommendation without previously fitting the model."
        )

        _, items_score_contrib, _ = self.model.explain(
            user_id,
            self.user_product_matrix,
            suggested_item_id,
            N=recommended_items,
        )

        return items_score_contrib

    def similar_users(self, user_id):
        """Return ``self.model.similar_users(user_id)``.

        Raises:
            InternalStatusError: If no model has been fitted yet.
        """
        self._ensure_fitted(
            "Cannot find similar users without previously fitting the model."
        )
        return self.model.similar_users(user_id)

    @property
    def item_factors(self):
        """Item factors exposed by the fitted model.

        Raises:
            InternalStatusError: If no model has been fitted yet.
        """
        self._ensure_fitted(
            "Cannot access the item factors without previously fitting the model."
        )
        return self.model.item_factors
recommender_system.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import altair as alt
4
+ from recommender import Recommender
5
+ from sklearn.decomposition import PCA
6
+ from sklearn.manifold import TSNE
7
+ from os import cpu_count
8
+ import numpy as np
9
+ import time
10
+
11
+ from utils import load_and_preprocess_data
12
+
13
+ import matplotlib.pyplot as plt
14
+ from typing import Union, List, Dict, Any
15
+ import plotly.graph_objects as go
16
+
17
+
18
# Columns dropped from the dataframes before they are shown in the UI.
# Kept as a list because it is concatenated with other lists of column names.
COLUMN_NOT_DISPLAY = [
    "StockCode",
    "UnitPrice",
    "Country",
    "CustomerIndex",
    "ProductIndex",
]
25
+
26
+
27
# Markdown rendered in the Streamlit sidebar by `main`. Leading whitespace
# (the newline right after the opening quotes) is removed with `.lstrip()`.
SIDEBAR_DESCRIPTION = """
# Recommender system

## What is it?
A recommender system is a tool that suggests something new to a particular
user that she/he might be interested in. It becomes useful when
the number of items a user can choose from is high.

## How does it work?
A recommender system internally finds similar users and similar items,
based on a suitable definition of "similarity".
For example, users that purchased the same items can be considered similar.
When we want to suggest new items to a user, a recommender system exploits
the items bought by similar users as a starting point for the suggestion.
The items bought by similar users are compared to the items that the user
already bought. If they are new and similar, the model suggests them.

## How we prepare the data
For each user, we compute the quantity purchased for every single item.
This will be the metric the value considered by the model to compute
the similarity. The item that a user has never bought will
be left at zero. These zeros will be the subject of the recommendation.
""".lstrip()
50
+
51
+
52
@st.cache(allow_output_mutation=True)
def create_and_fit_recommender(
    model_name: str,
    values: Union[pd.DataFrame, "np.ndarray"],
    users: Union[pd.DataFrame, "np.ndarray"],
    products: Union[pd.DataFrame, "np.ndarray"],
) -> Recommender:
    """Build a `Recommender` and train it; the result is cached by Streamlit.

    Args:
        model_name: Name of the model forwarded to
            `Recommender.create_and_fit`.
        values: Interaction values (one per user/product pair).
        users: User index of every interaction.
        products: Product index of every interaction.

    Returns:
        The fitted recommender.
    """
    # Fine-tuned values
    # NOTE(review): these keywords are forwarded verbatim to the model
    # constructor; confirm they are accepted by models other than "als".
    tuned_params = {
        "factors": 190,
        "alpha": 0.6,
        "regularization": 0.06,
        "random_state": 42,
    }

    recommender = Recommender(values, users, products)
    recommender.create_and_fit(model_name, model_params=tuned_params)
    return recommender
76
+
77
+
78
def explain_recommendation(
    recommender: Recommender,
    user_id: int,
    suggestions: List[int],
    df: pd.DataFrame,
):
    """Write, inside an expander, why each suggestion was recommended.

    For every suggested product, the purchased products reported by
    `recommender.explain_recommendation` are listed by description.

    Args:
        recommender: Fitted recommender used to explain the suggestions.
        user_id: Index of the user the suggestions were computed for.
        suggestions: Product indexes suggested to the user.
        df: Dataframe with at least the "ProductIndex" and "Description"
            columns.
    """

    def _descriptions(mask):
        # One description per product index, in dataframe order.
        return (
            df.loc[mask, ["Description", "ProductIndex"]]
            .drop_duplicates(subset=["ProductIndex"])["Description"]
            .unique()
        )

    n_recommended = len(suggestions)
    lines = []
    for suggestion in suggestions:
        contributions = recommender.explain_recommendation(
            user_id, suggestion, n_recommended
        )
        contributing_ids = [entry[0] for entry in contributions]

        suggested_description = _descriptions(df["ProductIndex"] == suggestion)[0]
        similar_items_description = _descriptions(
            df["ProductIndex"].isin(contributing_ids)
        )

        lines.append(
            f"The item **{suggested_description.strip()}** "
            "has been suggested because it is similar to the following products"
            " bought by the user:"
        )
        lines.extend(
            f"- {description.strip()}" for description in similar_items_description
        )

    with st.expander("See why the model recommended these products"):
        st.write("\n".join(lines))

    st.write("------")
119
+
120
+
121
def print_suggestions(suggestions: List[int], df: pd.DataFrame):
    """Write the descriptions of the suggested products as a bullet list.

    The products are listed in the order given by ``suggestions`` (the
    ranking produced by the model), not in the order in which they happen to
    appear in ``df``.

    Args:
        suggestions: Product indexes, best suggestion first.
        df: Dataframe with at least the "ProductIndex" and "Description"
            columns.
    """
    # One description per product index, looked up by index.
    description_by_product = (
        df.loc[df["ProductIndex"].isin(suggestions), ["ProductIndex", "Description"]]
        .drop_duplicates(subset=["ProductIndex"])
        .set_index("ProductIndex")["Description"]
    )

    # `reindex` restores the ranking order; products without a description
    # are skipped and duplicate descriptions are shown once.
    ranked_descriptions = (
        description_by_product.reindex(suggestions).dropna().unique()
    )

    output = ["The model suggests the following products:"]
    output.extend(f"- {description.strip()}" for description in ranked_descriptions)

    st.write("\n".join(output))
133
+
134
+
135
def display_user_char(user: int, data: pd.DataFrame):
    """Show how many distinct products a user bought and the purchase history.

    Args:
        user: Value matched against the "CustomerIndex" column.
        data: Dataframe with the purchases of all the users.
    """
    purchases = data[data.CustomerIndex == user]
    distinct_products = purchases["Description"].nunique()

    st.write(
        "The user {} bought {} distinct products. Here is the purchase history: ".format(
            user, distinct_products
        )
    )

    # Do not show the customer since we are displaying the
    # information for a specific customer.
    hidden_columns = COLUMN_NOT_DISPLAY + ["CustomerID"]
    history = purchases.sort_values("InvoiceDate").drop(hidden_columns, axis=1)
    st.dataframe(history)
    st.write("-----")
155
+
156
+
157
+ def _extract_description(df, products):
158
+ desc = df[df["ProductIndex"].isin(products)].drop_duplicates(
159
+ "ProductIndex", ignore_index=True
160
+ )[["ProductIndex", "Description"]]
161
+ return desc.set_index("ProductIndex")
162
+
163
+
164
def display_recommendation_plots(
    user_id: int,
    suggestions: List[int],
    df: pd.DataFrame,
    model: Recommender,
):
    """Plot a t-SNE with the suggested items, together with the purchases of
    similar users.

    Three groups of item factors are projected to 2D in a single
    decomposition: the suggested items, the purchased items that contributed
    to the suggestions and the items bought by similar users.

    Args:
        user_id: Index of the user the suggestions were computed for.
        suggestions: Product indexes suggested to the user.
        df: Dataframe with at least the "ProductIndex" and "Description"
            columns, used for the hover text.
        model: Fitted recommender.

    Returns:
        A plotly figure with one scatter trace per group.
    """
    # Get the purchased items that contribute the most to the suggestions
    contributions = []
    n_recommended = len(suggestions)
    for suggestion in suggestions:
        items_and_score = model.explain_recommendation(
            user_id, suggestion, n_recommended
        )
        contributions.append([t[0] for t in items_and_score])

    # An empty explanation would upcast the concatenation to float, which
    # cannot be used as an index: force an integer dtype.
    contributions = np.unique(np.concatenate(contributions)).astype(int)

    print("Contribution computed")
    print(contributions)
    print("=" * 80)

    # Find the purchases of similar users
    bought_by_similar_users = []

    sim_users, _ = model.similar_users(user_id)

    for u in sim_users:
        _, sim_purchases = model.user_product_matrix[u].nonzero()
        bought_by_similar_users.append(sim_purchases)

    bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))

    print("Similar bought computed")
    print(bought_by_similar_users)
    print("=" * 80)

    # Compute the t-sne

    # Concatenate all the vectors to compute the decomposition a single time
    to_decompose = np.concatenate(
        (
            model.item_factors[suggestions],
            model.item_factors[contributions],
            model.item_factors[bought_by_similar_users],
        )
    )

    print(f"Shape to decompose: {to_decompose.shape}")

    with st.spinner("Computing plots (this might take around 60 seconds)..."):
        started = time.time()
        decomposed = _tsne_decomposition(
            to_decompose,
            dict(
                perplexity=30,
                metric="euclidean",
                n_iter=1_000,
                random_state=42,
            ),
        )
        elapsed = time.time() - started
        print(f"TSNE computed in {elapsed}")
        print("=" * 80)

    # Extract the decomposed vectors. Explicit offsets are used for every
    # group: a negative slice such as `[-0:]` would select all the rows when
    # the last group is empty.
    contribution_start = len(suggestions)
    others_start = contribution_start + len(contributions)
    suggestion_dec = decomposed[:contribution_start, :]
    contribution_dec = decomposed[contribution_start:others_start, :]
    items_others_dec = decomposed[others_start:, :]

    # Also, extract the description to create a nice hover in
    # the final plot.

    contribution_description = _extract_description(df, contributions)
    items_other_description = _extract_description(df, bought_by_similar_users)
    suggestion_description = _extract_description(df, suggestions)

    # Plot the scatterplot. The hover text is selected as a 1-D array so that
    # a group with a single item does not collapse to a 0-d array.

    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=contribution_dec[:, 0],
            y=contribution_dec[:, 1],
            mode="markers",
            opacity=0.8,
            name="Similar bought by user",
            marker_symbol="square-open",
            marker_color="#010CFA",
            marker_size=10,
            hovertext=contribution_description.loc[
                contributions, "Description"
            ].to_numpy(),
        )
    )

    fig.add_trace(
        go.Scatter(
            x=items_others_dec[:, 0],
            y=items_others_dec[:, 1],
            mode="markers",
            name="Product bought by similar users",
            opacity=0.7,
            marker_symbol="circle-open",
            marker_color="#FA5F19",
            marker_size=10,
            hovertext=items_other_description.loc[
                bought_by_similar_users, "Description"
            ].to_numpy(),
        )
    )

    fig.add_trace(
        go.Scatter(
            x=suggestion_dec[:, 0],
            y=suggestion_dec[:, 1],
            mode="markers",
            name="Suggested",
            marker_color="#1A9626",
            marker_symbol="star",
            marker_size=10,
            hovertext=suggestion_description.loc[
                suggestions, "Description"
            ].to_numpy(),
        )
    )

    fig.update_xaxes(visible=False)
    fig.update_yaxes(visible=False)
    fig.update_layout(plot_bgcolor="white")

    return fig
297
+
298
+
299
def _tsne_decomposition(data: np.ndarray, tsne_args: Dict[str, Any]):
    """Project ``data`` to two dimensions with t-SNE.

    When the data has more than 50 features it is first reduced with PCA.

    Args:
        data: Two-dimensional array with one sample per row.
        tsne_args: Keyword arguments forwarded to ``TSNE``; the dictionary
            itself is not modified.

    Returns:
        The result of ``TSNE(...).fit_transform``: one 2D point per sample.
    """
    n_samples, n_features = data.shape

    if n_features > 50:
        print("Performing PCA...")
        # PCA cannot extract more components than there are samples, so the
        # target size is capped for small inputs.
        data = PCA(n_components=min(50, n_samples)).fit_transform(data)

    # t-SNE requires the perplexity to be lower than the number of samples:
    # clamp it on a copy instead of failing on small inputs.
    tsne_args = dict(tsne_args)
    perplexity = tsne_args.get("perplexity", 30)
    if n_samples > 1 and perplexity >= n_samples:
        tsne_args["perplexity"] = n_samples - 1

    return TSNE(
        n_components=2,
        n_jobs=cpu_count(),
        **tsne_args,
    ).fit_transform(data)
308
+
309
+
310
def main():
    """Render the Streamlit page: dataset preview and interactive suggestions."""
    # Load and process data
    data, users, products = load_and_preprocess_data()
    recommender = create_and_fit_recommender("als", data["Quantity"], users, products)

    st.markdown(
        "# Recommender system\n"
        "The dataset used for these computations is the following:\n"
    )
    st.sidebar.markdown(SIDEBAR_DESCRIPTION)

    to_display = data.drop(COLUMN_NOT_DISPLAY, axis=1)

    # Convert to int just to display the column without trailing decimals.
    # @note: the "format" function of pandas could be used as well, but it
    # turned out to be super slow when formatting large tables.
    to_display["Price"] = to_display["Price"].astype(int)

    # Show the data
    st.dataframe(to_display)

    st.markdown("## Interactive suggestion")
    with st.form("recommend"):
        # Let the user select the user to investigate
        user = st.selectbox(
            "Select a customer to get his recommendations",
            users.unique(),
        )

        items_to_recommend = st.slider("How many items to recommend?", 1, 10, 5)
        print(items_to_recommend)

        submitted = st.form_submit_button("Recommend!")
        if submitted:
            display_user_char(user, data)

            suggestions_and_score = recommender.recommend_products(
                user, items_to_recommend
            )
            # Only the suggested items are used below, not their scores.
            suggested_items = suggestions_and_score[0]

            print_suggestions(suggested_items, data)
            explain_recommendation(recommender, user, suggested_items, data)

            st.markdown(
                "## How the purchases of similar users influnce the recommendation"
            )
            fig = display_recommendation_plots(
                user, suggested_items, data, recommender
            )
            st.plotly_chart(fig)
370
+
371
+
372
+ main()
requirements (1).txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ scikit-learn
3
+ implicit
4
+ scipy
5
+ tqdm
6
+ numpy
7
+ matplotlib
8
+ seaborn
9
+ mlxtend
10
+ plotly==5.9.0
11
+ black
12
+ altair<5
utils.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
+
5
@st.cache
def load_and_preprocess_data(path: str = "Data/OnlineRetail.csv"):
    """Load the retail dataset and derive the columns used by the app.

    Rows with missing values or non-positive quantities are discarded. The
    "InvoiceDate" column is truncated to the day and shifted by ten years, and
    the "Price", "CustomerIndex" and "ProductIndex" columns are added.

    Args:
        path: Location of the CSV file, read with the latin-1 encoding.

    Returns:
        A ``(df, user_idx, product_idx)`` tuple: the processed dataframe and,
        for each of its rows, the integer code of the customer and of the
        product.
    """
    df = pd.read_csv(
        path,
        encoding="latin-1",
    )

    # Remove nan values
    df = df.dropna()

    # Use only positive quantities. This is not a robust approach,
    # but it is good enough to keep things simple.
    # The explicit copy makes the column assignments below act on an
    # independent dataframe instead of on a slice of the filtered one.
    df = df[df["Quantity"] > 0].copy()

    # Parse the date column and add 10 years, just for a better visualization
    df["InvoiceDate"] = pd.to_datetime(df["InvoiceDate"]).dt.floor(
        "d"
    ) + pd.offsets.DateOffset(years=10)

    # Change customer id to int
    df["CustomerID"] = df["CustomerID"].astype(int)

    # Add price column
    df["Price"] = df["Quantity"] * df["UnitPrice"]

    # Get unique entries in the dataset of users and products
    users = df["CustomerID"].unique()
    products = df["StockCode"].unique()

    # Create a categorical type for users and products. The categories are
    # sorted to ensure reproducibility.
    user_cat = pd.CategoricalDtype(categories=sorted(users), ordered=True)
    product_cat = pd.CategoricalDtype(categories=sorted(products), ordered=True)

    # Transform and get the indexes of the columns
    user_idx = df["CustomerID"].astype(user_cat).cat.codes
    product_idx = df["StockCode"].astype(product_cat).cat.codes

    # Add the categorical index to the starting dataframe
    df["CustomerIndex"] = user_idx
    df["ProductIndex"] = product_idx

    return df, user_idx, product_idx