Spaces:

juanmartip95
/

recomenderlacocreadora

Sleeping

App Files Files Community

juanmartip95 committed on Jan 10

Commit

e6fd8db

•

1 Parent(s): eec1ef5

Update recommender_system.py

Browse files

Files changed (1) hide show

recommender_system.py +54 -53

recommender_system.py CHANGED Viewed

@@ -19,7 +19,8 @@ COLUMN_NOT_DISPLAY = [
     "ISBN",
     "Location",
     "Age",
-    "User-ID",
 ]
@@ -91,22 +92,22 @@ def explain_recommendation(
         suggested_items_id = [id[0] for id in explained]
         suggested_description = (
-            df.loc[df.ISBN == suggestion][["Book-Title", "ISBN"]]
-            .drop_duplicates(subset=["ISBN"])["Book-Title"]
             .unique()[0]
         )
         similar_items_description = (
-            df.loc[df["ISBN"].isin(suggested_items_id)][
-                ["Book-Title", "ISBN"]
             ]
-            .drop_duplicates(subset=["ISBN"])["Book-Title"]
             .unique()
         )
         output.append(
             f"The item **{suggested_description.strip()}** "
             "has been suggested because it is similar to the following products"
-            " bought by the user:"
         )
         for description in similar_items_description:
             output.append(f"- {description.strip()}")
@@ -119,8 +120,8 @@ def explain_recommendation(
 def print_suggestions(suggestions: List[int], df: pd.DataFrame):
     similar_items_description = (
-        df.loc[df["ISBN"].isin(suggestions)][["Book-Title", "ISBN"]]
-        .drop_duplicates(subset=["ISBN"])["Book-Title"]
         .unique()
     )
@@ -131,7 +132,7 @@ def print_suggestions(suggestions: List[int], df: pd.DataFrame):
     st.write("\n".join(output))
 def display_user_rat(user: int, data: pd.DataFrame):
-    subset = data[data["User-ID"] == user]
     st.write(
         "The user {} rated {} distinct books. Here is the rating history: ".format(
@@ -141,10 +142,10 @@ def display_user_rat(user: int, data: pd.DataFrame):
     # Displaying the subset of books rated by the user
     st.dataframe(
-        subset.sort_values("User-ID").drop(
             # Do not show the customer since we are displaying the
             # information for a specific customer.
-            COLUMN_NOT_DISPLAY,
             axis=1,
         )
     )
@@ -154,14 +155,14 @@ def display_user_rat(user: int, data: pd.DataFrame):
 def _extract_author(df, products):
-    desc = merged_df[merged_df["ISBN"].isin(products)].drop_duplicates(
-        "ISBN", ignore_index=True
     )[["ISBN", "Book-Author"]]
-    return desc.set_index("ISBN")
 def _extract_title(df, products):
-    desc = merged_df[merged_df["ISBN"].isin(products)].drop_duplicates(
-        "ISBN", ignore_index=True
-    )[["ISBN", "Book-Title"]]
     return desc.set_index("ProductIndex")
 def display_recommendation_plots(
@@ -189,32 +190,32 @@ def display_recommendation_plots(
     print("=" * 80)
     # Find the purchases of similar users
-    #bought_by_similar_users = []
-    #sim_users, _ = model.similar_users(user_id)
-    #for u in sim_users:
-    #    _, sim_purchases = model.user_product_matrix[u].nonzero()
-    #    bought_by_similar_users.append(sim_purchases)
-    #bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))
-    #print("Similar bought computed")
-    #print(bought_by_similar_users)
-    #print("=" * 80)
     # Compute the t-sne
     # Concatenate all the vectors so the decomposition is computed only once
-    #to_decompose = np.concatenate(
-    #    (
-    #        model.item_factors[suggestions],
-    #        model.item_factors[contributions],
-    #        model.item_factors[bought_by_similar_users],
-    #    )
-    #)
-    #print(f"Shape to decompose: {to_decompose.shape}")
     with st.spinner("Computing plots (this might take around 60 seconds)..."):
         elapsed = time.time()
@@ -236,13 +237,13 @@ def display_recommendation_plots(
     contribution_dec = decomposed[
         len(suggestions) : len(suggestions) + len(contributions), :
     ]
-    items_others_dec = decomposed[-len(bought_by_similar_users) :, :]
     # Also, extract the description to create a nice hover in
     # the final plot.
     contribution_description = _extract_title(merged_df, contributions)
-    #items_other_description = _extract_description(merged_df, bought_by_similar_users)
     suggestion_description = _extract_title(merged_df, suggestions)
     # Plot the scatterplot
@@ -255,7 +256,7 @@ def display_recommendation_plots(
             y=contribution_dec[:, 1],
             mode="markers",
             opacity=0.8,
-            name="Similar bought by user",
             marker_symbol="square-open",
             marker_color="#010CFA",
             marker_size=10,
@@ -263,21 +264,21 @@ def display_recommendation_plots(
         )
     )
-    #fig.add_trace(
-    #    go.Scatter(
-    #        x=items_others_dec[:, 0],
-    #        y=items_others_dec[:, 1],
-    #        mode="markers",
-    #        name="Product bought by similar users",
-    #        opacity=0.7,
-    #        marker_symbol="circle-open",
-    #        marker_color="#FA5F19",
-    #        marker_size=10,
-    #        hovertext=items_other_description.loc[
-    #            bought_by_similar_users
-    #        ].values.squeeze(),
-    #    )
-    #)
     fig.add_trace(
         go.Scatter(

     "ISBN",
     "Location",
     "Age",
+    "CustomerIndex",
+    "ProductIndex",
 ]
         suggested_items_id = [id[0] for id in explained]
         suggested_description = (
+            df.loc[df.ISBN == suggestion][["Book-Title", "ProductIndex"]]
+            .drop_duplicates(subset=["ProductIndex"])["Book-Title"]
             .unique()[0]
         )
         similar_items_description = (
+            df.loc[df["ProductIndex"].isin(suggested_items_id)][
+                ["Book-Title", "ProductIndex"]
             ]
+            .drop_duplicates(subset=["ProductIndex"])["Book-Title"]
             .unique()
         )
         output.append(
             f"The item **{suggested_description.strip()}** "
             "has been suggested because it is similar to the following products"
+            " rated by the user:"
         )
         for description in similar_items_description:
             output.append(f"- {description.strip()}")
 def print_suggestions(suggestions: List[int], df: pd.DataFrame):
     similar_items_description = (
+        df.loc[df["ProductIndex"].isin(suggestions)][["Book-Title", "ProductIndex"]]
+        .drop_duplicates(subset=["ProductIndex"])["Book-Title"]
         .unique()
     )
     st.write("\n".join(output))
 def display_user_rat(user: int, data: pd.DataFrame):
+    subset = data[data.CustomerIndex == user]
     st.write(
         "The user {} rated {} distinct books. Here is the rating history: ".format(
     # Displaying the subset of books rated by the user
     st.dataframe(
+        subset.sort_values("CustomerIndex").drop(
             # Do not show the customer since we are displaying the
             # information for a specific customer.
+            COLUMN_NOT_DISPLAY+ ["CustomerID"],
             axis=1,
         )
     )
 def _extract_author(df, products):
+    desc = merged_df[merged_df["ProductIndex"].isin(products)].drop_duplicates(
+        "ProductIndex", ignore_index=True
     )[["ISBN", "Book-Author"]]
+    return desc.set_index("ProductIndex")
 def _extract_title(df, products):
+    desc = merged_df[merged_df["ProductIndex"].isin(products)].drop_duplicates(
+        "ProductIndex", ignore_index=True
+    )[["ProductIndex", "Book-Title"]]
     return desc.set_index("ProductIndex")
 def display_recommendation_plots(
     print("=" * 80)
     # Find the purchases of similar users
+    rated_by_similar_users = []
+    sim_users, _ = model.similar_users(user_id)
+    for u in sim_users:
+        _, sim_purchases = model.user_product_matrix[u].nonzero()
+        rated_by_similar_users.append(sim_purchases)
+    rated_by_similar_users = np.unique(np.concatenate(rated_by_similar_users))
+    print("Similar rated computed")
+    print(rated_by_similar_users)
+    print("=" * 80)
     # Compute the t-sne
     # Concatenate all the vectors so the decomposition is computed only once
+    to_decompose = np.concatenate(
+        (
+            model.item_factors[suggestions],
+            model.item_factors[contributions],
+            model.item_factors[rated_by_similar_users_by_similar_users],
+        )
+    )
+    print(f"Shape to decompose: {to_decompose.shape}")
     with st.spinner("Computing plots (this might take around 60 seconds)..."):
         elapsed = time.time()
     contribution_dec = decomposed[
         len(suggestions) : len(suggestions) + len(contributions), :
     ]
+    items_others_dec = decomposed[-len(rated_by_similar_users) :, :]
     # Also, extract the description to create a nice hover in
     # the final plot.
     contribution_description = _extract_title(merged_df, contributions)
+    items_other_description = _extract_title(merged_df, rated_by_similar_users)
     suggestion_description = _extract_title(merged_df, suggestions)
     # Plot the scatterplot
             y=contribution_dec[:, 1],
             mode="markers",
             opacity=0.8,
+            name="Similar rated by user",
             marker_symbol="square-open",
             marker_color="#010CFA",
             marker_size=10,
         )
     )
+    fig.add_trace(
+        go.Scatter(
+            x=items_others_dec[:, 0],
+            y=items_others_dec[:, 1],
+            mode="markers",
+            name="Product rated by similar users",
+            opacity=0.7,
+            marker_symbol="circle-open",
+            marker_color="#FA5F19",
+            marker_size=10,
+            hovertext=items_other_description.loc[
+                rated_by_similar_users
+            ].values.squeeze(),
+        )
+    )
     fig.add_trace(
         go.Scatter(