Spaces:

juanmartip95
/

recomenderlacocreadora

Sleeping

App Files Files Community

juanmartip95 committed on Jan 10

Commit

9112b55

•

1 Parent(s): 8d44559

Update recommender_system.py

Browse files

Files changed (1) hide show

recommender_system.py +70 -65

recommender_system.py CHANGED Viewed

@@ -16,11 +16,10 @@ import plotly.graph_objects as go
 COLUMN_NOT_DISPLAY = [
-    "StockCode",
-    "UnitPrice",
-    "Country",
-    "CustomerIndex",
-    "ProductIndex",
 ]
@@ -92,15 +91,15 @@ def explain_recommendation(
         suggested_items_id = [id[0] for id in explained]
         suggested_description = (
-            df.loc[df.ProductIndex == suggestion][["Description", "ProductIndex"]]
-            .drop_duplicates(subset=["ProductIndex"])["Description"]
             .unique()[0]
         )
         similar_items_description = (
-            df.loc[df["ProductIndex"].isin(suggested_items_id)][
-                ["Description", "ProductIndex"]
             ]
-            .drop_duplicates(subset=["ProductIndex"])["Description"]
             .unique()
         )
@@ -120,8 +119,8 @@ def explain_recommendation(
 def print_suggestions(suggestions: List[int], df: pd.DataFrame):
     similar_items_description = (
-        df.loc[df["ProductIndex"].isin(suggestions)][["Description", "ProductIndex"]]
-        .drop_duplicates(subset=["ProductIndex"])["Description"]
         .unique()
     )
@@ -131,35 +130,39 @@ def print_suggestions(suggestions: List[int], df: pd.DataFrame):
     st.write("\n".join(output))
-def display_user_char(user: int, data: pd.DataFrame):
-    subset = data[data.CustomerIndex == user]
-    # products = subset.groupby("ProductIndex").agg(
-    #     {"Description": lambda x: x.iloc[0], "Quantity": sum}
-    # )
     st.write(
-        "The user {} bought {} distinct products. Here is the purchase history: ".format(
-            user, subset["Description"].nunique()
         )
     )
     st.dataframe(
-        subset.sort_values("InvoiceDate").drop(
             # Do not show the customer since we are displaying the
             # information for a specific customer.
-            COLUMN_NOT_DISPLAY + ["CustomerID"],
             axis=1,
         )
     )
     st.write("-----")
-def _extract_description(df, products):
-    desc = df[df["ProductIndex"].isin(products)].drop_duplicates(
-        "ProductIndex", ignore_index=True
-    )[["ProductIndex", "Description"]]
-    return desc.set_index("ProductIndex")
 def display_recommendation_plots(
     user_id: int,
@@ -186,32 +189,32 @@ def display_recommendation_plots(
     print("=" * 80)
     # Find the purchases of similar users
-    bought_by_similar_users = []
-    sim_users, _ = model.similar_users(user_id)
-    for u in sim_users:
-        _, sim_purchases = model.user_product_matrix[u].nonzero()
-        bought_by_similar_users.append(sim_purchases)
-    bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))
-    print("Similar bought computed")
-    print(bought_by_similar_users)
-    print("=" * 80)
     # Compute the t-sne
     # Concatenate all the vectors so the decomposition is computed only once
-    to_decompose = np.concatenate(
-        (
-            model.item_factors[suggestions],
-            model.item_factors[contributions],
-            model.item_factors[bought_by_similar_users],
-        )
-    )
-    print(f"Shape to decompose: {to_decompose.shape}")
     with st.spinner("Computing plots (this might take around 60 seconds)..."):
         elapsed = time.time()
@@ -238,9 +241,9 @@ def display_recommendation_plots(
     # Also, extract the description to create a nice hover in
     # the final plot.
-    contribution_description = _extract_description(df, contributions)
-    items_other_description = _extract_description(df, bought_by_similar_users)
-    suggestion_description = _extract_description(df, suggestions)
     # Plot the scatterplot
@@ -260,21 +263,21 @@ def display_recommendation_plots(
         )
     )
-    fig.add_trace(
-        go.Scatter(
-            x=items_others_dec[:, 0],
-            y=items_others_dec[:, 1],
-            mode="markers",
-            name="Product bought by similar users",
-            opacity=0.7,
-            marker_symbol="circle-open",
-            marker_color="#FA5F19",
-            marker_size=10,
-            hovertext=items_other_description.loc[
-                bought_by_similar_users
-            ].values.squeeze(),
-        )
-    )
     fig.add_trace(
         go.Scatter(
@@ -310,9 +313,11 @@ def _tsne_decomposition(data: np.ndarray, tsne_args: Dict[str, Any]):
 def main():
     # Load and process data
     data, users, products = load_and_preprocess_data()
     recommender = create_and_fit_recommender(
         "als",
-        data["Quantity"],
         users,
         products,
     )
@@ -332,7 +337,7 @@ The dataset used for these computations is the following:
     # Convert to int just to display the column without trailing decimals.
     # @note: I know I can use the "format" function of pandas, but I found out
     #   it is super slow when formatting large tables.
-    to_display["Price"] = to_display["Price"].astype(int)
     # Show the data
     st.dataframe(
@@ -353,7 +358,7 @@ The dataset used for these computations is the following:
         submitted = st.form_submit_button("Recommend!")
         if submitted:
             # show_purhcase_history(user, data)
-            display_user_char(user, data)
             suggestions_and_score = recommender.recommend_products(
                 user, items_to_recommend
             )

 COLUMN_NOT_DISPLAY = [
+    "ISBN",
+    "Location",
+    "Age",
+    "User-ID",
 ]
         suggested_items_id = [id[0] for id in explained]
         suggested_description = (
+            df.loc[df.ISBN == suggestion][["Book-Title", "ISBN"]]
+            .drop_duplicates(subset=["ISBN"])["Book-Title"]
             .unique()[0]
         )
         similar_items_description = (
+            df.loc[df["ISBN"].isin(suggested_items_id)][
+                ["Book-Title", "ISBN"]
             ]
+            .drop_duplicates(subset=["ISBN"])["Book-Title"]
             .unique()
         )
 def print_suggestions(suggestions: List[int], df: pd.DataFrame):
     similar_items_description = (
+        df.loc[df["ISBN"].isin(suggestions)][["Book-Title", "ISBN"]]
+        .drop_duplicates(subset=["ISBN"])["Book-Title"]
         .unique()
     )
     st.write("\n".join(output))
def display_user_rat(user: int, data: pd.DataFrame):
    """Render the rating history of a single user in the Streamlit page.

    Writes a one-line summary (how many distinct book titles the user
    rated), then a table of that user's rows with the columns listed in
    ``COLUMN_NOT_DISPLAY`` removed, then a horizontal separator.

    Args:
        user: Value matched against the "User-ID" column of ``data``.
        data: Ratings table; must contain "User-ID", "Book-Title" and every
            column named in ``COLUMN_NOT_DISPLAY``.
    """
    user_rows = data.loc[data["User-ID"] == user]
    distinct_titles = user_rows["Book-Title"].nunique()

    summary = "The user {} rated {} distinct books. Here is the rating history: ".format(
        user, distinct_titles
    )
    st.write(summary)

    # NOTE(review): every row kept above has the same "User-ID", so this
    # sort key does not impose a meaningful order -- confirm the intended key.
    ordered_rows = user_rows.sort_values("User-ID")

    # Hide the user-related columns: the table is already scoped to one
    # specific user, so repeating them adds nothing.
    visible_rows = ordered_rows.drop(columns=COLUMN_NOT_DISPLAY)
    st.dataframe(visible_rows)

    st.write("-----")
def _extract_author(df, products):
    """Build an ISBN-indexed lookup table of book authors.

    Args:
        df: DataFrame with at least the "ISBN" and "Book-Author" columns.
        products: Collection of ISBN values to keep.

    Returns:
        A DataFrame indexed by "ISBN" with a single "Book-Author" column,
        holding one row per distinct ISBN of ``products`` found in ``df``
        (the first occurrence in ``df`` is the one kept).
    """
    # Read from the frame that was passed in. The previous body ignored
    # ``df`` and looked up a ``merged_df`` name from the enclosing scope.
    selected = df[df["ISBN"].isin(products)]
    authors = selected.drop_duplicates("ISBN", ignore_index=True)[
        ["ISBN", "Book-Author"]
    ]
    return authors.set_index("ISBN")
def _extract_title(df, products):
    """Build an ISBN-indexed lookup table of book titles.

    Args:
        df: DataFrame with at least the "ISBN" and "Book-Title" columns.
        products: Collection of ISBN values to keep.

    Returns:
        A DataFrame indexed by "ISBN" with a single "Book-Title" column,
        holding one row per distinct ISBN of ``products`` found in ``df``
        (the first occurrence in ``df`` is the one kept).
    """
    # Read from the frame that was passed in. The previous body ignored
    # ``df`` and looked up a ``merged_df`` name from the enclosing scope.
    selected = df[df["ISBN"].isin(products)]
    titles = selected.drop_duplicates("ISBN", ignore_index=True)[
        ["ISBN", "Book-Title"]
    ]
    # Index by "ISBN": it is the only key column left after the selection
    # above. The previous "ProductIndex" key is not among the selected
    # columns, so ``set_index`` raised a KeyError.
    return titles.set_index("ISBN")


# Backward-compatible alias: call sites still use the former helper name
# ``_extract_description``. NOTE(review): mapped to titles because the rest
# of the change replaces "Description" with "Book-Title" -- confirm.
_extract_description = _extract_title
 def display_recommendation_plots(
     user_id: int,
     print("=" * 80)
     # Find the purchases of similar users
+    #bought_by_similar_users = []
+    #sim_users, _ = model.similar_users(user_id)
+    #for u in sim_users:
+    #    _, sim_purchases = model.user_product_matrix[u].nonzero()
+    #    bought_by_similar_users.append(sim_purchases)
+    #bought_by_similar_users = np.unique(np.concatenate(bought_by_similar_users))
+    #print("Similar bought computed")
+    #print(bought_by_similar_users)
+    #print("=" * 80)
     # Compute the t-sne
     # Concatenate all the vectors so the decomposition is computed only once
+    #to_decompose = np.concatenate(
+    #    (
+    #        model.item_factors[suggestions],
+    #        model.item_factors[contributions],
+    #        model.item_factors[bought_by_similar_users],
+    #    )
+    #)
+    #print(f"Shape to decompose: {to_decompose.shape}")
     with st.spinner("Computing plots (this might take around 60 seconds)..."):
         elapsed = time.time()
     # Also, extract the description to create a nice hover in
     # the final plot.
+    contribution_description = _extract_description(merged_df, contributions)
+    #items_other_description = _extract_description(merged_df, bought_by_similar_users)
+    suggestion_description = _extract_description(merged_df, suggestions)
     # Plot the scatterplot
         )
     )
+    #fig.add_trace(
+    #    go.Scatter(
+    #        x=items_others_dec[:, 0],
+    #        y=items_others_dec[:, 1],
+    #        mode="markers",
+    #        name="Product bought by similar users",
+    #        opacity=0.7,
+    #        marker_symbol="circle-open",
+    #        marker_color="#FA5F19",
+    #        marker_size=10,
+    #        hovertext=items_other_description.loc[
+    #            bought_by_similar_users
+    #        ].values.squeeze(),
+    #    )
+    #)
     fig.add_trace(
         go.Scatter(
 def main():
     # Load and process data
     data, users, products = load_and_preprocess_data()
+    users=data['User-ID']
+    products=data['ISBN']
     recommender = create_and_fit_recommender(
         "als",
+        data["Book-Rating"],
         users,
         products,
     )
     # Convert to int just to display the column without trailing decimals.
     # @note: I know I can use the "format" function of pandas, but I found out
     #   it is super slow when formatting large tables.
+    to_display["Book-Rating"] = to_display["Book-Rating"].astype(int)
     # Show the data
     st.dataframe(
         submitted = st.form_submit_button("Recommend!")
         if submitted:
             # show_purhcase_history(user, data)
+            display_user_rat(user, data)
             suggestions_and_score = recommender.recommend_products(
                 user, items_to_recommend
             )