File size: 1,598 Bytes
f8c0ae2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
from sklearn.metrics import jaccard_score
import numpy as np
from load_data import items

# Example of populating unique_items
unique_items = set(item for sublist in items for item in sublist)

# Create a product-item matrix
unique_items_list = list(unique_items)
product_item_matrix = pd.DataFrame(0, index=range(len(items)), columns=unique_items_list)

# Populate the product-item matrix
for i, transaction in enumerate(items):
    for item in transaction:
        product_item_matrix.loc[i, item] += 1

# Convert the DataFrame to a binary matrix
product_item_matrix_binary = product_item_matrix.map(lambda x: 1 if x > 0 else 0)

# Calculate Jacquard similarity
similarity_matrix_jaccard = pd.DataFrame(np.nan, index=product_item_matrix.columns, columns=product_item_matrix.columns)

for i in range(len(similarity_matrix_jaccard.columns)):
    for j in range(len(similarity_matrix_jaccard.columns)):
        similarity_matrix_jaccard.iloc[i, j] = jaccard_score(
            product_item_matrix_binary.iloc[:, i],
            product_item_matrix_binary.iloc[:, j]
        )


# Function to get collaborative recommendations based on a product
def collaborative_recommendations(product_name, similarity_matrix=similarity_matrix_jaccard, threshold=0.01):
    if product_name not in unique_items_list:
        return f"Product '{product_name}' not found in the database."

    similar_products = similarity_matrix[product_name][similarity_matrix[product_name] > threshold].sort_values(ascending=False).index.tolist()
    return similar_products[:5]  # top 5 most similar products