Upload 5 files
- .gitattributes +1 -0
- app.py +47 -0
- collaborative_book_metadata.csv +0 -0
- collaborative_books_df.csv +3 -0
- requirements.txt +5 -0
- utils.py +48 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+collaborative_books_df.csv filter=lfs diff=lfs merge=lfs -text
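The added line is the filter entry Git LFS writes when the large ratings CSV is put under LFS tracking. Assuming Git LFS is installed, the same entry can be reproduced with:

git lfs track "collaborative_books_df.csv"
git add .gitattributes collaborative_books_df.csv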
app.py
ADDED
@@ -0,0 +1,47 @@
+import streamlit as st
+import pandas as pd
+from utils import create_matrix, find_similar_books
+
+# Load ratings and book metadata; the rating matrix itself is built in utils.create_matrix
+def load_data():
+    ratings = pd.read_csv('./data/collaborative_books_df.csv', index_col=0)
+    books = pd.read_csv('./data/collaborative_book_metadata.csv', index_col=0)
+    # book_titles = pd.read_csv('./data/book_titles.csv', index_col=0)
+    # book_titles = book_titles.reset_index()
+
+    # Merge ratings with book metadata
+    ratings = ratings.merge(books, on='book_id')
+    book_titles = dict(zip(ratings['book_id'], ratings['title_x']))
+    return ratings, books, book_titles
+
+def initialize_session_state():
+    # Load data and build the sparse rating matrix once per session
+    if "ratings" not in st.session_state:
+        st.session_state.ratings, st.session_state.books, st.session_state.book_titles = load_data()
+        st.session_state.X, st.session_state.user_mapper, st.session_state.book_mapper, st.session_state.user_inv_mapper, st.session_state.book_inv_mapper = create_matrix(st.session_state.ratings)
+        st.session_state.book_id_mapping = pd.Series(st.session_state.books.book_id.values, index=st.session_state.books.title).to_dict()
+
+initialize_session_state()
+
+# Streamlit interface for book recommendation
+st.title('Book Recommender System')
+
+# User inputs
+title_input = st.selectbox('Select or type a book title', st.session_state.books['title'].unique())
+k_input = st.number_input('How many recommendations do you want?', min_value=1, max_value=20, value=5)
+
+if st.button('Find Similar Books'):
+    if title_input in st.session_state.book_id_mapping:
+        book_id = st.session_state.book_id_mapping[title_input]
+        distances, similar_ids = find_similar_books(book_id, st.session_state.X, k=k_input, book_mapper=st.session_state.book_mapper, book_inv_mapper=st.session_state.book_inv_mapper)
+        similar_books = pd.DataFrame({
+            'Book Title': [st.session_state.book_titles[ids] for ids in similar_ids],
+            'Distance': distances[0][1:]
+        })
+
+        st.write(f"Books similar to {title_input}:")
+        st.dataframe(similar_books.sort_values(by='Distance', ascending=True))
+    else:
+        st.error("Book title not found. Please check the spelling or try another title.")
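For reference, the app should run locally with a plain Streamlit launch, assuming the two CSVs from this commit are placed in a ./data/ directory as the read_csv paths above expect:

streamlit run app.py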
collaborative_book_metadata.csv
ADDED
The diff for this file is too large to render.
See raw diff
collaborative_books_df.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9903754ef7733b97016246714ef00c8b898fdae7a88a686637133d3b97c0751
+size 11243089
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+pandas==1.4.3
+numpy==1.23.1
+scipy==1.8.1
+scikit-learn==1.1.1
+streamlit==1.10.0
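These are exact version pins, so installing them into a clean virtual environment is the safest way to reproduce the Space's runtime, for example:

python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt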
utils.py
ADDED
@@ -0,0 +1,48 @@
+import pandas as pd
+import numpy as np
+from scipy.sparse import csr_matrix
+from sklearn.neighbors import NearestNeighbors
+
+
+def create_matrix(df):
+    """Build a sparse book-user rating matrix plus ID <-> index mappers."""
+    # Column names follow this dataset: 'user_id_mapping', 'book_id', 'Actual Rating'
+    N = len(df['user_id_mapping'].unique())  # number of users
+    M = len(df['book_id'].unique())          # number of books
+
+    # Map IDs to matrix indices
+    user_mapper = dict(zip(np.unique(df["user_id_mapping"]), list(range(N))))
+    book_mapper = dict(zip(np.unique(df["book_id"]), list(range(M))))
+
+    # Map matrix indices back to IDs
+    user_inv_mapper = dict(zip(list(range(N)), np.unique(df["user_id_mapping"])))
+    book_inv_mapper = dict(zip(list(range(M)), np.unique(df["book_id"])))
+
+    user_index = [user_mapper[i] for i in df['user_id_mapping']]
+    book_index = [book_mapper[i] for i in df['book_id']]
+
+    # Use 'Actual Rating' as the matrix values; rows are books, columns are users
+    X = csr_matrix((df["Actual Rating"], (book_index, user_index)), shape=(M, N))
+
+    return X, user_mapper, book_mapper, user_inv_mapper, book_inv_mapper
+
+
+# Source: https://www.geeksforgeeks.org/recommendation-system-in-python/?ref=rp
+def find_similar_books(book_id, X, k, book_mapper, book_inv_mapper, metric='cosine'):
+    """Find the k books closest to book_id in the rating matrix using KNN."""
+    neighbour_ids = []
+
+    book_ind = book_mapper[book_id]
+    book_vec = X[book_ind]
+    k += 1  # ask for one extra neighbour: the query book itself is returned first
+    kNN = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric)
+    kNN.fit(X)
+    book_vec = book_vec.reshape(1, -1)
+    distances, indices = kNN.kneighbors(book_vec, return_distance=True)
+    for i in range(k):
+        n = indices.item(i)
+        neighbour_ids.append(book_inv_mapper[n])
+    neighbour_ids.pop(0)  # drop the query book from the results
+    return distances, neighbour_ids
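As a quick sanity check of these helpers outside Streamlit, a minimal sketch could look like the following, assuming the same data layout app.py uses (the two CSVs under ./data/ with book_id, title_x, user_id_mapping, and Actual Rating columns):

import pandas as pd
from utils import create_matrix, find_similar_books

# Load and merge the same two CSVs app.py reads
ratings = pd.read_csv('./data/collaborative_books_df.csv', index_col=0)
books = pd.read_csv('./data/collaborative_book_metadata.csv', index_col=0)
ratings = ratings.merge(books, on='book_id')

# Build the sparse book-user matrix and the ID <-> index mappers
X, user_mapper, book_mapper, user_inv_mapper, book_inv_mapper = create_matrix(ratings)

# Pick an arbitrary book and list its 5 nearest neighbours by cosine distance
query_id = ratings['book_id'].iloc[0]
distances, similar_ids = find_similar_books(
    query_id, X, k=5,
    book_mapper=book_mapper, book_inv_mapper=book_inv_mapper,
)

titles = dict(zip(ratings['book_id'], ratings['title_x']))
for book_id in similar_ids:
    print(titles[book_id])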