Nechba committed on
Commit
ceee38b
·
verified ·
1 Parent(s): 93a32e5

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ collaborative_books_df.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from utils import *
4
+
5
def load_data():
    """Load the ratings and book-metadata CSV files and join them.

    Both files are read from the ``./data`` directory, using their first
    column as the index. The ratings are then merged with the metadata on
    ``book_id`` (pandas' default inner join).

    Returns:
        A 3-tuple ``(ratings, books, book_titles)``:
        * ``ratings`` - the ratings merged with the book metadata,
        * ``books`` - the book metadata exactly as read from disk,
        * ``book_titles`` - dict mapping ``book_id`` to the merged frame's
          ``title_x`` column.
    """
    ratings_raw = pd.read_csv('./data/collaborative_books_df.csv', index_col=0)
    books = pd.read_csv('./data/collaborative_book_metadata.csv', index_col=0)

    # pandas suffixes column names shared by both frames with _x / _y, so
    # `title_x` is the left-hand (ratings) copy.
    # NOTE(review): presumably both CSVs carry a `title` column - confirm.
    merged = ratings_raw.merge(books, on='book_id')
    book_titles = dict(zip(merged['book_id'], merged['title_x']))

    return merged, books, book_titles
16
+
17
def initialize_session_state():
    """Populate ``st.session_state`` with the data and lookup tables.

    Does nothing when ``"ratings"`` is already present in the session state,
    so the CSV loading and matrix construction are not repeated on later
    script runs of the same session.

    Stored keys: ``ratings``, ``books``, ``book_titles`` (from ``load_data``),
    ``X``, ``user_mapper``, ``book_mapper``, ``user_inv_mapper``,
    ``book_inv_mapper`` (from ``create_matrix``) and ``book_id_mapping``
    (book title -> book_id, built from the metadata frame).
    """
    state = st.session_state
    if "ratings" in state:
        return

    ratings, books, book_titles = load_data()
    state.ratings = ratings
    state.books = books
    state.book_titles = book_titles

    matrix, user_map, book_map, user_inv_map, book_inv_map = create_matrix(state.ratings)
    state.X = matrix
    state.user_mapper = user_map
    state.book_mapper = book_map
    state.user_inv_mapper = user_inv_map
    state.book_inv_mapper = book_inv_map

    # Title -> book_id lookup used to resolve the title picked in the UI.
    title_to_id = pd.Series(state.books.book_id.values, index=state.books.title)
    state.book_id_mapping = title_to_id.to_dict()
22
+
23
initialize_session_state()

# Streamlit interface for book recommendation
st.title('Book Recommender System')

# User inputs
title_input = st.selectbox('Select or type a book title', st.session_state.books['title'].unique())
k_input = st.number_input('How many recommendations do you want?', min_value=1, max_value=20, value=5)

if st.button('Find Similar Books'):
    if title_input not in st.session_state.book_id_mapping:
        st.error("Book title not found. Please check the spelling or try another title.")
    else:
        book_id = st.session_state.book_id_mapping[title_input]

        # The select box lists every title in the metadata, but book_mapper
        # only contains books that survived the ratings/metadata merge.
        # Without this guard a book with no ratings raises KeyError inside
        # find_similar_books.
        if book_id not in st.session_state.book_mapper:
            st.warning("No ratings are available for this book, so similar books cannot be computed. Please try another title.")
        else:
            distances, similar_ids = find_similar_books(
                book_id,
                st.session_state.X,
                k=k_input,
                book_mapper=st.session_state.book_mapper,
                book_inv_mapper=st.session_state.book_inv_mapper,
            )
            similar_books = pd.DataFrame({
                'Book Title': [st.session_state.book_titles[ids] for ids in similar_ids],
                # Column 0 of `distances` belongs to the query book itself,
                # so it is skipped to stay aligned with `similar_ids`.
                'Distance': distances[0][1:],
            })

            st.write(f"Books similar to {title_input}:")
            st.dataframe(similar_books.sort_values(by='Distance', ascending=True))
45
+
46
+
47
+
collaborative_book_metadata.csv ADDED
The diff for this file is too large to render. See raw diff
 
collaborative_books_df.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9903754ef7733b97016246714ef00c8b898fdae7a88a686637133d3b97c0751
3
+ size 11243089
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pandas==1.4.3
2
+ numpy==1.23.1
3
+ scipy==1.8.1
4
+ scikit-learn==1.1.1
5
+ streamlit==1.10.0
utils.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from scipy.sparse import csr_matrix
4
+ from sklearn.neighbors import NearestNeighbors
5
+
6
def create_matrix(df):
    """Build a sparse book x user rating matrix and its ID/index lookups.

    Args:
        df: DataFrame with the columns ``user_id_mapping``, ``book_id`` and
            ``Actual Rating``; one row per rating.

    Returns:
        A 5-tuple ``(X, user_mapper, book_mapper, user_inv_mapper,
        book_inv_mapper)``:
        * ``X`` - ``scipy.sparse.csr_matrix`` of shape
          ``(n_books, n_users)`` holding the ``Actual Rating`` values
          (rows are books, columns are users),
        * ``user_mapper`` / ``book_mapper`` - dicts from ID to matrix index,
        * ``user_inv_mapper`` / ``book_inv_mapper`` - dicts from matrix
          index back to ID.
        Indices follow the sorted order of the unique IDs.
    """
    # One np.unique call per column yields both the sorted unique IDs and,
    # via return_inverse, the index of every row's ID within them. This
    # replaces the repeated np.unique calls and per-row dict lookups.
    user_ids, user_index = np.unique(df["user_id_mapping"], return_inverse=True)
    book_ids, book_index = np.unique(df["book_id"], return_inverse=True)
    N = len(user_ids)
    M = len(book_ids)

    # Map IDs to indices
    user_mapper = dict(zip(user_ids, range(N)))
    book_mapper = dict(zip(book_ids, range(M)))

    # Map indices to IDs
    user_inv_mapper = dict(zip(range(N), user_ids))
    book_inv_mapper = dict(zip(range(M), book_ids))

    # Books are the rows so that each row is one book's rating vector.
    X = csr_matrix(
        (df["Actual Rating"].to_numpy(), (book_index, user_index)),
        shape=(M, N),
    )

    return X, user_mapper, book_mapper, user_inv_mapper, book_inv_mapper
27
+
28
+
29
+ """
30
+ Find similar books using KNN
31
+ """
32
# Adapted from https://www.geeksforgeeks.org/recommendation-system-in-python/?ref=rp
def find_similar_books(book_id, X, k, book_mapper, book_inv_mapper, metric='cosine'):
    """Find the books most similar to ``book_id`` using k-nearest neighbours.

    Args:
        book_id: ID of the query book; must be a key of ``book_mapper``.
        X: matrix with one row per book, passed to
            ``NearestNeighbors.fit``.
        k: number of similar books wanted.
        book_mapper: dict mapping book ID -> row index in ``X``.
        book_inv_mapper: dict mapping row index in ``X`` -> book ID.
        metric: distance metric handed to ``NearestNeighbors``.

    Returns:
        ``(distances, neighbour_ids)``. ``neighbour_ids`` is a list of at
        most ``k`` book IDs, excluding ``book_id``. ``distances`` is a 2-D
        array with a single row: column 0 is the distance of the entry that
        was dropped (the query book itself whenever the search returned
        it), and columns ``1:`` line up one-to-one with ``neighbour_ids``.

    Raises:
        KeyError: if ``book_id`` is not in ``book_mapper``.
    """
    book_ind = book_mapper[book_id]
    book_vec = X[book_ind].reshape(1, -1)

    # Ask for one extra neighbour because the query book is normally its own
    # nearest neighbour. Clamp to the number of fitted rows: kneighbors
    # raises ValueError when n_neighbors exceeds it.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)
    distances, indices = kNN.kneighbors(book_vec, return_distance=True)

    found = indices[0].tolist()
    # Remove the query book by row index, not by position: with tied
    # distances (e.g. identical rating vectors) it is not guaranteed to be
    # the first hit. If ties pushed it out of the result entirely, drop the
    # farthest hit instead so at most k neighbours are returned.
    try:
        drop = found.index(book_ind)
    except ValueError:
        drop = len(found) - 1

    # Move the dropped entry to column 0 so callers that skip the first
    # distance stay aligned with neighbour_ids.
    order = [drop] + [pos for pos in range(len(found)) if pos != drop]
    distances = distances[:, order]
    neighbour_ids = [book_inv_mapper[found[pos]] for pos in order[1:]]

    return distances, neighbour_ids