nirajandhakal committed on
Commit
350eabd
1 Parent(s): 834c95e

initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/dataset/book_tags.csv filter=lfs diff=lfs merge=lfs -text
37
+ data/dataset/goodbooks-10k.csv filter=lfs diff=lfs merge=lfs -text
38
+ data/dataset/ratings.csv filter=lfs diff=lfs merge=lfs -text
39
+ models/recommendation_model_2.keras filter=lfs diff=lfs merge=lfs -text
40
+ models/recommendation_model_3.keras filter=lfs diff=lfs merge=lfs -text
41
+ models/recommendation_model.keras filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,4 @@
1
  ---
2
  license: apache-2.0
 
3
  ---
 
1
  ---
2
  license: apache-2.0
3
+ library_name: tensorflow
4
  ---
data/dataset/book_tags.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff2f0659be9ddbe10bf47ba33b5b69024f893f017ac665236f15bc97d862950
3
+ size 16665883
data/dataset/books.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/dataset/goodbooks-10k.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78139caa6f50bce2373d5fd82e3479e24b51a715d52a5782b073b0e4fa054674
3
+ size 78103306
data/dataset/ratings.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee8a97172bd6d97147a30d9cfb3029ac240bd27ba2aa9365981c051c65113f8
3
+ size 72126826
data/dataset/tags.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/dataset/to_read.csv ADDED
The diff for this file is too large to render. See raw diff
 
inference.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+
4
+ from model import build_model
5
+ from utils import map_book_name_to_id
6
+
7
+ # Load the saved model for inference
8
+ loaded_model = build_model(
9
+ num_users=len(ratings["user_id"].unique()),
10
+ num_books=len(ratings["book_id"].unique()),
11
+ )
12
+ loaded_model.load_weights("recommendation_model.h5")
13
+
14
+
15
+ # Function to recommend books for a user based on input book name or author name
16
+ def recommend_books_for_user(input_name, model, num_recommendations=10):
17
+ """
18
+ Recommend books for a user based on input book name or author name.
19
+
20
+ Args:
21
+ input_name (str): The input book name or author name.
22
+ model: The trained recommendation model.
23
+ num_recommendations (int): The number of books to recommend.
24
+
25
+ Returns:
26
+ tuple: A tuple containing the recommended book names and their similarity scores.
27
+ """
28
+ # Check if input_name is a book name or author name
29
+ is_author = input_name.lower() in books["authors"].str.lower().values
30
+
31
+ # Rest of the code...
32
+
33
+
34
+ # Recommend books for a user based on input name along with similarity score.
35
+
36
+ input_name = "Harry Potter and the Sorcerer's Stone"
37
+ recommended_books, similarity_scores = recommend_books_for_user(
38
+ input_name, loaded_model
39
+ )
40
+
41
+ if recommended_books is not None:
42
+ print("Recommended Books:")
43
+ print("------------------")
44
+ for book, score in zip(recommended_books, similarity_scores):
45
+ print(f"{book:<60} {score:.4f}")
46
+ else:
47
+ print("No recommendations found.")
model.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.preprocessing import LabelEncoder
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import matplotlib.pyplot as plt
7
+ from tensorflow.keras.models import Model
8
+ from tensorflow.keras.layers import Input, Embedding, Flatten, concatenate, Dense
9
+ from tensorflow.keras.optimizers import Adam
10
+
11
+ # Load datasets
12
+ books = pd.read_csv("../data/dataset/books.csv")
13
+ ratings = pd.read_csv("../data/dataset/ratings.csv")
14
+
15
+ # Preprocess data
16
+ user_encoder = LabelEncoder()
17
+ book_encoder = LabelEncoder()
18
+
19
+ ratings["user_id"] = user_encoder.fit_transform(ratings["user_id"])
20
+ ratings["book_id"] = book_encoder.fit_transform(ratings["book_id"])
21
+
22
+ # Split the data into training and testing sets
23
+ train, test = train_test_split(ratings, test_size=0.2, random_state=42)
24
+
25
+
26
+ # Define the neural network model
27
+ def build_model(num_users, num_books, embedding_size=50):
28
+ """
29
+ Build a recommendation model.
30
+
31
+ Args:
32
+ num_users (int): The number of users in the dataset.
33
+ num_books (int): The number of books in the dataset.
34
+ embedding_size (int, optional): The size of the embedding vectors. Defaults to 50.
35
+
36
+ Returns:
37
+ keras.Model: The compiled recommendation model.
38
+ """
39
+ user_input = Input(shape=(1,))
40
+ book_input = Input(shape=(1,))
41
+
42
+ user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(
43
+ user_input
44
+ )
45
+ book_embedding = Embedding(input_dim=num_books, output_dim=embedding_size)(
46
+ book_input
47
+ )
48
+
49
+ user_flat = Flatten()(user_embedding)
50
+ book_flat = Flatten()(book_embedding)
51
+
52
+ merged = concatenate([user_flat, book_flat])
53
+ dense1 = Dense(128, activation="relu")(merged)
54
+ output = Dense(1)(dense1)
55
+
56
+ model = Model(inputs=[user_input, book_input], outputs=output)
57
+ model.compile(loss="mean_squared_error", optimizer=Adam(learning_rate=0.001))
58
+
59
+ return model
60
+
61
+
62
+ # Train the model
63
+ model = build_model(
64
+ num_users=len(ratings["user_id"].unique()),
65
+ num_books=len(ratings["book_id"].unique()),
66
+ )
67
+ history = model.fit(
68
+ [train["user_id"], train["book_id"]],
69
+ train["rating"],
70
+ epochs=5,
71
+ batch_size=128,
72
+ validation_split=0.1,
73
+ )
74
+
75
+ # Plot training and validation loss
76
+ plt.figure(figsize=(12, 6))
77
+ plt.plot(history.history["loss"], label="Training Loss")
78
+ plt.plot(history.history["val_loss"], label="Validation Loss")
79
+ plt.xlabel("Epoch")
80
+ plt.ylabel("Loss")
81
+ plt.legend()
82
+ plt.show()
83
+
84
+ # Save the model
85
+ model.save("recommendation_model.h5")
86
+
87
+ # Evaluate the model
88
+ test_loss = model.evaluate([test["user_id"], test["book_id"]], test["rating"])
89
+ print(f"Test Loss: {test_loss}")
models/authors_w2v.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:254a7bb6b32780bbc3df2575c65fad32042738af828cf11b634f5bc9066f817d
3
+ size 4978284
models/recommendation_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b04908f5db3844db15d02c6142c9bb27cd43cd4a0cca67dc0edf91c7a5c94e06
3
+ size 38266810
models/recommendation_model_2.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a605732f83df3df5f30d0ff20375bb873b3366e9d89123d3ef39749299cb6eae
3
+ size 38266842
models/recommendation_model_3.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:813736e08ec0713d5b7e2bdbd6cbbb88ab2b7e4bd33ae540da81267d4cc5178d
3
+ size 38248024
models/title_w2v.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05d399ab55578c046a190d2d3015bcfc36fc0e7289f09c257eb17c0e78035ce
3
+ size 6747050
tfidf_models/tfidf_model_authors.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace0fd2a9e5165c218e4d211206596fbe0798a8f9977af5981b95f76ffce30ea
3
+ size 170167
tfidf_models/tfidf_model_titles.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e93e399430ea1ba19833d5b5dc4172a8bdf5e5cec23fc2efabddec52974b37
3
+ size 233964