nirajandhakal
committed on
initial commit
Browse files- .gitattributes +6 -0
- README.md +1 -0
- data/dataset/book_tags.csv +3 -0
- data/dataset/books.csv +0 -0
- data/dataset/goodbooks-10k.csv +3 -0
- data/dataset/ratings.csv +3 -0
- data/dataset/tags.csv +0 -0
- data/dataset/to_read.csv +0 -0
- inference.py +47 -0
- model.py +89 -0
- models/authors_w2v.model +3 -0
- models/recommendation_model.keras +3 -0
- models/recommendation_model_2.keras +3 -0
- models/recommendation_model_3.keras +3 -0
- models/title_w2v.model +3 -0
- tfidf_models/tfidf_model_authors.pkl +3 -0
- tfidf_models/tfidf_model_titles.pkl +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/dataset/book_tags.csv filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data/dataset/goodbooks-10k.csv filter=lfs diff=lfs merge=lfs -text
|
38 |
+
data/dataset/ratings.csv filter=lfs diff=lfs merge=lfs -text
|
39 |
+
models/recommendation_model_2.keras filter=lfs diff=lfs merge=lfs -text
|
40 |
+
models/recommendation_model_3.keras filter=lfs diff=lfs merge=lfs -text
|
41 |
+
models/recommendation_model.keras filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
---
|
2 |
license: apache-2.0
|
|
|
3 |
---
|
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
+
library: tensorflow
|
4 |
---
|
data/dataset/book_tags.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ff2f0659be9ddbe10bf47ba33b5b69024f893f017ac665236f15bc97d862950
|
3 |
+
size 16665883
|
data/dataset/books.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/dataset/goodbooks-10k.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78139caa6f50bce2373d5fd82e3479e24b51a715d52a5782b073b0e4fa054674
|
3 |
+
size 78103306
|
data/dataset/ratings.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ee8a97172bd6d97147a30d9cfb3029ac240bd27ba2aa9365981c051c65113f8
|
3 |
+
size 72126826
|
data/dataset/tags.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/dataset/to_read.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
inference.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# `ratings` and `books` are module-level DataFrames created in model.py.
# This script reads `ratings` below and `books` inside
# recommend_books_for_user, so both must be imported explicitly; referencing
# them without an import raises NameError.
from model import books, build_model, ratings
from utils import map_book_name_to_id

# Load the saved model for inference: rebuild the architecture with the
# number of distinct users/books found in `ratings`, then restore the
# weights from disk.
loaded_model = build_model(
    num_users=len(ratings["user_id"].unique()),
    num_books=len(ratings["book_id"].unique()),
)
loaded_model.load_weights("recommendation_model.h5")
|
13 |
+
|
14 |
+
|
15 |
+
# Function to recommend books for a user based on input book name or author name
|
16 |
+
def recommend_books_for_user(input_name, model, num_recommendations=10):
    """
    Recommend books for a user based on input book name or author name.

    Args:
        input_name (str): The input book name or author name.
        model: The trained recommendation model.
        num_recommendations (int): The number of books to recommend.

    Returns:
        tuple: ``(recommended_books, similarity_scores)``. Both elements are
            ``None`` when no recommendations were produced, so the result can
            always be unpacked into two names.
    """
    # Check if input_name is a book name or author name: case-insensitive
    # exact match against the "authors" column of the module-level `books`
    # table.
    is_author = input_name.lower() in books["authors"].str.lower().values

    # Rest of the code...

    # Fallback result. The caller unpacks two values and tests
    # `recommended_books is not None`; falling off the end would return a
    # bare None and make that unpacking raise TypeError.
    return None, None
|
32 |
+
|
33 |
+
|
34 |
+
# Demo run: request recommendations seeded by one title and print each
# recommended book next to its similarity score.
input_name = "Harry Potter and the Sorcerer's Stone"
recommended_books, similarity_scores = recommend_books_for_user(input_name, loaded_model)

if recommended_books is None:
    print("No recommendations found.")
else:
    print("Recommended Books:")
    print("------------------")
    # Title is left-aligned in a 60-character column; score shown to 4 decimals.
    for book, score in zip(recommended_books, similarity_scores):
        print(f"{book:<60} {score:.4f}")
|
model.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from sklearn.model_selection import train_test_split
|
4 |
+
from sklearn.preprocessing import LabelEncoder
|
5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
from tensorflow.keras.models import Model
|
8 |
+
from tensorflow.keras.layers import Input, Embedding, Flatten, concatenate, Dense
|
9 |
+
from tensorflow.keras.optimizers import Adam
|
10 |
+
|
11 |
+
# Read the book metadata and the user ratings tables from disk
books = pd.read_csv("../data/dataset/books.csv")
ratings = pd.read_csv("../data/dataset/ratings.csv")

# Re-code the user and book identifiers with one LabelEncoder per column;
# the encoded values replace the original columns in `ratings`.
user_encoder = LabelEncoder().fit(ratings["user_id"])
book_encoder = LabelEncoder().fit(ratings["book_id"])
ratings["user_id"] = user_encoder.transform(ratings["user_id"])
ratings["book_id"] = book_encoder.transform(ratings["book_id"])

# Hold out 20% of the ratings as a test set (fixed seed for a repeatable split)
train, test = train_test_split(ratings, test_size=0.2, random_state=42)
|
24 |
+
|
25 |
+
|
26 |
+
# Define the neural network model
|
27 |
+
def build_model(num_users, num_books, embedding_size=50):
    """
    Construct and compile the two-input rating-prediction network.

    Each input (one user index, one book index) is looked up in its own
    embedding table. The flattened embeddings are concatenated, passed
    through a 128-unit ReLU layer, and reduced to a single linear output.
    The model is compiled with mean squared error loss and the Adam
    optimizer at a learning rate of 0.001.

    Args:
        num_users (int): ``input_dim`` of the user embedding table.
        num_books (int): ``input_dim`` of the book embedding table.
        embedding_size (int, optional): Width of each embedding vector.
            Defaults to 50.

    Returns:
        keras.Model: The compiled model, taking ``[user, book]`` inputs.
    """
    # One scalar index per sample for each branch.
    user_in = Input(shape=(1,))
    book_in = Input(shape=(1,))

    # Separate embedding tables; the user branch is built before the book
    # branch at every stage.
    user_table = Embedding(input_dim=num_users, output_dim=embedding_size)
    user_vec = user_table(user_in)
    book_table = Embedding(input_dim=num_books, output_dim=embedding_size)
    book_vec = book_table(book_in)

    # Flatten each embedding output and join the two branches.
    features = concatenate([Flatten()(user_vec), Flatten()(book_vec)])
    hidden = Dense(128, activation="relu")(features)
    rating = Dense(1)(hidden)

    network = Model(inputs=[user_in, book_in], outputs=rating)
    network.compile(loss="mean_squared_error", optimizer=Adam(learning_rate=0.001))
    return network
|
60 |
+
|
61 |
+
|
62 |
+
# Train, plot, save and evaluate only when this file is run as a script.
# inference.py does `from model import build_model`; without this guard that
# import would retrain the network for 5 epochs and open a blocking plot
# window every time the module is imported.
if __name__ == "__main__":
    # Train the model
    model = build_model(
        num_users=len(ratings["user_id"].unique()),
        num_books=len(ratings["book_id"].unique()),
    )
    history = model.fit(
        [train["user_id"], train["book_id"]],
        train["rating"],
        epochs=5,
        batch_size=128,
        validation_split=0.1,  # 10% of the training rows are used for validation
    )

    # Plot training and validation loss
    plt.figure(figsize=(12, 6))
    plt.plot(history.history["loss"], label="Training Loss")
    plt.plot(history.history["val_loss"], label="Validation Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

    # Save the model
    model.save("recommendation_model.h5")

    # Evaluate the model on the held-out test split
    test_loss = model.evaluate([test["user_id"], test["book_id"]], test["rating"])
    print(f"Test Loss: {test_loss}")
|
models/authors_w2v.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:254a7bb6b32780bbc3df2575c65fad32042738af828cf11b634f5bc9066f817d
|
3 |
+
size 4978284
|
models/recommendation_model.keras
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b04908f5db3844db15d02c6142c9bb27cd43cd4a0cca67dc0edf91c7a5c94e06
|
3 |
+
size 38266810
|
models/recommendation_model_2.keras
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a605732f83df3df5f30d0ff20375bb873b3366e9d89123d3ef39749299cb6eae
|
3 |
+
size 38266842
|
models/recommendation_model_3.keras
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:813736e08ec0713d5b7e2bdbd6cbbb88ab2b7e4bd33ae540da81267d4cc5178d
|
3 |
+
size 38248024
|
models/title_w2v.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a05d399ab55578c046a190d2d3015bcfc36fc0e7289f09c257eb17c0e78035ce
|
3 |
+
size 6747050
|
tfidf_models/tfidf_model_authors.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ace0fd2a9e5165c218e4d211206596fbe0798a8f9977af5981b95f76ffce30ea
|
3 |
+
size 170167
|
tfidf_models/tfidf_model_titles.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71e93e399430ea1ba19833d5b5dc4172a8bdf5e5cec23fc2efabddec52974b37
|
3 |
+
size 233964
|