feeeper committed on
Commit
33e1108
1 Parent(s): 07ffb2c

data, train/predict script

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. amazon-books-titles.csv +0 -0
  3. amazon-books.zip +3 -0
  4. app.py +58 -2
  5. titles.txt +0 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .vscode/launch.json
2
+ amazon-books.csv
amazon-books-titles.csv ADDED
The diff for this file is too large to render. See raw diff
 
amazon-books.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13ccd03c37364738fe39d04e02cf1a07dc5b907f4534846feb9044a87ed8fb57
3
+ size 9962722
app.py CHANGED
@@ -1,4 +1,60 @@
 
 
 
1
  import streamlit as st
 
 
2
 
3
- x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from surprise import SVDpp
2
+ from surprise import Dataset
3
+ from surprise import Reader
4
  import streamlit as st
5
+ import pandas as pd
6
+ import pickle
7
 
8
+
9
def predict():
    """Show the stored model's estimated rating for every known title.

    Loads the pickled model from ``./model.pkl``, estimates a rating for each
    ``asin`` listed in ``./amazon-books-titles.csv`` on behalf of the user id
    held in the module-level ``x``, and writes the result to the page as a
    table ordered from the highest to the lowest estimate.

    If no model file exists yet (e.g. "Predict" was pressed before "Submit"),
    a warning is displayed instead of raising ``FileNotFoundError``.
    """
    try:
        # NOTE(security): pickle.load can execute arbitrary code from the
        # file, so model.pkl must only ever be the file written by
        # train_model() and never an externally supplied one.
        with open('./model.pkl', 'rb') as f:
            model = pickle.load(f)
    except FileNotFoundError:
        st.warning('No trained model found. Press "Submit" to train one first.')
        return

    titles = pd.read_csv('./amazon-books-titles.csv')

    # Walk the two needed columns in lockstep instead of materialising a
    # Series per row with iterrows().
    predictions = [
        {'title': title, 'rating': model.predict(x, asin).est}
        for title, asin in zip(titles['title'], titles['asin'])
    ]

    # Stable descending sort: equal ratings keep their CSV order.
    predictions.sort(key=lambda p: p['rating'], reverse=True)
    st.write(pd.DataFrame(predictions))
19
+
20
+
21
def train_model():
    """Retrain the SVD++ model with the current user's selections included.

    Reads the ratings from ``./amazon-books.zip`` and the title/asin mapping
    from ``./amazon-books-titles.csv``. Every title currently selected in the
    module-level ``y`` is recorded as a rating of 5 by the user id held in the
    module-level ``x``; those rows are shown on the page and added to the
    ratings before fitting. The fitted model is pickled to ``./model.pkl``.
    """
    books = pd.read_csv('./amazon-books.zip')
    titles = pd.read_csv('./amazon-books-titles.csv')

    # The selectable options are raw lines read from titles.txt, so they are
    # stripped before being matched against the 'title' column.
    selected_titles = [t.strip() for t in y]
    current_user_book_ids = titles[titles['title'].isin(selected_titles)]['asin'].values
    current_user_ratings = pd.DataFrame({
        'reviewerID': [x] * len(current_user_book_ids),
        'asin': current_user_book_ids,
        'overall': [5] * len(current_user_book_ids),
    })
    st.write(current_user_ratings)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. An empty user frame is skipped so that it cannot alter the
    # dtypes of the rating columns.
    if not current_user_ratings.empty:
        books = pd.concat([books, current_user_ratings])

    data = Dataset.load_from_df(
        books[['reviewerID', 'asin', 'overall']],
        Reader(line_format='user item rating', rating_scale=(1, 5)),
    )
    trainset = data.build_full_trainset()

    # NOTE(review): these look like the output of an earlier hyper-parameter
    # search that is not part of this file — confirm before changing them.
    best_params = {
        'n_epochs': 15,
        'lr_all': 0.004760245463611792,
        'reg_all': 0.40040712444861504,
        'random_state': 42,
    }

    algo = SVDpp(**best_params)
    algo.fit(trainset)

    with open('./model.pkl', 'wb') as f:
        pickle.dump(algo, f)
49
+
50
+
51
# Selectable book titles: one entry per line of titles.txt, kept exactly as
# read (each entry still carries its line terminator).
with open('./titles.txt', 'r', encoding='utf8') as titles_file:
    options = list(titles_file)

# User id typed by the visitor; read by train_model() and predict().
x = st.text_input('uid')

# Titles the visitor picked; read by train_model().
y = st.multiselect('Select book', options)

# "Submit" retrains the model, "Predict" displays the estimated ratings.
btn = st.button('Submit', on_click=train_model)

btn2 = st.button('Predict', on_click=predict)
titles.txt ADDED
The diff for this file is too large to render. See raw diff