nandovallec committed on
Commit
1ab13ba
·
1 Parent(s): 4da8e4d

Add dataset

Browse files
Files changed (3) hide show
  1. app.py +18 -4
  2. recommender.py +19 -4
  3. requirements.txt +1 -0
app.py CHANGED
@@ -21,14 +21,25 @@ import pandas as pd
21
  import os
22
  from scipy.sparse import vstack
23
  from recommender import *
 
 
24
 
 
 
 
 
25
 
26
- # gmaps_api_key = os.environ["GMAPS_API"]
27
- # transl_api_key= os.environ["TRANS_API"]
28
- # gmaps.configure(api_key=gmaps_api_key)
29
- # mode = "walking"
30
 
 
 
 
31
 
 
 
 
32
 
33
  def test(playlist_url, n_rec):
34
  n_rec = int(n_rec)
@@ -46,6 +57,9 @@ def test(playlist_url, n_rec):
46
  # if i % 5 == 0:
47
  # time.sleep(1)
48
  uri_links = inference_from_uri(list_uri, MAX_tid=n_rec)
 
 
 
49
  # uri_links = []
50
  frames = ""
51
  for uri_link in uri_links:
 
21
  import os
22
  from scipy.sparse import vstack
23
  from recommender import *
24
+ import huggingface_hub
25
+ from huggingface_hub import Repository
26
 
27
+ HF_TOKEN = os.environ.get("HF_TOKEN")
28
+ DATASET_REPO_URL_TRAIN = "https://huggingface.co/datasets/nandovallec/df_ps_train_extra"
29
+ DATA_FILENAME_TRAIN = "df_ps_train_extra.hdf"
30
+ DATA_FILE_TRAIN = os.path.join("data_train", DATA_FILENAME_TRAIN)
31
 
32
+ DATASET_REPO_URL_MAT = "https://huggingface.co/datasets/nandovallec/giantMatrix_extra"
33
+ DATA_FILENAME_MAT = "giantMatrix_extra.pickle"
34
+ DATA_FILE_MAT = os.path.join("data_mat", DATA_FILENAME_MAT)
 
35
 
36
+ repo_train = Repository(
37
+ local_dir="data_train", clone_from=DATASET_REPO_URL_TRAIN, use_auth_token=HF_TOKEN
38
+ )
39
 
40
+ repo_mat = Repository(
41
+ local_dir="data_mat", clone_from=DATASET_REPO_URL_MAT, use_auth_token=HF_TOKEN
42
+ )
43
 
44
  def test(playlist_url, n_rec):
45
  n_rec = int(n_rec)
 
57
  # if i % 5 == 0:
58
  # time.sleep(1)
59
  uri_links = inference_from_uri(list_uri, MAX_tid=n_rec)
60
+ commit_url = repo_train.push_to_hub()
61
+ commit_url = repo_mat.push_to_hub()
62
+
63
  # uri_links = []
64
  frames = ""
65
  for uri_link in uri_links:
recommender.py CHANGED
@@ -4,9 +4,21 @@ from scipy.sparse import csr_matrix
4
  import numpy as np
5
  import pandas as pd
6
  from scipy.sparse import vstack
 
 
 
7
 
8
 
9
 
 
 
 
 
 
 
 
 
 
10
  def add_row_train(df, list_tid):
11
  new_pid_add = df.iloc[-1].name +1
12
  list_tid_add = list_tid
@@ -28,7 +40,7 @@ def inference_row(list_tid, ps_matrix):
28
 
29
  def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
30
  df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
31
- df_ps_train_extra = pd.read_hdf('model/df_ps_train_extra.hdf')
32
  df_ps_train = pd.concat([df_ps_train,df_ps_train_extra])
33
 
34
  sim_vector, sparse_row = inference_row(current_list, ps_matrix_row)
@@ -70,7 +82,10 @@ def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
70
  break
71
 
72
  df_ps_train_extra = add_row_train(df_ps_train_extra, current_list)
73
- df_ps_train_extra.to_hdf('model/df_ps_train_extra.hdf', key='abc')
 
 
 
74
  return new_list, sparse_row
75
 
76
 
@@ -81,7 +96,7 @@ def inference_from_tid(list_tid, K=50, MAX_tid=10):
81
  with open(pickle_path, 'rb') as f:
82
  ps_matrix = pickle.load(f)
83
 
84
- with open("model/giantMatrix_extra.pickle",'rb') as f:
85
  ps_matrix_extra = pickle.load(f)
86
 
87
  ps_matrix = vstack((ps_matrix,ps_matrix_extra))
@@ -89,7 +104,7 @@ def inference_from_tid(list_tid, K=50, MAX_tid=10):
89
  result, sparse_row = get_best_tid(list_tid, ps_matrix.tocsr(), K, MAX_tid)
90
  ps_matrix_extra = vstack((ps_matrix_extra,sparse_row.todok()))
91
 
92
- with open("model/giantMatrix_extra.pickle", 'wb') as f:
93
  pickle.dump(ps_matrix_extra, f)
94
 
95
  return result
 
4
  import numpy as np
5
  import pandas as pd
6
  from scipy.sparse import vstack
7
+ import dataset_url
8
+ import huggingface_hub
9
+ from huggingface_hub import Repository
10
 
11
 
12
 
13
+ def save_train_repo(df):
14
+ repo = Repository(
15
+ local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
16
+ )
17
+ df.to_hdf(DATA_FILE, key='abc')
18
+ commit_url = repo.push_to_hub()
19
+ return
20
+
21
+
22
  def add_row_train(df, list_tid):
23
  new_pid_add = df.iloc[-1].name +1
24
  list_tid_add = list_tid
 
40
 
41
  def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
42
  df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
43
+ df_ps_train_extra = pd.read_hdf('data_train/df_ps_train_extra.hdf')
44
  df_ps_train = pd.concat([df_ps_train,df_ps_train_extra])
45
 
46
  sim_vector, sparse_row = inference_row(current_list, ps_matrix_row)
 
82
  break
83
 
84
  df_ps_train_extra = add_row_train(df_ps_train_extra, current_list)
85
+
86
+
87
+ df_ps_train_extra.to_hdf('data_train/df_ps_train_extra.hdf', key='abc')
88
+
89
  return new_list, sparse_row
90
 
91
 
 
96
  with open(pickle_path, 'rb') as f:
97
  ps_matrix = pickle.load(f)
98
 
99
+ with open("data_mat/giantMatrix_extra.pickle",'rb') as f:
100
  ps_matrix_extra = pickle.load(f)
101
 
102
  ps_matrix = vstack((ps_matrix,ps_matrix_extra))
 
104
  result, sparse_row = get_best_tid(list_tid, ps_matrix.tocsr(), K, MAX_tid)
105
  ps_matrix_extra = vstack((ps_matrix_extra,sparse_row.todok()))
106
 
107
+ with open("data_mat/giantMatrix_extra.pickle", 'wb') as f:
108
  pickle.dump(ps_matrix_extra, f)
109
 
110
  return result
requirements.txt CHANGED
@@ -3,3 +3,4 @@ scikit-learn
3
  tables
4
  pandas
5
  numpy
 
 
3
  tables
4
  pandas
5
  numpy
6
+ huggingface_hub