FoodDesert committed
Commit 1e4bd6c
Parent(s): 90290aa

Upload app.py

Files changed (1): app.py (+7, -10)
app.py CHANGED
@@ -113,20 +113,17 @@ def extract_tags(tree):
 
 
 # Load the model and data once at startup
-with h5py.File('pca_reduced_artist_data.hdf5', 'r') as f:
+with h5py.File('complete_artist_data.hdf5', 'r') as f:
+    # Deserialize the vectorizer
     vectorizer_bytes = f['vectorizer'][()].tobytes()
     # Use io.BytesIO to convert bytes back to a file-like object for joblib to load
     vectorizer_buffer = BytesIO(vectorizer_bytes)
     vectorizer = load(vectorizer_buffer)
 
-    # Assuming you've saved the PCA mean, components, and the transformed X_artist matrix in the file
-    pca_mean = f['pca_mean'][:]
-    pca_components = f['pca_components'][:]
-    X_artist_reduced = f['X_artist_reduced'][:]
+    # Load X_artist
+    X_artist = f['X_artist'][:]
+    # Load artist names and decode to strings
     artist_names = [name.decode() for name in f['artist_names'][:]]
-    # Recreate PCA transformation (not the exact PCA object but its transformation ability)
-    def pca_transform(X):
-        return (X - pca_mean) @ pca_components.T
 
 
 with h5py.File('conditional_tag_probabilities_matrix.h5', 'r') as f:
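Note on the first hunk: the app now expects exactly three datasets in complete_artist_data.hdf5, namely the joblib-serialized vectorizer stored as raw bytes, the full (un-reduced) artist matrix X_artist, and the artist names as byte strings. The export script is not part of this commit, so the writer below is only a sketch; the function name save_artist_data and the assumption that X_artist is dense are inferred from the reads above, not taken from the repo.

# Hypothetical writer for complete_artist_data.hdf5 -- not part of this commit;
# dataset names and layout are inferred from the loading code above.
import h5py
import numpy as np
from io import BytesIO
from joblib import dump

def save_artist_data(vectorizer, X_artist, artist_names,
                     path='complete_artist_data.hdf5'):
    # joblib-serialize the fitted vectorizer into memory, store as raw uint8 bytes
    buf = BytesIO()
    dump(vectorizer, buf)
    with h5py.File(path, 'w') as f:
        f.create_dataset('vectorizer', data=np.frombuffer(buf.getvalue(), dtype=np.uint8))
        f.create_dataset('X_artist', data=np.asarray(X_artist))  # dense artist-by-tag matrix
        f.create_dataset('artist_names', data=[n.encode('utf-8') for n in artist_names])

Storing the vectorizer as a uint8 dataset is what makes the f['vectorizer'][()].tobytes() round-trip on the read side work.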
@@ -288,8 +285,8 @@ def find_similar_artists(new_tags_string, top_n, similarity_weight):
     ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
     unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
 
-    X_new_image_transformed = pca_transform(vectorizer.transform([','.join(new_image_tags)]))
-    similarities = cosine_similarity(np.asarray(X_new_image_transformed), np.asarray(X_artist_reduced))[0]
+    X_new_image = vectorizer.transform([','.join(new_image_tags)])
+    similarities = cosine_similarity(X_new_image, X_artist)[0]
 
     top_artist_indices = np.argsort(similarities)[-top_n:][::-1]
     top_artists = [(artist_names[i], similarities[i]) for i in top_artist_indices]
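Net effect of the second hunk: the query vector stays in the sparse tag space and is compared against X_artist directly. cosine_similarity accepts SciPy sparse input, so the old np.asarray(...) wrappers can go (they were likely needed because subtracting the dense pca_mean from a sparse row produces a numpy.matrix, which recent scikit-learn versions reject). Below is a toy, self-contained sketch of the new call pattern, with made-up tags and a stand-in CountVectorizer rather than the app's actual fitted vectorizer.

# Toy stand-in for the app's fitted vectorizer and artist matrix; only the
# call pattern mirrors the new code above, the data is invented.
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

artist_docs = ['fur canine forest', 'city neon night', 'forest river']
vectorizer = CountVectorizer()
X_artist = vectorizer.fit_transform(artist_docs)   # sparse artist-by-tag matrix

X_new_image = vectorizer.transform(['forest night'])        # sparse query row
similarities = cosine_similarity(X_new_image, X_artist)[0]  # no np.asarray needed

top_n = 2
top_artist_indices = np.argsort(similarities)[-top_n:][::-1]  # best matches first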