FoodDesert committed
Commit 1e4bd6c
Parent(s): 90290aa

Upload app.py

Files changed (1): app.py (+7, -10)
app.py CHANGED
@@ -113,20 +113,17 @@ def extract_tags(tree):
 
 
 # Load the model and data once at startup
-with h5py.File('pca_reduced_artist_data.hdf5', 'r') as f:
+with h5py.File('complete_artist_data.hdf5', 'r') as f:
+    # Deserialize the vectorizer
     vectorizer_bytes = f['vectorizer'][()].tobytes()
     # Use io.BytesIO to convert bytes back to a file-like object for joblib to load
     vectorizer_buffer = BytesIO(vectorizer_bytes)
     vectorizer = load(vectorizer_buffer)
 
-    # Assuming you've saved the PCA mean, components, and the transformed X_artist matrix in the file
-    pca_mean = f['pca_mean'][:]
-    pca_components = f['pca_components'][:]
-    X_artist_reduced = f['X_artist_reduced'][:]
+    # Load X_artist
+    X_artist = f['X_artist'][:]
+    # Load artist names and decode to strings
     artist_names = [name.decode() for name in f['artist_names'][:]]
-    # Recreate PCA transformation (not the exact PCA object but its transformation ability)
-    def pca_transform(X):
-        return (X - pca_mean) @ pca_components.T
 
 
 with h5py.File('conditional_tag_probabilities_matrix.h5', 'r') as f:
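Note on the first hunk: the app now expects exactly three datasets in complete_artist_data.hdf5, namely the joblib-serialized vectorizer stored as raw bytes, the full (un-reduced) artist matrix X_artist, and the artist names as byte strings. The export script is not part of this commit, so the writer below is only a sketch; the function name save_artist_data and the assumption that X_artist is dense are inferred from the reads above, not taken from the repo.

# Hypothetical writer for complete_artist_data.hdf5 -- not part of this commit;
# dataset names and layout are inferred from the loading code above.
import h5py
import numpy as np
from io import BytesIO
from joblib import dump

def save_artist_data(vectorizer, X_artist, artist_names,
                     path='complete_artist_data.hdf5'):
    # joblib-serialize the fitted vectorizer into memory, store as raw uint8 bytes
    buf = BytesIO()
    dump(vectorizer, buf)
    with h5py.File(path, 'w') as f:
        f.create_dataset('vectorizer', data=np.frombuffer(buf.getvalue(), dtype=np.uint8))
        f.create_dataset('X_artist', data=np.asarray(X_artist))  # dense artist-by-tag matrix
        f.create_dataset('artist_names', data=[n.encode('utf-8') for n in artist_names])

Storing the vectorizer as a uint8 dataset is what makes the f['vectorizer'][()].tobytes() round-trip on the read side work.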
@@ -288,8 +285,8 @@ def find_similar_artists(new_tags_string, top_n, similarity_weight):
     ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
     unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
 
-    X_new_image_transformed = pca_transform(vectorizer.transform([','.join(new_image_tags)]))
-    similarities = cosine_similarity(np.asarray(X_new_image_transformed), np.asarray(X_artist_reduced))[0]
+    X_new_image = vectorizer.transform([','.join(new_image_tags)])
+    similarities = cosine_similarity(X_new_image, X_artist)[0]
 
     top_artist_indices = np.argsort(similarities)[-top_n:][::-1]
     top_artists = [(artist_names[i], similarities[i]) for i in top_artist_indices]
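Net effect of the second hunk: the query vector stays in the sparse tag space and is compared against X_artist directly. cosine_similarity accepts SciPy sparse input, so the old np.asarray(...) wrappers can go (they were likely needed because subtracting the dense pca_mean from a sparse row produces a numpy.matrix, which recent scikit-learn versions reject). Below is a toy, self-contained sketch of the new call pattern, with made-up tags and a stand-in CountVectorizer rather than the app's actual fitted vectorizer.

# Toy stand-in for the app's fitted vectorizer and artist matrix; only the
# call pattern mirrors the new code above, the data is invented.
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

artist_docs = ['fur canine forest', 'city neon night', 'forest river']
vectorizer = CountVectorizer()
X_artist = vectorizer.fit_transform(artist_docs)   # sparse artist-by-tag matrix

X_new_image = vectorizer.transform(['forest night'])        # sparse query row
similarities = cosine_similarity(X_new_image, X_artist)[0]  # no np.asarray needed

top_n = 2
top_artist_indices = np.argsort(similarities)[-top_n:][::-1]  # best matches first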