FoodDesert committed on
Commit
22f7149
1 Parent(s): 3f3bfef

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -101,10 +101,6 @@ plain: /([^,\\\[\]():|]|\\.)+/
101
  # Initialize the parser
102
  parser = Lark(grammar, start='start')
103
 
104
-
105
- special_tags = ["score:0", "score:1", "score:2", "score:3", "score:4", "score:5", "score:6", "score:7", "score:8", "score:9"]
106
-
107
-
108
  # Function to extract tags
109
  def extract_tags(tree):
110
  tags = []
@@ -118,6 +114,14 @@ def extract_tags(tree):
118
  _traverse(tree)
119
  return tags
120
 
 
 
 
 
 
 
 
 
121
 
122
  # Load the model and data once at startup
123
  with h5py.File('complete_artist_data.hdf5', 'r') as f:
@@ -283,6 +287,8 @@ def find_similar_tags(test_tags, similarity_weight):
283
  def find_similar_artists(new_tags_string, top_n, similarity_weight):
284
  try:
285
  new_tags_string = new_tags_string.lower()
 
 
286
  # Parse the prompt
287
  parsed = parser.parse(new_tags_string)
288
  # Extract tags from the parsed tree
@@ -292,7 +298,7 @@ def find_similar_artists(new_tags_string, top_n, similarity_weight):
292
  ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
293
  unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
294
 
295
- X_new_image = vectorizer.transform([','.join(new_image_tags)])
296
  similarities = cosine_similarity(X_new_image, X_artist)[0]
297
 
298
  top_artist_indices = np.argsort(similarities)[-top_n:][::-1]
 
101
  # Initialize the parser
102
  parser = Lark(grammar, start='start')
103
 
 
 
 
 
104
  # Function to extract tags
105
  def extract_tags(tree):
106
  tags = []
 
114
  _traverse(tree)
115
  return tags
116
 
117
+
118
# Tags that are stripped from the prompt before it is handed to the parser.
# NOTE(review): presumably needed because these contain ':', which the
# grammar's plain-tag pattern excludes -- confirm against the full grammar.
special_tags = ["score:0", "score:1", "score:2", "score:3", "score:4", "score:5", "score:6", "score:7", "score:8", "score:9"]


def _split_on_unescaped_commas(text):
    """Split *text* on commas, keeping backslash-escaped characters intact.

    Behaves like ``text.split(",")`` except that a comma preceded by an
    escaping backslash (``\\,``) stays inside the current piece.
    """
    pieces = []
    current = []
    escaped = False
    for char in text:
        if escaped:
            # Character following a backslash is always literal.
            current.append(char)
            escaped = False
        elif char == "\\":
            current.append(char)
            escaped = True
        elif char == ",":
            pieces.append("".join(current))
            current = []
        else:
            current.append(char)
    pieces.append("".join(current))
    return pieces


def remove_special_tags(original_string):
    """Separate the special tags from a comma-delimited tag string.

    Args:
        original_string: Comma-separated tags. Whitespace around each tag is
            ignored; backslash-escaped commas are treated as part of a tag.

    Returns:
        A ``(remaining, removed)`` tuple: ``remaining`` is the non-special
        tags re-joined with ``", "`` in their original order, and ``removed``
        is the list of special tags found, also in original order.
    """
    # Build the lookup per call so later changes to ``special_tags`` are seen.
    special = set(special_tags)
    remaining_tags = []
    removed_tags = []
    for raw_tag in _split_on_unescaped_commas(original_string):
        tag = raw_tag.strip()
        if tag in special:
            removed_tags.append(tag)
        else:
            remaining_tags.append(tag)
    return ", ".join(remaining_tags), removed_tags
124
+
125
 
126
  # Load the model and data once at startup
127
  with h5py.File('complete_artist_data.hdf5', 'r') as f:
 
287
  def find_similar_artists(new_tags_string, top_n, similarity_weight):
288
  try:
289
  new_tags_string = new_tags_string.lower()
290
+ new_tags_string, removed_tags = remove_special_tags(new_tags_string)
291
+
292
  # Parse the prompt
293
  parsed = parser.parse(new_tags_string)
294
  # Extract tags from the parsed tree
 
298
  ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
299
  unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
300
 
301
+ X_new_image = vectorizer.transform([','.join(new_image_tags + removed_tags)])
302
  similarities = cosine_similarity(X_new_image, X_artist)[0]
303
 
304
  top_artist_indices = np.argsort(similarities)[-top_n:][::-1]