FoodDesert committed on
Commit
72cd75e
1 Parent(s): 22f7149

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +28 -7
  2. word_rating_probabilities.csv +0 -0
app.py CHANGED
@@ -22,10 +22,10 @@ faq_content="""
22
 
23
  ## What is the purpose of this tool?
24
 
25
- When you enter a txt2img prompt prompt and press the "submit" button, the Tagset Completer parses your prompt and checks that all your tags are valid e621 tags.
26
  If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unseen Tags" table.
27
  Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided,
28
- in case you want to look them up to get more ideas.
29
 
30
  ## Does input order matter?
31
 
@@ -59,7 +59,8 @@ So for example, the query "red fox, red fox, red fox, score:7" will yield a list
59
  than the query "red fox, score:7".
60
 
61
  ## Why is this space tagged "not-for-all-audience"
62
- The "not-for-all-audience" tag informs users that this tool's text output is derived from e621.net data for tag prediction and completion. This measure underscores a commitment to responsible content sharing.
 
63
 
64
  ## How is the artist list calculated?
65
 
@@ -88,6 +89,8 @@ A similarity weight slider value of 0 means that only the FastText model's predi
88
  """
89
 
90
 
 
 
91
  grammar=r"""
92
  !start: (prompt | /[][():]/+)*
93
  prompt: (emphasized | plain | comma | WHITESPACE)*
@@ -154,6 +157,19 @@ with h5py.File('conditional_tag_probabilities_matrix.h5', 'r') as f:
154
  conditional_smoothing = 100. / conditional_doc_count
155
 
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  def clean_tag(tag):
158
  return ''.join(char for char in tag if ord(char) < 128)
159
 
@@ -219,7 +235,7 @@ def geometric_mean_given_words(target_word, context_words, co_occurrence_matrix,
219
  return geometric_mean
220
 
221
 
222
- def find_similar_tags(test_tags, similarity_weight):
223
 
224
  #Initialize stuff
225
  if not hasattr(find_similar_tags, "fasttext_small_model"):
@@ -261,6 +277,10 @@ def find_similar_tags(test_tags, similarity_weight):
261
  result.append((similar_tag.replace('_', ' '), round(similarity, 3)))
262
  seen.add(similar_tag)
263
 
 
 
 
 
264
  #Adjust score based on context
265
  for i in range(len(result)):
266
  word, score = result[i] # Unpack the tuple
@@ -284,7 +304,7 @@ def find_similar_tags(test_tags, similarity_weight):
284
 
285
  return results_data # Return list of lists for Dataframe
286
 
287
- def find_similar_artists(new_tags_string, top_n, similarity_weight):
288
  try:
289
  new_tags_string = new_tags_string.lower()
290
  new_tags_string, removed_tags = remove_special_tags(new_tags_string)
@@ -296,7 +316,7 @@ def find_similar_artists(new_tags_string, top_n, similarity_weight):
296
  new_image_tags = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip() for tag in new_image_tags]
297
 
298
  ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
299
- unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
300
 
301
  X_new_image = vectorizer.transform([','.join(new_image_tags + removed_tags)])
302
  similarities = cosine_similarity(X_new_image, X_artist)[0]
@@ -317,7 +337,8 @@ iface = gr.Interface(
317
  inputs=[
318
  gr.Textbox(label="Enter image tags", placeholder="e.g. fox, outside, detailed background, ..."),
319
  gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists"),
320
- gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Similarity weight")
 
321
  ],
322
  outputs=[
323
  gr.Dataframe(label="Unseen Tags", headers=["Tag", "Similar Tags", "Similarity"]),
 
22
 
23
  ## What is the purpose of this tool?
24
 
25
+ When you enter a txt2img prompt and press the "submit" button, the Tagset Completer parses your prompt and checks that all your tags are valid e621 tags.
26
  If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unseen Tags" table.
27
  Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided,
28
+ in case you want to look them up to get more ideas. This is useful to align your prompt with the expected input to an e621-trained model.
29
 
30
  ## Does input order matter?
31
 
 
59
  than the query "red fox, score:7".
60
 
61
  ## Why is this space tagged "not-for-all-audience"
62
+ The "not-for-all-audience" tag informs users that this tool's text output is derived from e621.net data for tag prediction and completion.
63
+ The app will try not to display NSFW tags unless the "Allow NSFW Tags" checkbox is checked, but the filter is not perfect.
64
 
65
  ## How is the artist list calculated?
66
 
 
89
  """
90
 
91
 
92
+ nsfw_threshold = 0.95 # Words whose probability sum in word_rating_probabilities.csv is >= this value are added to nsfw_tags
93
+
94
  grammar=r"""
95
  !start: (prompt | /[][():]/+)*
96
  prompt: (emphasized | plain | comma | WHITESPACE)*
 
157
  conditional_smoothing = 100. / conditional_doc_count
158
 
159
 
160
+ nsfw_tags = set() # Initialize an empty set to store words meeting the threshold
161
+ # Open and read the CSV file
162
+ with open("word_rating_probabilities.csv", 'r', newline='', encoding='utf-8') as csvfile:
163
+ reader = csv.reader(csvfile)
164
+ next(reader, None) # Skip the header row
165
+ for row in reader:
166
+ word = row[0] # The word is in the first column
167
+ probability_sum = float(row[1]) # The sum of probabilities is in the second column, convert to float for comparison
168
+ # Check if the probability sum meets the threshold and add the word to the set if it does
169
+ if probability_sum >= nsfw_threshold:
170
+ nsfw_tags.add(word)
171
+
172
+
173
  def clean_tag(tag):
174
  return ''.join(char for char in tag if ord(char) < 128)
175
 
 
235
  return geometric_mean
236
 
237
 
238
+ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
239
 
240
  #Initialize stuff
241
  if not hasattr(find_similar_tags, "fasttext_small_model"):
 
277
  result.append((similar_tag.replace('_', ' '), round(similarity, 3)))
278
  seen.add(similar_tag)
279
 
280
+ #Remove NSFW tags if appropriate.
281
+ if not allow_nsfw_tags:
282
+ result = [(word, score) for word, score in result if word.replace(' ','_') not in nsfw_tags]
283
+
284
  #Adjust score based on context
285
  for i in range(len(result)):
286
  word, score = result[i] # Unpack the tuple
 
304
 
305
  return results_data # Return list of lists for Dataframe
306
 
307
+ def find_similar_artists(new_tags_string, top_n, similarity_weight, allow_nsfw_tags):
308
  try:
309
  new_tags_string = new_tags_string.lower()
310
  new_tags_string, removed_tags = remove_special_tags(new_tags_string)
 
316
  new_image_tags = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip() for tag in new_image_tags]
317
 
318
  ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
319
+ unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight, allow_nsfw_tags)
320
 
321
  X_new_image = vectorizer.transform([','.join(new_image_tags + removed_tags)])
322
  similarities = cosine_similarity(X_new_image, X_artist)[0]
 
337
  inputs=[
338
  gr.Textbox(label="Enter image tags", placeholder="e.g. fox, outside, detailed background, ..."),
339
  gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists"),
340
+ gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Similarity weight"),
341
+ gr.Checkbox(label="Allow NSFW Tags", value=False)
342
  ],
343
  outputs=[
344
  gr.Dataframe(label="Unseen Tags", headers=["Tag", "Similar Tags", "Similarity"]),
word_rating_probabilities.csv ADDED
The diff for this file is too large to render. See raw diff