Spaces:

FoodDesert
/

Prompt_Squirrel

Running

App Files Community

FoodDesert committed on Mar 11, 2024

Commit

72cd75e

verified ·

1 Parent(s): 22f7149

Upload 2 files

Browse files

Files changed (2) hide show

app.py +28 -7
word_rating_probabilities.csv +0 -0

app.py CHANGED Viewed

@@ -22,10 +22,10 @@ faq_content="""
 ## What is the purpose of this tool?
-When you enter a txt2img prompt prompt and press the "submit" button, the Tagset Completer parses your prompt and checks that all your tags are valid e621 tags.
 If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unseen Tags" table.
 Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided,
-in case you want to look them up to get more ideas.
 ## Does input order matter?
@@ -59,7 +59,8 @@ So for example, the query "red fox, red fox, red fox, score:7" will yield a list
 than the query "red fox, score:7".
 ## Why is this space tagged "not-for-all-audience"
-The "not-for-all-audience" tag informs users that this tool's text output is derived from e621.net data for tag prediction and completion.  This measure underscores a commitment to responsible content sharing.
 ## How is the artist list calculated?
@@ -88,6 +89,8 @@ A similarity weight slider value of 0 means that only the FastText model's predi
 """
 grammar=r"""
 !start: (prompt | /[][():]/+)*
 prompt: (emphasized | plain | comma | WHITESPACE)*
@@ -154,6 +157,19 @@ with h5py.File('conditional_tag_probabilities_matrix.h5', 'r') as f:
     conditional_smoothing = 100. / conditional_doc_count
 def clean_tag(tag):
     return ''.join(char for char in tag if ord(char) < 128)
@@ -219,7 +235,7 @@ def geometric_mean_given_words(target_word, context_words, co_occurrence_matrix,
     return geometric_mean
-def find_similar_tags(test_tags, similarity_weight):
     #Initialize stuff
     if not hasattr(find_similar_tags, "fasttext_small_model"):
@@ -261,6 +277,10 @@ def find_similar_tags(test_tags, similarity_weight):
                                 result.append((similar_tag.replace('_', ' '), round(similarity, 3)))
                                 seen.add(similar_tag)
         #Adjust score based on context
         for i in range(len(result)):
             word, score = result[i]  # Unpack the tuple
@@ -284,7 +304,7 @@ def find_similar_tags(test_tags, similarity_weight):
     return results_data  # Return list of lists for Dataframe
-def find_similar_artists(new_tags_string, top_n, similarity_weight):
     try:
         new_tags_string = new_tags_string.lower()
         new_tags_string, removed_tags = remove_special_tags(new_tags_string)
@@ -296,7 +316,7 @@ def find_similar_artists(new_tags_string, top_n, similarity_weight):
         new_image_tags = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip() for tag in new_image_tags]
         ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys()))   #We may want this line again later.  These are the tags that were not used to calculate the artists list.
-        unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
         X_new_image = vectorizer.transform([','.join(new_image_tags + removed_tags)])
         similarities = cosine_similarity(X_new_image, X_artist)[0]
@@ -317,7 +337,8 @@ iface = gr.Interface(
     inputs=[
         gr.Textbox(label="Enter image tags", placeholder="e.g. fox, outside, detailed background, ..."),
         gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists"),
-        gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Similarity weight")
     ],
     outputs=[
         gr.Dataframe(label="Unseen Tags", headers=["Tag", "Similar Tags", "Similarity"]),

 ## What is the purpose of this tool?
+When you enter a txt2img prompt and press the "submit" button, the Tagset Completer parses your prompt and checks that all your tags are valid e621 tags.
 If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unseen Tags" table.
 Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided,
+in case you want to look them up to get more ideas.  This is useful to align your prompt with the expected input to an e621-trained model.
 ## Does input order matter?
 than the query "red fox, score:7".
 ## Why is this space tagged "not-for-all-audience"
+The "not-for-all-audience" tag informs users that this tool's text output is derived from e621.net data for tag prediction and completion.
+The app will try not to display nsfw tags unless the "Allow NSFW Tags" is checked, but the filter is not perfect.
 ## How is the artist list calculated?
 """
+nsfw_threshold = 0.95  # Words whose probability sum in word_rating_probabilities.csv is >= this value are added to nsfw_tags
 grammar=r"""
 !start: (prompt | /[][():]/+)*
 prompt: (emphasized | plain | comma | WHITESPACE)*
     conditional_smoothing = 100. / conditional_doc_count
+nsfw_tags = set()  # Initialize an empty set to store words meeting the threshold
+# Open and read the CSV file
+with open("word_rating_probabilities.csv", 'r', newline='', encoding='utf-8') as csvfile:
+    reader = csv.reader(csvfile)
+    next(reader, None)  # Skip the header row
+    for row in reader:
+        word = row[0]  # The word is in the first column
+        probability_sum = float(row[1])  # The sum of probabilities is in the second column, convert to float for comparison
+        # Check if the probability sum meets the threshold and add the word to the set if it does
+        if probability_sum >= nsfw_threshold:
+            nsfw_tags.add(word)
 def clean_tag(tag):
     return ''.join(char for char in tag if ord(char) < 128)
     return geometric_mean
+def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
     #Initialize stuff
     if not hasattr(find_similar_tags, "fasttext_small_model"):
                                 result.append((similar_tag.replace('_', ' '), round(similarity, 3)))
                                 seen.add(similar_tag)
+        #Remove NSFW tags if appropriate.
+        if not allow_nsfw_tags:
+            result = [(word, score) for word, score in result if word.replace(' ','_') not in nsfw_tags]
         #Adjust score based on context
         for i in range(len(result)):
             word, score = result[i]  # Unpack the tuple
     return results_data  # Return list of lists for Dataframe
+def find_similar_artists(new_tags_string, top_n, similarity_weight, allow_nsfw_tags):
     try:
         new_tags_string = new_tags_string.lower()
         new_tags_string, removed_tags = remove_special_tags(new_tags_string)
         new_image_tags = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip() for tag in new_image_tags]
         ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys()))   #We may want this line again later.  These are the tags that were not used to calculate the artists list.
+        unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight, allow_nsfw_tags)
         X_new_image = vectorizer.transform([','.join(new_image_tags + removed_tags)])
         similarities = cosine_similarity(X_new_image, X_artist)[0]
     inputs=[
         gr.Textbox(label="Enter image tags", placeholder="e.g. fox, outside, detailed background, ..."),
         gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists"),
+        gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Similarity weight"),
+        gr.Checkbox(label="Allow NSFW Tags", value=False)
     ],
     outputs=[
         gr.Dataframe(label="Unseen Tags", headers=["Tag", "Similar Tags", "Similarity"]),

word_rating_probabilities.csv ADDED Viewed

The diff for this file is too large to render. See raw diff