Spaces:

FoodDesert
/

Prompt_Squirrel

Running

App Files Files Community

FoodDesert committed on Apr 7, 2024

Commit

f1da0db

verified ·

1 Parent(s): 1c36512

Upload 3 files

Browse files

adding comma checking

Files changed (2) hide show

SquirrelIcon.png +0 -0
app.py +68 -16

SquirrelIcon.png ADDED Viewed

app.py CHANGED Viewed

@@ -10,8 +10,7 @@ import re
 import random
 import compress_fasttext
 from collections import OrderedDict
-from lark import Lark
-from lark import Token
 from lark.exceptions import ParseError
 import json
 import zipfile
@@ -115,6 +114,19 @@ See SamplePrompts.csv for the list of prompts used and their descriptions.
 nsfw_threshold = 0.95  # Assuming the threshold value is defined here
 grammar=r"""
 !start: (prompt | /[][():]/+)*
 prompt: (emphasized | plain | comma | WHITESPACE)*
@@ -125,6 +137,7 @@ WHITESPACE: /\s+/
 plain: /([^,\\\[\]():|]|\\.)+/
 %import common.SIGNED_NUMBER -> NUMBER
 """
 # Initialize the parser
 parser = Lark(grammar, start='start')
@@ -134,15 +147,14 @@ def extract_tags(tree):
     def _traverse(node):
         if isinstance(node, Token) and node.type == '__ANON_1':
             tag_position = node.start_pos
-            #tag_text = node.value.strip()
             tag_text = node.value
-            tags_with_positions.append((tag_text, tag_position))
         elif not isinstance(node, Token):
             for child in node.children:
                 _traverse(child)
     _traverse(tree)
     return tags_with_positions
 special_tags = ["score:0", "score:1", "score:2", "score:3", "score:4", "score:5", "score:6", "score:7", "score:8", "score:9"]
 def remove_special_tags(original_string):
@@ -384,11 +396,14 @@ def create_top_artists_table(top_artists):
     return html_str
-def create_html_placeholder(title="", placeholder_height=400, placeholder_width="100%"):
     # Include a title in the same style as the top artists table heading
     html_placeholder = f"<div style='text-align: center;'><h1>{title}</h1></div>"
     # Add the placeholder div with specified height and width
-    html_placeholder += f"<div style='height: {placeholder_height}px; width: {placeholder_width}; margin: 20px; background: transparent;'></div>"
     return html_placeholder
@@ -420,6 +435,7 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
         modified_tag = tag_info['modified_tag']
         start_pos = tag_info['start_pos']
         end_pos = tag_info['end_pos']
         #print(original_tag, modified_tag, start_pos, end_pos)
@@ -432,6 +448,9 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
             continue
         encountered_modified_tags.add(modified_tag)
         modified_tag_for_search = modified_tag.replace(' ','_')
         similar_words = find_similar_tags.fasttext_small_model.most_similar(modified_tag_for_search, topn = 100)
@@ -471,12 +490,12 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
         result = sorted(result, key=lambda x: x[1], reverse=True)[:10]
         html_content += create_html_tables_for_tags(modified_tag, result, find_similar_tags.tag2count, find_similar_tags.tag2idwiki)
-        bad_entities.append({"entity":"Unknown", "start":start_pos, "end":end_pos})
         tags_added=True
     # If no tags were processed, add a message
     if not tags_added:
-        html_content = create_html_placeholder(title="Unknown Tags")
     return html_content, bad_entities  # Return list of lists for Dataframe
@@ -484,7 +503,7 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
 def build_tag_offsets_dicts(new_image_tags_with_positions):
     # Structure the data for HighlightedText
     tag_data = []
-    for tag_text, start_pos in new_image_tags_with_positions:
         # Modify the tag
         modified_tag = tag_text.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip()
         artist_matrix_tag = tag_text.replace('_', ' ').replace('\\(', '\(').replace('\\)', '\)').strip()
@@ -496,10 +515,37 @@ def build_tag_offsets_dicts(new_image_tags_with_positions):
             "start_pos": start_pos,
             "end_pos": end_pos,
             "modified_tag": modified_tag,
-            "artist_matrix_tag": artist_matrix_tag
         })
     return tag_data
 def find_similar_artists(original_tags_string, top_n, similarity_weight, allow_nsfw_tags):
     try:
@@ -508,26 +554,29 @@ def find_similar_artists(original_tags_string, top_n, similarity_weight, allow_n
         # Parse the prompt
         parsed = parser.parse(new_tags_string)
         # Extract tags from the parsed tree
         new_image_tags = extract_tags(parsed)
         tag_data = build_tag_offsets_dicts(new_image_tags)
         ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys()))   #We may want this line again later.  These are the tags that were not used to calculate the artists list.
         unseen_tags_data, bad_entities = find_similar_tags(tag_data, similarity_weight, allow_nsfw_tags)
         bad_tags_illustrated_string = {"text":new_tags_string, "entities":bad_entities}
-        #bad_tags_illustrated_string = {"text":original_tags_string, "entities":bad_entities}
         #modified_tags = [tag_info['modified_tag'] for tag_info in tag_data]
         #X_new_image = vectorizer.transform([','.join(modified_tags + removed_tags)])
-        artist_matrix_tags = [tag_info['artist_matrix_tag'] for tag_info in tag_data]
         X_new_image = vectorizer.transform([','.join(artist_matrix_tags + removed_tags)])
         similarities = cosine_similarity(X_new_image, X_artist)[0]
         top_artist_indices = np.argsort(similarities)[-(top_n + 1):][::-1]
         top_artists = [(artist_names[i], similarities[i]) for i in top_artist_indices if artist_names[i].lower() != "by conditional dnp"][:top_n]
-        #top_artists_str = "\n".join([f"{rank+1}. {artist[3:]} ({score:.4f})" for rank, (artist, score) in enumerate(top_artists)])
         top_artists_str = create_top_artists_table(top_artists)
         dynamic_prompts_formatted_artists = "{" + "|".join([artist for artist, _ in top_artists]) + "}"
@@ -538,7 +587,7 @@ def find_similar_artists(original_tags_string, top_n, similarity_weight, allow_n
                 image_galleries.append(baseline)  # Add baseline as its own gallery item
                 image_galleries.append(artists)  # Extend the list with artist tuples
-        return (unseen_tags_data, bad_tags_illustrated_string, top_artists_str, dynamic_prompts_formatted_artists, *image_galleries) #image_galleries[0], image_galleries[1] DOES work.  Find a generic alternative.
     except ParseError as e:
         return [], "Parse Error: Check for mismatched parentheses or something", "", None, None
@@ -548,8 +597,11 @@ with gr.Blocks() as app:
         with gr.Row():
             with gr.Column(scale=3):
                 image_tags = gr.Textbox(label="Enter Prompt", placeholder="e.g. fox, outside, detailed background, ...")
-                bad_tags_illustrated_string = gr.HighlightedText(show_legend=True,label="Annotated Prompt")
             with gr.Column(scale=1):
                 gr.HTML("<br>" * 2)  # Adjust the number of line breaks ("<br>") as needed to push the button down
                 submit_button = gr.Button("Submit")
     with gr.Row():

 import random
 import compress_fasttext
 from collections import OrderedDict
+from lark import Lark, Tree, Token
 from lark.exceptions import ParseError
 import json
 import zipfile
 nsfw_threshold = 0.95  # Assuming the threshold value is defined here
+#grammar=r"""
+#!start: (prompt | /[][():]/+)*
+#prompt: (emphasized | plain | commas | WHITESPACE)*
+#!emphasized: "(" prompt ")"
+#        | "(" prompt ":" [WHITESPACE] NUMBER [WHITESPACE] ")"
+#!comma: ","
+#commas: double_comma | comma
+#double_comma: comma WHITESPACE* comma
+#WHITESPACE: /\s+/
+#plain: /([^,\\\[\]():|]|\\.)+/
+#%import common.SIGNED_NUMBER -> NUMBER
+#"""
 grammar=r"""
 !start: (prompt | /[][():]/+)*
 prompt: (emphasized | plain | comma | WHITESPACE)*
 plain: /([^,\\\[\]():|]|\\.)+/
 %import common.SIGNED_NUMBER -> NUMBER
 """
 # Initialize the parser
 parser = Lark(grammar, start='start')
     def _traverse(node):
         if isinstance(node, Token) and node.type == '__ANON_1':
             tag_position = node.start_pos
             tag_text = node.value
+            tags_with_positions.append((tag_text, tag_position, "tag"))
         elif not isinstance(node, Token):
             for child in node.children:
                 _traverse(child)
     _traverse(tree)
     return tags_with_positions
 special_tags = ["score:0", "score:1", "score:2", "score:3", "score:4", "score:5", "score:6", "score:7", "score:8", "score:9"]
 def remove_special_tags(original_string):
     return html_str
def create_html_placeholder(title="", content="", placeholder_height=400, placeholder_width="100%"):
    """Return an HTML snippet made of a heading, an optional message and a spacer.

    Args:
        title: Text placed inside a centred ``<h1>``.
        content: Optional message; when truthy it is emitted as a centred
            paragraph between the heading and the spacer.
        placeholder_height: Height of the empty spacer div; ``px`` is appended.
        placeholder_width: CSS width of the empty spacer div, used verbatim.

    Returns:
        The concatenated HTML string. Values are interpolated as-is (no
        HTML escaping is performed).
    """
    # Heading always comes first.
    pieces = [f"<div style='text-align: center;'><h1>{title}</h1></div>"]

    # The message paragraph is only emitted when there is something to show.
    if content:
        pieces.append(
            f"<div style='text-align: center; margin-bottom: 20px;'><p>{content}</p></div>"
        )

    # Transparent, empty div that reserves the requested amount of space.
    pieces.append(
        f"<div style='height: {placeholder_height}px; width: {placeholder_width}; margin: 20px auto; background: transparent;'></div>"
    )
    return "".join(pieces)
         modified_tag = tag_info['modified_tag']
         start_pos = tag_info['start_pos']
         end_pos = tag_info['end_pos']
+        node_type = tag_info['node_type']
         #print(original_tag, modified_tag, start_pos, end_pos)
             continue
         encountered_modified_tags.add(modified_tag)
+        if node_type == "double_comma":
+            bad_entities.append({"entity":"Double Comma", "start":start_pos, "end":end_pos})
+            continue
         modified_tag_for_search = modified_tag.replace(' ','_')
         similar_words = find_similar_tags.fasttext_small_model.most_similar(modified_tag_for_search, topn = 100)
         result = sorted(result, key=lambda x: x[1], reverse=True)[:10]
         html_content += create_html_tables_for_tags(modified_tag, result, find_similar_tags.tag2count, find_similar_tags.tag2idwiki)
+        bad_entities.append({"entity":"Unknown Tag", "start":start_pos, "end":end_pos})
         tags_added=True
     # If no tags were processed, add a message
     if not tags_added:
+        html_content = create_html_placeholder(title="Unknown Tags", content="No Unknown Tags Found")
     return html_content, bad_entities  # Return list of lists for Dataframe
 def build_tag_offsets_dicts(new_image_tags_with_positions):
     # Structure the data for HighlightedText
     tag_data = []
+    for tag_text, start_pos, nodetype in new_image_tags_with_positions:
         # Modify the tag
         modified_tag = tag_text.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip()
         artist_matrix_tag = tag_text.replace('_', ' ').replace('\\(', '\(').replace('\\)', '\)').strip()
             "start_pos": start_pos,
             "end_pos": end_pos,
             "modified_tag": modified_tag,
+            "artist_matrix_tag": artist_matrix_tag,
+            "node_type": nodetype
         })
     return tag_data
def augment_bad_entities_with_regex(text):
    """Find misplaced commas in a prompt string with regular expressions.

    Four patterns are checked:

    * a comma that is the last non-whitespace character of the prompt,
    * a comma directly before the ``)`` of a parenthesised group that ends
      the prompt,
    * the same, where the group ends with a ``: <number>`` weight,
    * a comma that follows a ``)`` and is itself followed by more text.

    Args:
        text: The prompt string to inspect.

    Returns:
        A list of dicts with keys ``"entity"`` (the label), ``"start"`` and
        ``"end"`` (character offsets into ``text``). Every entry spans exactly
        the one offending comma. The list is not globally sorted by position.
    """
    bad_entities = []

    def _flag(label, index):
        # Each finding covers the single comma character at ``index``.
        bad_entities.append({"entity": label, "start": index, "end": index + 1})

    #comma at end
    match = re.search(r',(?=\s*$)', text)
    if match:
        _flag("Remove Final Comma", match.start())

    # Trailing comma inside a closing group at the end of the prompt: "(a, b, )"
    match = re.search(r'\([^()]*(,)\s*\)\s*$', text)
    if match:
        _flag("Remove Final Comma", match.start(1))

    # Same, but the group carries a weight: "(a, b, :1.2)"
    match = re.search(r'\([^()]*(,)\s*:\s*\d+(\.\d+)?\s*\)\s*$', text)
    if match:
        _flag("Remove Final Comma", match.start(1))

    #comma after parentheses
    # Report every occurrence, not just the first one. The trailing
    # non-whitespace character is checked with a lookahead so it is not
    # consumed; otherwise back-to-back cases such as "),)," would hide the
    # second comma from the scan.
    for match in re.finditer(r'\)\s*(,)(?=\s*\S)', text):
        _flag("Move Comma Inside Parentheses", match.start(1))

    return bad_entities
 def find_similar_artists(original_tags_string, top_n, similarity_weight, allow_nsfw_tags):
     try:
         # Parse the prompt
         parsed = parser.parse(new_tags_string)
         # Extract tags from the parsed tree
         new_image_tags = extract_tags(parsed)
         tag_data = build_tag_offsets_dicts(new_image_tags)
         ###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys()))   #We may want this line again later.  These are the tags that were not used to calculate the artists list.
         unseen_tags_data, bad_entities = find_similar_tags(tag_data, similarity_weight, allow_nsfw_tags)
+        bad_entities.extend(augment_bad_entities_with_regex(new_tags_string))
+        bad_entities.sort(key=lambda x: x['start'])
         bad_tags_illustrated_string = {"text":new_tags_string, "entities":bad_entities}
         #modified_tags = [tag_info['modified_tag'] for tag_info in tag_data]
         #X_new_image = vectorizer.transform([','.join(modified_tags + removed_tags)])
+        #artist_matrix_tags = [tag_info['artist_matrix_tag'] for tag_info in tag_data]
+        artist_matrix_tags = [tag_info['artist_matrix_tag'] for tag_info in tag_data if tag_info['node_type'] == "tag"]
         X_new_image = vectorizer.transform([','.join(artist_matrix_tags + removed_tags)])
         similarities = cosine_similarity(X_new_image, X_artist)[0]
         top_artist_indices = np.argsort(similarities)[-(top_n + 1):][::-1]
         top_artists = [(artist_names[i], similarities[i]) for i in top_artist_indices if artist_names[i].lower() != "by conditional dnp"][:top_n]
         top_artists_str = create_top_artists_table(top_artists)
         dynamic_prompts_formatted_artists = "{" + "|".join([artist for artist, _ in top_artists]) + "}"
                 image_galleries.append(baseline)  # Add baseline as its own gallery item
                 image_galleries.append(artists)  # Extend the list with artist tuples
+        return (unseen_tags_data, bad_tags_illustrated_string, top_artists_str, dynamic_prompts_formatted_artists, *image_galleries)
     except ParseError as e:
         return [], "Parse Error: Check for mismatched parentheses or something", "", None, None
         with gr.Row():
             with gr.Column(scale=3):
                 image_tags = gr.Textbox(label="Enter Prompt", placeholder="e.g. fox, outside, detailed background, ...")
+                bad_tags_illustrated_string = gr.HighlightedText(show_legend=True, color_map={"Unknown Tag":"red","Duplicate":"yellow","Remove Final Comma":"purple","Move Comma Inside Parentheses":"green"}, label="Annotated Prompt")
             with gr.Column(scale=1):
+                #gr.Image(label=" ", value="SquirrelIcon.png", height=155, width=140)
+                #image_path = os.path.join(os.getcwd(), "SquirrelIcon.png")
+                #gr.HTML('<div style="text-align: center;"><img src="{image_path}" alt="Cute Mascot" style="max-height: 100px; background: transparent;"></div><br>')
                 gr.HTML("<br>" * 2)  # Adjust the number of line breaks ("<br>") as needed to push the button down
                 submit_button = gr.Button("Submit")
     with gr.Row():