|
import seaborn as sns |
|
from PIL import Image, ImageDraw, ImageFont |
|
import matplotlib.font_manager |
|
import spacy |
|
import re |
|
import base64 |
|
import time |
|
import re |
|
from PIL import Image |
|
import base64 |
|
import hashlib |
|
import os |
|
|
|
# Identifier handed to spaCy for the English pipeline. The pipeline is loaded
# once at import time and shared by text_to_dict for noun-chunk extraction.
_SPACY_MODEL = "en_core_web_sm-3.6.0"
nlp = spacy.load(_SPACY_MODEL)
|
|
|
def process_image_without_resize(image_prompt):
    """Open an image and return it with a base64 copy, a hash and an output path.

    The image is re-saved under ``examples/`` with a timestamp-based name,
    read back, base64-encoded and the scratch file is then deleted.

    Args:
        image_prompt: Path of the image file to open (its extension is
            reused for the generated file names).

    Returns:
        A tuple ``(image, encoded_img, image_hash, filename_grounding)``:
        the opened PIL image, the base64 text of the re-saved file, the
        SHA-256 hex digest of those base64 bytes, and the path
        ``examples/<timestamp>_grounding<ext>`` (the file is not created
        here).
    """
    image = Image.open(image_prompt)
    print(f"height:{image.height}, width:{image.width}")
    timestamp = time.time()
    file_ext = os.path.splitext(image_prompt)[1]
    filename = f"examples/{timestamp}{file_ext}"
    filename_grounding = f"examples/{timestamp}_grounding{file_ext}"

    # The scratch directory may be missing on a fresh checkout.
    os.makedirs("examples", exist_ok=True)
    try:
        image.save(filename)
        print(f"temporal filename {filename}")
        with open(filename, "rb") as image_file:
            encoded_bytes = base64.b64encode(image_file.read())
    finally:
        # Always drop the scratch copy, even when saving or reading failed.
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass

    encoded_img = str(encoded_bytes, encoding='utf-8')
    # NOTE: the digest is taken over the base64 bytes, not the raw file bytes.
    image_hash = hashlib.sha256(encoded_bytes).hexdigest()
    return image, encoded_img, image_hash, filename_grounding
|
|
|
|
|
def is_chinese(text):
    """Search *text* for a run of characters in the range U+4E00..U+9FA5.

    Returns the ``re.Match`` for the first such run, or ``None`` when there
    is none, so the result can be used directly as a truth value.
    """
    cjk_run = u'[\u4e00-\u9fa5]+'
    return re.search(cjk_run, text)
|
|
|
|
|
def draw_boxes(image, boxes, texts, output_fn='output.png'):
    """Draw labelled bounding boxes over *image* and save the result.

    Args:
        image: PIL image to annotate; the drawing happens on a separate
            overlay that is composited onto a converted copy.
        boxes: One entry per label, each an iterable of boxes given as
            ``(x0, y0, x1, y1)`` fractions of the image width/height.
        texts: Label for each entry of *boxes*; may span several lines
            separated by ``"\\n"``. A falsy label draws the outline only.
        output_fn: Path the annotated RGB image is written to.
    """
    box_width = 5
    color_palette = sns.color_palette("husl", len(boxes))
    colors = [(int(r * 255), int(g * 255), int(b * 255)) for r, g, b in color_palette]

    # Convert fractional coordinates to pixel coordinates.
    width, height = image.size
    absolute_boxes = [
        [
            (int(box[0] * width), int(box[1] * height), int(box[2] * width), int(box[3] * height))
            for box in group
        ]
        for group in boxes
    ]

    overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)
    font_paths = sorted(matplotlib.font_manager.findSystemFonts(fontpaths=None, fontext='ttf'))
    if font_paths:
        font = ImageFont.truetype(font_paths[0], size=26)
    else:
        # No TrueType font installed: fall back to Pillow's built-in font
        # instead of failing with IndexError.
        font = ImageFont.load_default()

    for group, text, color in zip(absolute_boxes, texts, colors):
        # The label layout depends only on the text, so measure it once per
        # label rather than once per box.
        lines = text.split('\n') if text else []
        if lines:
            first_line_height = font.getbbox(lines[0])[-1]
            line_sizes = [font.getbbox(line)[-2:] for line in lines]

        for b in group:
            draw.rectangle(b, outline=color, width=box_width)
            if not lines:
                continue
            # Labels sit inside the bottom edge of the box, or just below it
            # when the box is too small to hold them.
            y_start = b[3] - first_line_height * len(lines) - box_width
            if b[2] - b[0] < 100 or b[3] - b[1] < 100:
                y_start = b[3]
            x = b[0] + box_width
            for i, (line, (text_width, text_height)) in enumerate(zip(lines, line_sizes)):
                y = y_start + text_height * i
                # Semi-transparent grey backdrop keeps the white text legible.
                draw.rectangle([x, y, x + text_width, y + text_height], fill=(128, 128, 128, 160))
                draw.text((x, y), line, font=font, fill=(255, 255, 255))

    img_with_overlay = Image.alpha_composite(image.convert('RGBA'), overlay).convert('RGB')
    img_with_overlay.save(output_fn)
|
|
|
def boxstr_to_boxes(box_str):
    """Parse ``"x0,y0,x1,y1;x0,y0,x1,y1;..."`` into lists of fractions.

    Segments are separated by ``;`` and values by ``,``. Every integer value
    is divided by 1000. Segments that contain anything other than digits and
    commas are skipped, as are segments whose pieces cannot be converted
    (for example an empty field in ``"1,,2"``).

    Args:
        box_str: The text found between ``[[`` and ``]]``.

    Returns:
        A list with one list of floats per well-formed segment.
    """
    boxes = []
    for segment in box_str.split(';'):
        if not segment.replace(',', '').isdigit():
            continue
        try:
            boxes.append([int(value) / 1000 for value in segment.split(',')])
        except ValueError:
            # The digit pre-check cannot see empty fields ("1,,2") or
            # digit-like characters that int() rejects; drop the segment.
            continue
    return boxes
|
|
|
def text_to_dict(text):
    """Map the phrase preceding each ``[[...]]`` group in *text* to its boxes.

    For every ``[[...]]`` group, the phrase runs from the start of the last
    noun chunk that ends at or before the group (or from the start of the
    text when there is none) up to the group itself. If that phrase ends
    with ``?``, the whole text before the group is used instead.

    Args:
        text: Text containing zero or more ``[[...]]`` groups.

    Returns:
        A dict from the lower-cased phrase to the value produced by
        ``boxstr_to_boxes`` for that group. When the same phrase occurs more
        than once, the last group wins.
    """
    doc = nlp(text)
    # Materialise the chunk offsets once instead of re-iterating
    # doc.noun_chunks for every match.
    chunk_spans = [(chunk.start_char, chunk.end_char) for chunk in doc.noun_chunks]

    phrase_to_boxes = {}
    for match in re.finditer(r'\[\[([^\]]+)\]\]', text):
        box_position = match.start()
        nearest_np_start = max(
            [0] + [start for start, end in chunk_spans if end <= box_position]
        )
        noun_phrase = text[nearest_np_start:box_position].strip()
        if noun_phrase.endswith('?'):
            # A trailing question mark means the chunk is part of a question;
            # use everything before the group as the label.
            noun_phrase = text[:box_position].strip()
        phrase_to_boxes[noun_phrase.lower()] = boxstr_to_boxes(match.group(1))
    return phrase_to_boxes
|
|
|
def parse_response(img, response, output_fn='output.png'):
    """Draw the boxes mentioned in *response* on a resized copy of *img*.

    Args:
        img: PIL image the response refers to.
        response: Text containing ``[[...]]`` box groups; it is parsed with
            ``text_to_dict``.
        output_fn: Path passed to ``draw_boxes`` for the annotated image.
    """
    img = img.convert('RGB')
    width, height = img.size
    # Scale (up or down) to the largest size that fits inside 1920x1080
    # while keeping the aspect ratio.
    ratio = min(1920 / width, 1080 / height)
    # Clamp to one pixel so an extreme aspect ratio cannot round a side to 0.
    new_width = max(1, int(width * ratio))
    new_height = max(1, int(height * ratio))
    new_img = img.resize((new_width, new_height), Image.LANCZOS)

    dic = text_to_dict(response)
    if not dic:
        texts = []
        boxes = []
    else:
        # Split the mapping into parallel label / box sequences.
        texts, boxes = zip(*dic.items())
    draw_boxes(new_img, boxes, texts, output_fn=output_fn)
|
|
|
def postprocess_text(template, text):
    """Fill *template* with the quoted task text.

    With an empty template the stripped *text* is returned unchanged.
    Otherwise every ``<TASK>`` in the template is replaced by the stripped
    text wrapped in double quotes, and the result is stripped.
    """
    task = text.strip()
    if template == "":
        return task
    filled = template.replace("<TASK>", f'"{task}"')
    return filled.strip()
|
|
|
|
|
|
|
# Prompt templates for agent-style requests. ``<TASK>`` is the placeholder
# that postprocess_text replaces with the quoted task text.
# NOTE(review): the first entry has no placeholder and reads like a sentinel
# for "no template" -- confirm how callers treat it.
templates_agent_cogagent = [
    "do not use template",
    "Can you advise me on how to <TASK>?",
    "I'm looking for guidance on how to <TASK>.",
    "What steps do I need to take to <TASK>?",
    "Could you provide instructions for <TASK>?",
    "I'm wondering what the process is for <TASK>.",
    "How can I go about <TASK>?",
    "I need assistance with planning to <TASK>.",
    "Do you have any recommendations for <TASK>?",
    "Please share some tips for <TASK>.",
    "I'd like to know the best way to <TASK>.",
    "What's the most effective way to <TASK>?",
    "I'm seeking advice on accomplishing <TASK>.",
    "Could you guide me through the steps to <TASK>?",
    "I'm unsure how to start with <TASK>.",
    "Is there a strategy for successfully <TASK>?",
    "What's the proper procedure for <TASK>?",
    "How should I prepare for <TASK>?",
    "I'm not sure where to begin with <TASK>.",
    "I need some insights on <TASK>.",
    "Can you explain how to tackle <TASK>?",
    "I'm interested in the process of <TASK>.",
    "Could you enlighten me on <TASK>?",
    "What are the recommended steps for <TASK>?",
    "Is there a preferred method for <TASK>?",
    "I'd appreciate your advice on <TASK>.",
    "Can you shed light on <TASK>?",
    "What would be the best approach to <TASK>?",
    "How do I get started with <TASK>?",
    "I'm inquiring about the procedure for <TASK>.",
    "Could you share your expertise on <TASK>?",
    "I'd like some guidance on <TASK>.",
    "What's your recommendation for <TASK>?",
    "I'm seeking your input on how to <TASK>.",
    "Can you provide some insights into <TASK>?",
    "How can I successfully accomplish <TASK>?",
    "What steps are involved in <TASK>?",
    "I'm curious about the best way to <TASK>.",
    "Could you show me the ropes for <TASK>?",
    "I need to know how to go about <TASK>.",
    "What are the essential steps for <TASK>?",
    "Is there a specific method for <TASK>?",
    "I'd like to get some advice on <TASK>.",
    "Can you explain the process of <TASK>?",
    "I'm looking for guidance on how to approach <TASK>.",
    "What's the proper way to handle <TASK>?",
    "How should I proceed with <TASK>?",
    "I'm interested in your expertise on <TASK>.",
    "Could you walk me through the steps for <TASK>?",
    "I'm not sure where to begin when it comes to <TASK>.",
    "What should I prioritize when doing <TASK>?",
    "How can I ensure success with <TASK>?",
    "I'd appreciate some tips on <TASK>.",
    "Can you provide a roadmap for <TASK>?",
    "What's the recommended course of action for <TASK>?",
    "I'm seeking your guidance on <TASK>.",
    "Could you offer some suggestions for <TASK>?",
    "I'd like to know the steps to take for <TASK>.",
    "What's the most effective way to achieve <TASK>?",
    "How can I make the most of <TASK>?",
    "I'm wondering about the best approach to <TASK>.",
    "Can you share your insights on <TASK>?",
    "What steps should I follow to complete <TASK>?",
    "I'm looking for advice on <TASK>.",
    "What's the strategy for successfully completing <TASK>?",
    "How should I prepare myself for <TASK>?",
    "I'm not sure where to start with <TASK>.",
    "What's the procedure for <TASK>?",
    "Could you provide some guidance on <TASK>?",
    "I'd like to get some tips on how to <TASK>.",
    "Can you explain how to tackle <TASK> step by step?",
    "I'm interested in understanding the process of <TASK>.",
    "What are the key steps to <TASK>?",
    "Is there a specific method that works for <TASK>?",
    "I'd appreciate your advice on successfully completing <TASK>.",
    "Can you shed light on the best way to <TASK>?",
    "What would you recommend as the first step to <TASK>?",
    "How do I initiate <TASK>?",
    "I'm inquiring about the recommended steps for <TASK>.",
    "Could you share some insights into <TASK>?",
    "I'm seeking your expertise on <TASK>.",
    "What's your recommended approach for <TASK>?",
    "I'd like some guidance on where to start with <TASK>.",
    "Can you provide recommendations for <TASK>?",
    "What's your advice for someone looking to <TASK>?",
    "I'm seeking your input on the process of <TASK>.",
    "How can I achieve success with <TASK>?",
    "What's the best way to navigate <TASK>?",
    "I'm curious about the steps required for <TASK>.",
    "Could you show me the proper way to <TASK>?",
    "I need to know the necessary steps for <TASK>.",
    "What's the most efficient method for <TASK>?",
    "I'd appreciate your guidance on <TASK>.",
    "Can you explain the steps involved in <TASK>?",
    "I'm looking for recommendations on how to approach <TASK>.",
    "What's the right way to handle <TASK>?",
    "How should I manage <TASK>?",
    "I'm interested in your insights on <TASK>.",
    "Could you provide a step-by-step guide for <TASK>?",
    "I'm not sure how to start when it comes to <TASK>.",
    "What are the key factors to consider for <TASK>?",
    "How can I ensure a successful outcome with <TASK>?",
    "I'd like some tips and tricks for <TASK>.",
    "Can you offer a roadmap for accomplishing <TASK>?",
    "What's the preferred course of action for <TASK>?",
    "I'm seeking your expert advice on <TASK>.",
    "Could you suggest some best practices for <TASK>?",
    "I'd like to understand the necessary steps to complete <TASK>.",
    "What's the most effective strategy for <TASK>?",
]
|
|
|
# Prompt templates for grounding. ``<TASK>`` is the placeholder that
# postprocess_text replaces with the quoted task text; every entry contains
# it exactly once. The first group asks where <TASK> is / for its bounding
# boxes; the second group asks for a description of the region <TASK>.
template_grounding_cogvlm = [
    "Where is <TASK>?",
    "Where is <TASK> in the image?",
    "Where is <TASK>? answer in [[x0,y0,x1,y1]] format.",
    "Can you point out <TASK> in the image and provide the bounding boxes of its location?",
    "Help me to locate <TASK> in and give me its bounding boxes, please.",
    "In the given, could you find and tell me the bounding boxes of <TASK>?",
    "Guide me to the location of <TASK> within the image by providing its bounding boxes.",
    "I'd like to know the exact bounding boxes of <TASK> in the photo.",
    "Would you kindly provide the bounding boxes of <TASK> located in the picture?",
    "Can you find <TASK> in and give me the bounding boxes of where it is located?",
    "I'm trying to locate <TASK> in. Can you determine its bounding boxes for me?",
    "What are the bounding boxes of <TASK> in the image?",
    "Can you disclose the position of <TASK> in the photograph by stating its bounding boxes?",
    "In, could you let me know the location of <TASK> in the form of bounding boxes?",
    "I need the bounding boxes of <TASK> in, can you please assist me with that?",
    "Where in is <TASK> located? Provide me with its bounding boxes, please.",
    "May I have the bounding boxes of <TASK>?",
    "In the photograph, could you pinpoint the location of <TASK> and tell me its bounding boxes?",
    "Can you please search and find <TASK> in, then let me know its bounding boxes?",
    "Please, point out the position of <TASK> in the image by giving its bounding boxes.",
    "What are the exact bounding boxes of <TASK> in the provided picture?",
    "Detect the location of <TASK> in and share the bounding boxes with me, please.",
    "In the picture, I'd like you to locate <TASK> and provide its coordinates.",
    "Please indicate the location of <TASK> in the photo by giving bounding boxes.",
    "Find <TASK> in and share its coordinates with me.",
    "Could you please help me find the bounding boxes of <TASK> in the image?",
    "I am looking for the position of <TASK> in. Can you provide its bounding boxes?",
    "In the image, can you locate <TASK> and let me know its coordinates?",
    "I'd appreciate if you could find and tell me the bounding boxes of <TASK>.",
    "In, I need the bounding box bounding boxes of <TASK>.",
    "Point me to the location of <TASK> in the picture by providing its bounding boxes.",
    "Could you trace <TASK> in and tell me its bounding boxes?",
    "Can you assist me in locating <TASK> in, and then provide its bounding boxes?",
    "I'm curious, what are the bounding boxes of <TASK> in the photo?",
    "Kindly share the bounding boxes of <TASK> located in the image.",
    "I would like to find <TASK> in. Can you give me its bounding boxes?",
    "Can you spot <TASK> in and disclose its bounding boxes to me?",
    "Please, reveal the location of <TASK> in the provided photograph as coordinates.",
    "Help me locate and determine the bounding boxes of <TASK>.",
    "I request the bounding boxes of <TASK> in the image.",
    "In the given, can you find <TASK> and tell me its bounding boxes?",
    "I need to know the position of <TASK> in as bounding boxes.",
    "Locate <TASK> in and provide its bounding boxes, please.",
    "Assist me in finding <TASK> in the photo and provide the bounding box bounding boxes.",
    "In, can you guide me to the location of <TASK> by providing bounding boxes?",
    "I'd like the bounding boxes of <TASK> as it appears in the image.",
    "What location does <TASK> hold in the picture? Inform me of its bounding boxes.",
    "Identify the position of <TASK> in and share its bounding boxes.",
    "I'd like to request the bounding boxes of <TASK> within the photo.",
    "How can I locate <TASK> in the image? Please provide the bounding boxes.",
    "I am interested in knowing the bounding boxes of <TASK> in the picture.",
    "Assist me in locating the position of <TASK> in the photograph and its bounding box bounding boxes.",
    # The comma after the next entry was missing, which silently fused it
    # with the following template into one string with two placeholders.
    "In the image, I need to find <TASK> and know its bounding boxes. Can you please help?",
    "Can you give me a description of the region <TASK> in image?",
    "In the provided image, would you mind describing the selected area <TASK>?",
    "I need details about the area <TASK> located within image.",
    "Could you please share some information on the region <TASK> in this photograph?",
    "Describe what's happening within the coordinates <TASK> of the given image.",
    "What can you tell me about the selected region <TASK> in the photo?",
    "Please, can you help me understand what's inside the region <TASK> in image?",
    "Give me a comprehensive description of the specified area <TASK> in the picture.",
    "I'm curious about the area <TASK> in the following image. Can you describe it?",
    "Please elaborate on the area with the coordinates <TASK> in the visual.",
    "In the displayed image, help me understand the region defined by <TASK>.",
    "Regarding the image, what's going on in the section <TASK>?",
    "In the given photograph, can you explain the area with coordinates <TASK>?",
    "Kindly describe what I should be seeing in the area <TASK> of image.",
    "Within the input image, what can be found in the region defined by <TASK>?",
    "Tell me what you see within the designated area <TASK> in the picture.",
    "Please detail the contents of the chosen region <TASK> in the visual input.",
    "What's inside the area <TASK> of the provided graphic?",
    "I'd like some information about the specific region <TASK> in the image.",
    "Help me understand the details within the area <TASK> in photograph.",
    "Can you break down the region <TASK> in the image for me?",
    "What is taking place within the specified area <TASK> in this capture?",
    "Care to elaborate on the targeted area <TASK> in the visual illustration?",
    "What insights can you provide about the area <TASK> in the selected picture?",
    "What does the area <TASK> within the given visual contain?",
    "Analyze and describe the region <TASK> in the included photo.",
    "Please provide details for the area marked as <TASK> in this photographic.",
    "For the image, can you assess and describe what's happening at <TASK>?",
    "Fill me in about the selected portion <TASK> within the presented image.",
    "In the image, elaborate on the details found within the section <TASK>.",
    "Please interpret and describe the area <TASK> inside the given picture.",
    "What information can you give me about the coordinates <TASK> in image?",
    "Regarding the coordinates <TASK> in image, can you provide a description?",
    "In the photo, can you delve into the details of the region <TASK>?",
    "Please provide insights on the specified area <TASK> within the graphic.",
    "Detail the chosen region <TASK> in the depicted scene.",
    "Can you discuss the entities within the region <TASK> of image?",
    "I'd appreciate a breakdown of the area <TASK> in the displayed image.",
    "What's the story in the section <TASK> of the included visual?",
    "Please enlighten me about the region <TASK> in the given photo.",
    "Offer a thorough description of the area <TASK> within the illustration.",
    "What can you share about the area <TASK> in the presented image?",
    "Help me grasp the context of the region <TASK> within image.",
    "Kindly give an overview of the section <TASK> in photo.",
    "What details can you provide about the region <TASK> in the snapshot?",
    "Can you divulge the contents of the area <TASK> within the given image?",
    "In the submitted image, please give a synopsis of the area <TASK>.",
    "In the image, please describe the bounding box <TASK>.",
    "Please describe the region <TASK> in the picture.",
    "Describe the bbox <TASK> in the provided photo.",
    "What can you tell me about the area <TASK> within the image?",
    "Could you give me a description of the rectangular region <TASK> found in?",
    "In, what elements can be found within the coordinates <TASK>?",
    "Please provide details for the area within the bounding box <TASK> in.",
    "Can you generate a description for the selected region <TASK> in the image?",
    "Kindly describe the objects or scenery in the bounding box <TASK> within.",
    "What details can you provide for the rectangle defined by the coordinates <TASK> in?",
    "In relation to the picture, please describe the content of the area marked by <TASK>.",
    "I'd like to know more about the area <TASK> in the given image. Can you describe it?",
    "Can you help me by describing the part of that lies within the bounding box <TASK>?",
    "What's happening in the section of the photo enclosed by the coordinates <TASK>?",
    "Describe the image content present in the specified rectangular area <TASK> of.",
    "Please provide information about the area within the bounding box <TASK> in the picture.",
    "Could you offer a description of the contents in the selected area <TASK> of the image?",
    "I'm curious about the area <TASK> in. Can you provide a description of it?",
    "What can be observed in the rectangular region <TASK> in the photograph?",
    "Please explain what is contained in the portion of defined by the box <TASK>.",
    "In the photograph, can you describe the objects or scenery enclosed by <TASK>?",
    "Can you give a brief explanation of the specified area <TASK> in the image?",
    "What does the area <TASK> look like in the context of the image?",
    "Could you please describe the contents of the bounding box <TASK> in the given image?",
    "I would like to know more about the rectangular region <TASK> within the picture. Can you describe it?",
    "Please tell me about the area <TASK> in the image. What does it contain?",
    "Help me understand what's happening in the selected bounding box <TASK> within.",
    "Can you provide a description of the area <TASK> in the image?",
    "What sort of things can be seen in the region <TASK> of the photo?",
    "Describe what can be found within the bounds of <TASK> in the image.",
    "In, can you paint a picture of the area enclosed by coordinates <TASK>?",
    "Please provide a detailed account of the area covered by the bounding box <TASK> in.",
    "Give me a vivid description of what's happening in the area <TASK> within the snapshot.",
    "In the image, what do you observe within the rectangular box defined by the coordinates <TASK>?",
    "Could you give me a breakdown of the content in the specified area <TASK> of the picture?",
    "Please elucidate the area<TASK> of the image.",
    "I'd appreciate it if you could describe the portion of that lies within the rectangle <TASK>.",
    "Can you share some insights about the rectangular region <TASK> in the image?",
    "Help me visualize the section of the photo enclosed by the bounding box <TASK>.",
    "Would you kindly provide a description for the content within the rectangular area <TASK> of?",
    "In, can you tell me more about the area specified by the bounding box <TASK>?",
    "Please describe what can be seen in the rectangular region <TASK> of the image.",
    "Can you analyze the content of the area <TASK> within the photograph?",
    "In the provided image, please explain the content within the region <TASK>.",
    "I'm interested in the selected rectangle <TASK> in. Can you tell me more about it?",
    "Explain what can be found in the bounding box <TASK> in the context of the image.",
    "Kindly share your observations about the rectangular region <TASK> within.",
    "I'd like a thorough description of the area <TASK> in the image.",
    "Could you please provide a description of the rectangular area <TASK> in?",
    "Please describe the section of the picture defined by the bbox <TASK>.",
    "Tell me more about the scenery or objects within the rectangular region <TASK> in.",
    "Would you kindly describe the content of the area enclosed by <TASK> in the image?",
    "Help me understand the objects or scenery within the bounding box <TASK> in the image.",
    "I would like to know about the section of the image enclosed by the rectangle <TASK>. Can you describe it?",
    "Describe the selected rectangular area <TASK> in the photo.",
    "Tell me about the region <TASK> of the image.",
    "I request a description of the area <TASK> in the picture.",
    "Can you elaborate on the content of the bounding box <TASK> in?",
    "Please share details about the rectangular region <TASK> within the image.",
    "What can I find in the bbox <TASK> of the provided image?",
    "In the image, could you provide a description for the coordinates <TASK>?",
    "Could you tell me more about the area <TASK> in the snapshot?",
    "Fill me in on the details of the rectangular box <TASK> within the image.",
    "What's going on in the section of contained within the bounding box <TASK>?",
    "I would like a description of the content within the bbox <TASK> in.",
    "Please enlighten me about the area <TASK> in the photograph.",
    "Can you give me a visual rundown of the area <TASK> in?",
    "Describe the visual elements within the selected area <TASK> of the image.",
    "Tell me what you see in the area <TASK> within the context of the image.",
    "Explain the content within the rectangular region <TASK> of the image.",
    "I'd like some information about the bounding box <TASK> in the photo.",
    "What is happening within the rectangle defined by coordinates <TASK> in the image?",
    "Please describe the content within the area <TASK> displayed in the image.",
    "What can be seen in the bounding box <TASK> in the context of the provided image?",
    "Share some details about the objects or environment within the bounding box <TASK> in.",
    "Please describe the area <TASK> in the image for me.",
    "Can you generate a description of the contents within the selected region <TASK> in?",
    "What objects or scenery can be found in the area <TASK> in the image?",
    "Please tell me more about the rectangular section <TASK> in the photo.",
    "Could you describe the content of the bbox <TASK> in the image?",
    "What does the selected region <TASK> in the image encompass?",
    "I am interested in the region <TASK> of the image; please describe it.",
    "Can you provide some context for the area <TASK> within the picture?",
    "Please give me some details about the rectangle <TASK> in the image.",
    "In the photo, what can you see within the region defined by the bounding box <TASK>?",
    "I would like a detailed description of the portion of enclosed by the bbox <TASK>.",
    "Please help me understand the content present within the rectangle <TASK> in.",
    "Would you mind describing the rectangular area <TASK> in the provided image?",
]
|
|