"""Image encoding, grounding-box parsing/drawing helpers, and prompt templates."""

import base64
import hashlib
import os
import re
import time

import matplotlib.font_manager
import seaborn as sns
import spacy
from PIL import Image, ImageDraw, ImageFont

# Loaded once at import time; text_to_dict() uses it for noun-chunk detection.
nlp = spacy.load("en_core_web_sm-3.6.0")

# Compiled once instead of on every is_chinese() call.
_ZH_PATTERN = re.compile('[\u4e00-\u9fa5]+')


def process_image_without_resize(image_prompt):
    """Open an image and return it with its base64 encoding, a hash and an output path.

    The image is re-saved to ``examples/<timestamp><ext>``, read back,
    base64-encoded, and the temporary file is removed again (also when
    reading/encoding fails).

    Args:
        image_prompt: Path of the image file to open.

    Returns:
        A tuple ``(image, encoded_img, image_hash, filename_grounding)`` where
        ``image`` is the opened PIL image, ``encoded_img`` is the base64 text
        of the re-saved file, ``image_hash`` is the SHA-256 hex digest of the
        base64-encoded bytes (not of the raw file bytes), and
        ``filename_grounding`` is ``examples/<timestamp>_grounding<ext>``
        (the path is only computed here, nothing is written to it).
    """
    image = Image.open(image_prompt)
    print(f"height:{image.height}, width:{image.width}")

    timestamp = time.time()
    file_ext = os.path.splitext(image_prompt)[1]
    filename = f"examples/{timestamp}{file_ext}"
    filename_grounding = f"examples/{timestamp}_grounding{file_ext}"

    image.save(filename)
    print(f"temporal filename {filename}")
    try:
        with open(filename, "rb") as image_file:
            encoded_bytes = base64.b64encode(image_file.read())
    finally:
        # Always clean up the temporary copy, even if reading failed.
        os.remove(filename)

    encoded_img = encoded_bytes.decode('utf-8')
    image_hash = hashlib.sha256(encoded_bytes).hexdigest()
    return image, encoded_img, image_hash, filename_grounding


def is_chinese(text):
    """Return a match object if ``text`` contains a character in U+4E00-U+9FA5, else ``None``."""
    return _ZH_PATTERN.search(text)


def _load_label_font(size):
    """Return the first system TTF font (sorted by path), or PIL's default font if none exist."""
    font_paths = sorted(matplotlib.font_manager.findSystemFonts(fontpaths=None, fontext='ttf'))
    if not font_paths:
        # No TrueType font installed: degrade to the built-in font instead of
        # failing with IndexError.
        return ImageFont.load_default()
    return ImageFont.truetype(font_paths[0], size=size)


def draw_boxes(image, boxes, texts, output_fn='output.png'):
    """Draw labelled box groups on ``image`` and save the result to ``output_fn``.

    Args:
        image: PIL image to annotate (it is not modified; a composited copy
            is saved).
        boxes: Sequence of box groups; each group is a sequence of
            ``(x0, y0, x1, y1)`` boxes whose values are multiplied by the
            image width/height to obtain pixel coordinates.
        texts: One label per box group (may contain ``\\n`` for several
            lines); an empty label draws the rectangles only.
        output_fn: Path the annotated RGB image is written to.
    """
    box_width = 5
    color_palette = sns.color_palette("husl", len(boxes))
    colors = [(int(r * 255), int(g * 255), int(b * 255)) for r, g, b in color_palette]

    width, height = image.size
    absolute_boxes = [
        [
            (int(box[0] * width), int(box[1] * height), int(box[2] * width), int(box[3] * height))
            for box in group
        ]
        for group in boxes
    ]

    # Draw on a transparent overlay so the semi-transparent label background
    # blends with the picture when composited.
    overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)
    font = _load_label_font(26)

    for group, text, color in zip(absolute_boxes, texts, colors):
        for rect in group:
            draw.rectangle(rect, outline=color, width=box_width)
            if not text:
                continue
            # The label is drawn for every rectangle of the group; keeping
            # this inside the loop also guarantees ``rect`` is bound when a
            # group is empty.
            label_lines = text.split('\n')
            first_line_height = font.getbbox(label_lines[0])[-1]
            # Default: label sits inside the box, just above its bottom edge.
            y_start = rect[3] - first_line_height * len(label_lines) - box_width
            if rect[2] - rect[0] < 100 or rect[3] - rect[1] < 100:
                # Box narrower or shorter than 100px: start the label at the
                # box's bottom edge instead.
                y_start = rect[3]
            x = rect[0] + box_width
            for i, line in enumerate(label_lines):
                text_width, text_height = font.getbbox(line)[-2:]
                y = y_start + text_height * i
                draw.rectangle([x, y, x + text_width, y + text_height], fill=(128, 128, 128, 160))
                draw.text((x, y), line, font=font, fill=(255, 255, 255))

    img_with_overlay = Image.alpha_composite(image.convert('RGBA'), overlay).convert('RGB')
    img_with_overlay.save(output_fn)


def boxstr_to_boxes(box_str):
    """Parse ``"a,b,c,d;e,f,g,h"`` into ``[[a/1000, b/1000, ...], ...]``.

    Segments (separated by ``;``) that contain anything other than digits and
    commas are skipped. Each integer is divided by 1000.
    """
    boxes = [
        [int(value) / 1000 for value in segment.split(',')]
        for segment in box_str.split(';')
        if segment.replace(',', '').isdigit()
    ]
    return boxes


def text_to_dict(text):
    """Map the phrase preceding each ``[[...]]`` box string in ``text`` to its boxes.

    For every ``[[...]]`` occurrence the phrase runs from the start of the
    last-starting spaCy noun chunk that ends at or before the box string (or
    from the start of the text if there is none) up to the box string. If
    that phrase ends with ``?``, the whole text before the box string is used
    instead.

    Returns:
        Dict of lower-cased phrase -> list of boxes as returned by
        ``boxstr_to_boxes``. When the same phrase occurs more than once, the
        last occurrence wins.
    """
    doc = nlp(text)
    # Materialise the chunk spans once rather than re-iterating per match.
    chunk_spans = [(chunk.start_char, chunk.end_char) for chunk in doc.noun_chunks]

    phrase_to_boxes = {}
    for match in re.finditer(r'\[\[([^\]]+)\]\]', text):
        box_position = match.start()
        nearest_np_start = max(
            [0] + [start for start, end in chunk_spans if end <= box_position]
        )
        noun_phrase = text[nearest_np_start:box_position].strip()
        if noun_phrase and noun_phrase[-1] == '?':
            noun_phrase = text[:box_position].strip()
        phrase_to_boxes[noun_phrase.lower()] = boxstr_to_boxes(match.group(1))
    return phrase_to_boxes


def parse_response(img, response, output_fn='output.png'):
    """Draw the boxes referenced in ``response`` onto ``img`` and save to ``output_fn``.

    The image is converted to RGB and scaled (aspect ratio preserved) so it
    fits within 1920x1080 before drawing. Phrases and boxes are extracted
    with ``text_to_dict``; if none are found the scaled image is saved
    without annotations.
    """
    img = img.convert('RGB')
    width, height = img.size
    ratio = min(1920 / width, 1080 / height)
    new_size = (int(width * ratio), int(height * ratio))
    new_img = img.resize(new_size, Image.LANCZOS)

    dic = text_to_dict(response)
    if dic:
        texts, boxes = zip(*dic.items())
    else:
        texts, boxes = [], []
    draw_boxes(new_img, boxes, texts, output_fn=output_fn)


def postprocess_text(template, text):
    """Combine ``text`` (stripped and wrapped in double quotes) with ``template``.

    An empty ``template`` returns the stripped ``text`` unquoted.
    """
    quoted_text = f'"{text.strip()}"'
    # NOTE(review): the search string below is empty, so str.replace inserts
    # the quoted text between every character of the template. This looks
    # like a placeholder token was lost from this file (the templates below
    # also read as if a token is missing) - confirm against the upstream
    # source before relying on this.
    return template.replace("", quoted_text).strip() if template != "" else text.strip()


# Templates for the CogAgent agent task.
# NOTE(review): the sentences read as if a task placeholder is missing before
# the final punctuation - verify against the upstream source.
templates_agent_cogagent = [
    "do not use template",
    "Can you advise me on how to ?",
    "I'm looking for guidance on how to .",
    "What steps do I need to take to ?",
    "Could you provide instructions for ?",
    "I'm wondering what the process is for .",
    "How can I go about ?",
    "I need assistance with planning to .",
    "Do you have any recommendations for ?",
    "Please share some tips for .",
    "I'd like to know the best way to .",
    "What's the most effective way to ?",
    "I'm seeking advice on accomplishing .",
    "Could you guide me through the steps to ?",
    "I'm unsure how to start with .",
    "Is there a strategy for successfully ?",
    "What's the proper procedure for ?",
    "How should I prepare for ?",
    "I'm not sure where to begin with .",
    "I need some insights on .",
    "Can you explain how to tackle ?",
    "I'm interested in the process of .",
    "Could you enlighten me on ?",
    "What are the recommended steps for ?",
    "Is there a preferred method for ?",
    "I'd appreciate your advice on .",
    "Can you shed light on ?",
    "What would be the best approach to ?",
    "How do I get started with ?",
    "I'm inquiring about the procedure for .",
    "Could you share your expertise on ?",
    "I'd like some guidance on .",
    "What's your recommendation for ?",
    "I'm seeking your input on how to .",
    "Can you provide some insights into ?",
    "How can I successfully accomplish ?",
    "What steps are involved in ?",
    "I'm curious about the best way to .",
    "Could you show me the ropes for ?",
    "I need to know how to go about .",
    "What are the essential steps for ?",
    "Is there a specific method for ?",
    "I'd like to get some advice on .",
    "Can you explain the process of ?",
    "I'm looking for guidance on how to approach .",
    "What's the proper way to handle ?",
    "How should I proceed with ?",
    "I'm interested in your expertise on .",
    "Could you walk me through the steps for ?",
    "I'm not sure where to begin when it comes to .",
    "What should I prioritize when doing ?",
    "How can I ensure success with ?",
    "I'd appreciate some tips on .",
    "Can you provide a roadmap for ?",
    "What's the recommended course of action for ?",
    "I'm seeking your guidance on .",
    "Could you offer some suggestions for ?",
    "I'd like to know the steps to take for .",
    "What's the most effective way to achieve ?",
    "How can I make the most of ?",
    "I'm wondering about the best approach to .",
    "Can you share your insights on ?",
    "What steps should I follow to complete ?",
    "I'm looking for advice on .",
    "What's the strategy for successfully completing ?",
    "How should I prepare myself for ?",
    "I'm not sure where to start with .",
    "What's the procedure for ?",
    "Could you provide some guidance on ?",
    "I'd like to get some tips on how to .",
    "Can you explain how to tackle step by step?",
    "I'm interested in understanding the process of .",
    "What are the key steps to ?",
    "Is there a specific method that works for ?",
    "I'd appreciate your advice on successfully completing .",
    "Can you shed light on the best way to ?",
    "What would you recommend as the first step to ?",
    "How do I initiate ?",
    "I'm inquiring about the recommended steps for .",
    "Could you share some insights into ?",
    "I'm seeking your expertise on .",
    "What's your recommended approach for ?",
    "I'd like some guidance on where to start with .",
    "Can you provide recommendations for ?",
    "What's your advice for someone looking to ?",
    "I'm seeking your input on the process of .",
    "How can I achieve success with ?",
    "What's the best way to navigate ?",
    "I'm curious about the steps required for .",
    "Could you show me the proper way to ?",
    "I need to know the necessary steps for .",
    "What's the most efficient method for ?",
    "I'd appreciate your guidance on .",
    "Can you explain the steps involved in ?",
    "I'm looking for recommendations on how to approach .",
    "What's the right way to handle ?",
    "How should I manage ?",
    "I'm interested in your insights on .",
    "Could you provide a step-by-step guide for ?",
    "I'm not sure how to start when it comes to .",
    "What are the key factors to consider for ?",
    "How can I ensure a successful outcome with ?",
    "I'd like some tips and tricks for .",
    "Can you offer a roadmap for accomplishing ?",
    "What's the preferred course of action for ?",
    "I'm seeking your expert advice on .",
    "Could you suggest some best practices for ?",
    "I'd like to understand the necessary steps to complete .",
    "What's the most effective strategy for ?",
]

# Prompt templates asking for bounding boxes of something in an image,
# followed by templates asking for a description of a region.
# NOTE(review): several sentences read as if placeholder tokens are missing
# (e.g. "Where is ?") - verify against the upstream source.
template_grounding_cogvlm = [
    "Where is ?",
    "Where is in the image?",
    "Where is ? answer in [[x0,y0,x1,y1]] format.",
    "Can you point out in the image and provide the bounding boxes of its location?",
    "Help me to locate in and give me its bounding boxes, please.",
    "In the given, could you find and tell me the bounding boxes of ?",
    "Guide me to the location of within the image by providing its bounding boxes.",
    "I'd like to know the exact bounding boxes of in the photo.",
    "Would you kindly provide the bounding boxes of located in the picture?",
    "Can you find in and give me the bounding boxes of where it is located?",
    "I'm trying to locate in. Can you determine its bounding boxes for me?",
    "What are the bounding boxes of in the image?",
    "Can you disclose the position of in the photograph by stating its bounding boxes?",
    "In, could you let me know the location of in the form of bounding boxes?",
    "I need the bounding boxes of in, can you please assist me with that?",
    "Where in is located? Provide me with its bounding boxes, please.",
    "May I have the bounding boxes of ?",
    "In the photograph, could you pinpoint the location of and tell me its bounding boxes?",
    "Can you please search and find in, then let me know its bounding boxes?",
    "Please, point out the position of in the image by giving its bounding boxes.",
    "What are the exact bounding boxes of in the provided picture?",
    "Detect the location of in and share the bounding boxes with me, please.",
    "In the picture, I'd like you to locate and provide its coordinates.",
    "Please indicate the location of in the photo by giving bounding boxes.",
    "Find in and share its coordinates with me.",
    "Could you please help me find the bounding boxes of in the image?",
    "I am looking for the position of in. Can you provide its bounding boxes?",
    "In the image, can you locate and let me know its coordinates?",
    "I'd appreciate if you could find and tell me the bounding boxes of .",
    "In, I need the bounding box bounding boxes of .",
    "Point me to the location of in the picture by providing its bounding boxes.",
    "Could you trace in and tell me its bounding boxes?",
    "Can you assist me in locating in, and then provide its bounding boxes?",
    "I'm curious, what are the bounding boxes of in the photo?",
    "Kindly share the bounding boxes of located in the image.",
    "I would like to find in. Can you give me its bounding boxes?",
    "Can you spot in and disclose its bounding boxes to me?",
    "Please, reveal the location of in the provided photograph as coordinates.",
    "Help me locate and determine the bounding boxes of .",
    "I request the bounding boxes of in the image.",
    "In the given, can you find and tell me its bounding boxes?",
    "I need to know the position of in as bounding boxes.",
    "Locate in and provide its bounding boxes, please.",
    "Assist me in finding in the photo and provide the bounding box bounding boxes.",
    "In, can you guide me to the location of by providing bounding boxes?",
    "I'd like the bounding boxes of as it appears in the image.",
    "What location does hold in the picture? Inform me of its bounding boxes.",
    "Identify the position of in and share its bounding boxes.",
    "I'd like to request the bounding boxes of within the photo.",
    "How can I locate in the image? Please provide the bounding boxes.",
    "I am interested in knowing the bounding boxes of in the picture.",
    "Assist me in locating the position of in the photograph and its bounding box bounding boxes.",
    # A comma was missing after the next entry, which silently concatenated
    # it with the following template into a single string.
    "In the image, I need to find and know its bounding boxes. Can you please help?",
    "Can you give me a description of the region in image?",
    "In the provided image, would you mind describing the selected area ?",
    "I need details about the area located within image.",
    "Could you please share some information on the region in this photograph?",
    "Describe what's happening within the coordinates of the given image.",
    "What can you tell me about the selected region in the photo?",
    "Please, can you help me understand what's inside the region in image?",
    "Give me a comprehensive description of the specified area in the picture.",
    "I'm curious about the area in the following image. Can you describe it?",
    "Please elaborate on the area with the coordinates in the visual.",
    "In the displayed image, help me understand the region defined by .",
    "Regarding the image, what's going on in the section ?",
    "In the given photograph, can you explain the area with coordinates ?",
    "Kindly describe what I should be seeing in the area of image.",
    "Within the input image, what can be found in the region defined by ?",
    "Tell me what you see within the designated area in the picture.",
    "Please detail the contents of the chosen region in the visual input.",
    "What's inside the area of the provided graphic?",
    "I'd like some information about the specific region in the image.",
    "Help me understand the details within the area in photograph.",
    "Can you break down the region in the image for me?",
    "What is taking place within the specified area in this capture?",
    "Care to elaborate on the targeted area in the visual illustration?",
    "What insights can you provide about the area in the selected picture?",
    "What does the area within the given visual contain?",
    "Analyze and describe the region in the included photo.",
    "Please provide details for the area marked as in this photographic.",
    "For the image, can you assess and describe what's happening at ?",
    "Fill me in about the selected portion within the presented image.",
    "In the image, elaborate on the details found within the section .",
    "Please interpret and describe the area inside the given picture.",
    "What information can you give me about the coordinates in image?",
    "Regarding the coordinates in image, can you provide a description?",
    "In the photo, can you delve into the details of the region ?",
    "Please provide insights on the specified area within the graphic.",
    "Detail the chosen region in the depicted scene.",
    "Can you discuss the entities within the region of image?",
    "I'd appreciate a breakdown of the area in the displayed image.",
    "What's the story in the section of the included visual?",
    "Please enlighten me about the region in the given photo.",
    "Offer a thorough description of the area within the illustration.",
    "What can you share about the area in the presented image?",
    "Help me grasp the context of the region within image.",
    "Kindly give an overview of the section in photo.",
    "What details can you provide about the region in the snapshot?",
    "Can you divulge the contents of the area within the given image?",
    "In the submitted image, please give a synopsis of the area .",
    "In the image, please describe the bounding box .",
    "Please describe the region in the picture.",
    "Describe the bbox in the provided photo.",
    "What can you tell me about the area within the image?",
    "Could you give me a description of the rectangular region found in?",
    "In, what elements can be found within the coordinates ?",
    "Please provide details for the area within the bounding box in.",
    "Can you generate a description for the selected region in the image?",
    "Kindly describe the objects or scenery in the bounding box within.",
    "What details can you provide for the rectangle defined by the coordinates in?",
    "In relation to the picture, please describe the content of the area marked by .",
    "I'd like to know more about the area in the given image. Can you describe it?",
    "Can you help me by describing the part of that lies within the bounding box ?",
    "What's happening in the section of the photo enclosed by the coordinates ?",
    "Describe the image content present in the specified rectangular area of.",
    "Please provide information about the area within the bounding box in the picture.",
    "Could you offer a description of the contents in the selected area of the image?",
    "I'm curious about the area in. Can you provide a description of it?",
    "What can be observed in the rectangular region in the photograph?",
    "Please explain what is contained in the portion of defined by the box .",
    "In the photograph, can you describe the objects or scenery enclosed by ?",
    "Can you give a brief explanation of the specified area in the image?",
    "What does the area look like in the context of the image?",
    "Could you please describe the contents of the bounding box in the given image?",
    "I would like to know more about the rectangular region within the picture. Can you describe it?",
    "Please tell me about the area in the image. What does it contain?",
    "Help me understand what's happening in the selected bounding box within.",
    "Can you provide a description of the area in the image?",
    "What sort of things can be seen in the region of the photo?",
    "Describe what can be found within the bounds of in the image.",
    "In, can you paint a picture of the area enclosed by coordinates ?",
    "Please provide a detailed account of the area covered by the bounding box in.",
    "Give me a vivid description of what's happening in the area within the snapshot.",
    "In the image, what do you observe within the rectangular box defined by the coordinates ?",
    "Could you give me a breakdown of the content in the specified area of the picture?",
    "Please elucidate the area of the image.",
    "I'd appreciate it if you could describe the portion of that lies within the rectangle .",
    "Can you share some insights about the rectangular region in the image?",
    "Help me visualize the section of the photo enclosed by the bounding box .",
    "Would you kindly provide a description for the content within the rectangular area of?",
    "In, can you tell me more about the area specified by the bounding box ?",
    "Please describe what can be seen in the rectangular region of the image.",
    "Can you analyze the content of the area within the photograph?",
    "In the provided image, please explain the content within the region .",
    "I'm interested in the selected rectangle in. Can you tell me more about it?",
    "Explain what can be found in the bounding box in the context of the image.",
    "Kindly share your observations about the rectangular region within.",
    "I'd like a thorough description of the area in the image.",
    "Could you please provide a description of the rectangular area in?",
    "Please describe the section of the picture defined by the bbox .",
    "Tell me more about the scenery or objects within the rectangular region in.",
    "Would you kindly describe the content of the area enclosed by in the image?",
    "Help me understand the objects or scenery within the bounding box in the image.",
    "I would like to know about the section of the image enclosed by the rectangle . Can you describe it?",
    "Describe the selected rectangular area in the photo.",
    "Tell me about the region of the image.",
    "I request a description of the area in the picture.",
    "Can you elaborate on the content of the bounding box in?",
    "Please share details about the rectangular region within the image.",
    "What can I find in the bbox of the provided image?",
    "In the image, could you provide a description for the coordinates ?",
    "Could you tell me more about the area in the snapshot?",
    "Fill me in on the details of the rectangular box within the image.",
    "What's going on in the section of contained within the bounding box ?",
    "I would like a description of the content within the bbox in.",
    "Please enlighten me about the area in the photograph.",
    "Can you give me a visual rundown of the area in?",
    "Describe the visual elements within the selected area of the image.",
    "Tell me what you see in the area within the context of the image.",
    "Explain the content within the rectangular region of the image.",
    "I'd like some information about the bounding box in the photo.",
    "What is happening within the rectangle defined by coordinates in the image?",
    "Please describe the content within the area displayed in the image.",
    "What can be seen in the bounding box in the context of the provided image?",
    "Share some details about the objects or environment within the bounding box in.",
    "Please describe the area in the image for me.",
    "Can you generate a description of the contents within the selected region in?",
    "What objects or scenery can be found in the area in the image?",
    "Please tell me more about the rectangular section in the photo.",
    "Could you describe the content of the bbox in the image?",
    "What does the selected region in the image encompass?",
    "I am interested in the region of the image; please describe it.",
    "Can you provide some context for the area within the picture?",
    "Please give me some details about the rectangle in the image.",
    "In the photo, what can you see within the region defined by the bounding box ?",
    "I would like a detailed description of the portion of enclosed by the bbox .",
    "Please help me understand the content present within the rectangle in.",
    "Would you mind describing the rectangular area in the provided image?",
]