Spaces:
Runtime error
Runtime error
| """ | |
| Copyright (c) 2022, salesforce.com, inc. | |
| All rights reserved. | |
| SPDX-License-Identifier: BSD-3-Clause | |
| For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause | |
| """ | |
| import json | |
| import os | |
| import random | |
| import numpy as np | |
| from PIL import Image | |
| from transformers import LlamaTokenizer | |
| from .vqa_dataset import VQADataset | |
| QUESTIONS = [ | |
| "please describe the image", | |
| "can you describe the image", | |
| "Could you provide a description of the image?", | |
| "What do you see in this image?", | |
| "Share your thoughts on the content of the image.", | |
| "Please narrate what's happening in the picture.", | |
| "Can you give a brief explanation of the image?", | |
| "Describe the main elements and details present in the image.", | |
| "In your own words, what is depicted in the image?", | |
| "Can you outline the key aspects of the image?", | |
| "What are the most striking features in this image?", | |
| "Please provide a summary of the image's content.", | |
| "Describe the overall theme or concept captured in the image.", | |
| "How would you explain the image's composition and focus?", | |
| "What is the focal point or main subject of the image?", | |
| "How do the different components of the image interact with each other?", | |
| "What would be a fitting caption for this image?", | |
| "Can you create a concise description that captures the essence of the image?", | |
| "How would you briefly summarize the content of this image in a phrase or sentence?", | |
| "Please provide a catchy and relevant caption for this picture.", | |
| "If you were to give this image a title, what would it be?", | |
| "Describe the image in one creative sentence.", | |
| "Please suggest a memorable phrase that encapsulates the image's content.", | |
| "What engaging phrase would best represent this image?", | |
| "Can you create an expressive caption that highlights the main theme of the image?", | |
| "How would you sum up the image's story for a caption?", | |
| "Provide an eye-catching caption that conveys the image's core message.", | |
| "If you were to give this image a headline, what would it say?", | |
| "Can you craft a captivating caption that communicates the essence of the image?", | |
| "How would you describe the image's content in a powerful caption?", | |
| "Please provide an inventive title to summarize the scene depicted in the image.", | |
| "Compose a concise and striking phrase that reflects the image's key elements.", | |
| "If you were to create a caption for this image, what would it be?", | |
| "Offer a compelling caption that highlights the central focus of the image.", | |
| "Can you produce a unique caption that encapsulates the image's overall mood?", | |
| "Please generate an attention-grabbing caption that would best illustrate the events captured in this image", | |
| "How would you express the image's main idea in an impactful sentence?", | |
| "Please create a vivid and concise title that conveys the essence of the picture.", | |
| "Compose an imaginative caption that reflects the image's most striking features.", | |
| "What memorable statement would best represent the scene illustrated in this image?", | |
| "Draft an evocative caption that brings the image to life for the reader.", | |
| "Can you suggest an insightful caption that highlights the underlying message of the image?", | |
| "What engaging phrase would effectively convey the action or subject matter depicted in this picture?", | |
| "How would you encapsulate the image's core theme in a concise and expressive manner?", | |
| "Please provide a creative and impactful title that captures the spirit of the image.", | |
| "Craft a captivating caption that showcases the image's most prominent attributes.", | |
| "What intriguing statement would best sum up the scene presented in this image?", | |
| "Develop a descriptive caption that paints a vivid picture for the viewer.", | |
| "Can you give a detailed account of the image's contents?", | |
| "What are the key elements and features visible in this image?", | |
| "How would you narrate the events or actions depicted in the picture?", | |
| "Please share your observations about the various components present in the image.", | |
| "What is the overall theme or concept captured in this image? Can you describe it?", | |
| ] | |
| class COCOCaptionDataset(VQADataset): | |
| def __init__( | |
| self, tokenizer, vis_processor=None, vis_root=None, ann_paths=[], add_eos=True, ignore_instruction=True | |
| ): | |
| """ | |
| vis_root (string): Root directory of images (e.g. coco/images/) | |
| ann_root (string): directory to store the annotation file | |
| """ | |
| self.tokenizer: LlamaTokenizer = tokenizer | |
| self.vis_root = vis_root | |
| self.annotation = [] | |
| for ann_path in ann_paths: | |
| self.annotation.extend(json.load(open(ann_path, "r"))) | |
| self.vis_processor = vis_processor | |
| instructions = [] | |
| for question in QUESTIONS: | |
| # instruction = f"Below is a question about an image. Write a response to answer the question.\n\n### Image:\n<image>\n\n### Question:\n{question}\n\n### Answer:\n".format( | |
| # question | |
| # ) | |
| instruction = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Image:\n{image}\n\n### Instruction:\n{question}\n\n### Response:\n".format( | |
| image="<image>", question=question | |
| ) | |
| instructions.append(instruction) | |
| self.instructions = instructions | |
| self.add_eos = add_eos | |
| self.ignore_instruction = ignore_instruction | |
| def process_image(self, ann): | |
| image_path = os.path.join(self.vis_root, ann["image"]) | |
| image = Image.open(image_path).convert("RGB") | |
| image = self.vis_processor(image) | |
| return image | |
| def process_text(self, ann): | |
| all_captions = ann["caption"] | |
| if not isinstance(all_captions, list): | |
| all_captions = [all_captions] | |
| caption = random.choice(all_captions) | |
| instruction = random.choice(self.instructions) | |
| return dict(instruction=instruction, answer=caption) | |