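# NOTE(review): page banner captured with this file ("Spaces: Runtime error / Runtime error"); presumably not part of the module source -- confirm and drop.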
import json | |
import os | |
import random | |
from PIL import Image | |
from .vqa_dataset import VQADataset, VQAPrompter | |
# Pool of interchangeable image-description / captioning prompts.
# CcSbuAlignDataset.process_text picks one uniformly at random for every
# sample, so each caption is paired with varied instruction wording.
# The strings are used verbatim at runtime; do not normalise their
# capitalisation or punctuation.
QUESTIONS = [
    "please describe the image",
    "can you describe the image",
    "Could you provide a description of the image?",
    "What do you see in this image?",
    "Share your thoughts on the content of the image.",
    "Please narrate what's happening in the picture.",
    "Can you give a brief explanation of the image?",
    "Describe the main elements and details present in the image.",
    "In your own words, what is depicted in the image?",
    "Can you outline the key aspects of the image?",
    "What are the most striking features in this image?",
    "Please provide a summary of the image's content.",
    "Describe the overall theme or concept captured in the image.",
    "How would you explain the image's composition and focus?",
    "What is the focal point or main subject of the image?",
    "How do the different components of the image interact with each other?",
    "What would be a fitting caption for this image?",
    "Can you create a concise description that captures the essence of the image?",
    "How would you briefly summarize the content of this image in a phrase or sentence?",
    "Please provide a catchy and relevant caption for this picture.",
    "If you were to give this image a title, what would it be?",
    "Describe the image in one creative sentence.",
    "Please suggest a memorable phrase that encapsulates the image's content.",
    "What engaging phrase would best represent this image?",
    "Can you create an expressive caption that highlights the main theme of the image?",
    "How would you sum up the image's story for a caption?",
    "Provide an eye-catching caption that conveys the image's core message.",
    "If you were to give this image a headline, what would it say?",
    "Can you craft a captivating caption that communicates the essence of the image?",
    "How would you describe the image's content in a powerful caption?",
    "Please provide an inventive title to summarize the scene depicted in the image.",
    "Compose a concise and striking phrase that reflects the image's key elements.",
    "If you were to create a caption for this image, what would it be?",
    "Offer a compelling caption that highlights the central focus of the image.",
    "Can you produce a unique caption that encapsulates the image's overall mood?",
    "Please generate an attention-grabbing caption that would best illustrate the events captured in this image",
    "How would you express the image's main idea in an impactful sentence?",
    "Please create a vivid and concise title that conveys the essence of the picture.",
    "Compose an imaginative caption that reflects the image's most striking features.",
    "What memorable statement would best represent the scene illustrated in this image?",
    "Draft an evocative caption that brings the image to life for the reader.",
    "Can you suggest an insightful caption that highlights the underlying message of the image?",
    "What engaging phrase would effectively convey the action or subject matter depicted in this picture?",
    "How would you encapsulate the image's core theme in a concise and expressive manner?",
    "Please provide a creative and impactful title that captures the spirit of the image.",
    "Craft a captivating caption that showcases the image's most prominent attributes.",
    "What intriguing statement would best sum up the scene presented in this image?",
    "Develop a descriptive caption that paints a vivid picture for the viewer.",
    "Can you give a detailed account of the image's contents?",
    "What are the key elements and features visible in this image?",
    "How would you narrate the events or actions depicted in the picture?",
    "Please share your observations about the various components present in the image.",
    "What is the overall theme or concept captured in this image? Can you describe it?",
]
class CcSbuAlignDataset(VQADataset):
    """Caption dataset that pairs each image caption with a random prompt.

    Annotations are read from one or more JSON files, each of which must
    contain a top-level ``"annotations"`` list. Every annotation entry is
    expected to provide an ``"image_id"`` (string, used to build the image
    file name) and a ``"caption"`` (used as the answer text).
    """

    def __init__(self, tokenizer, vis_processor, vis_root, ann_paths, add_eos=True, ignore_instruction=True):
        """Load all annotation files and store the processing helpers.

        Args:
            tokenizer: Stored as ``self.tokenizer``; not used directly in
                this class (presumably consumed by ``VQADataset`` -- confirm).
            vis_processor: Callable applied to each RGB ``PIL.Image`` in
                ``process_image``.
            vis_root: Directory that contains the ``<image_id>.jpg`` files.
            ann_paths: Iterable of paths to annotation JSON files.
            add_eos: Stored as ``self.add_eos``; not read in this class.
            ignore_instruction: Stored as ``self.ignore_instruction``; not
                read in this class.

        Raises:
            OSError: If an annotation file cannot be opened.
            KeyError: If an annotation file lacks the ``"annotations"`` key.
        """
        # NOTE(review): VQADataset.__init__ is intentionally not invoked here,
        # matching the original code; its signature is not visible from this file.
        self.tokenizer = tokenizer
        self.vis_root = vis_root

        self.annotation = []
        for ann_path in ann_paths:
            # Context manager guarantees the handle is closed even if the JSON
            # is malformed; explicit UTF-8 avoids locale-dependent decoding.
            with open(ann_path, "r", encoding="utf-8") as ann_file:
                self.annotation.extend(json.load(ann_file)["annotations"])

        self.vis_processor = vis_processor
        self.prompter = VQAPrompter()
        self.add_eos = add_eos
        self.ignore_instruction = ignore_instruction

    def process_text(self, ann):
        """Build the instruction/answer pair for one annotation.

        A question is drawn at random from ``QUESTIONS`` and passed through
        ``self.prompter``; the annotation's ``"caption"`` is the answer.

        Returns:
            dict with keys ``"instruction"`` and ``"answer"``.
        """
        # Randomly select a question so the same caption is seen with
        # different instruction wording.
        question = random.choice(QUESTIONS)
        answer = ann["caption"]
        instruction = self.prompter(question)
        return dict(instruction=instruction, answer=answer)

    def process_image(self, ann):
        """Load ``<vis_root>/<image_id>.jpg`` as RGB and run ``vis_processor``.

        Returns:
            Whatever ``self.vis_processor`` returns for the RGB image.
        """
        image_path = os.path.join(self.vis_root, ann["image_id"] + ".jpg")
        # convert() yields an independent image, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        image = self.vis_processor(image)
        return image
def build_ccsbualign_dataset(
    tokenizer,
    vis_processor,
    vis_root="data/cc_sbu_align/image/",
    ann_paths=None,
    **kwargs,
):
    """Construct a ``CcSbuAlignDataset`` with the default data locations.

    Args:
        tokenizer: Forwarded to ``CcSbuAlignDataset``.
        vis_processor: Forwarded to ``CcSbuAlignDataset``.
        vis_root: Directory holding the image files.
        ann_paths: Iterable of annotation JSON paths. ``None`` (the default)
            resolves to ``["data/cc_sbu_align/filter_cap.json"]``.
        **kwargs: Accepted but not forwarded, so options such as ``add_eos``
            or ``ignore_instruction`` passed here have no effect and the
            dataset uses its own defaults.

    Returns:
        The constructed ``CcSbuAlignDataset`` instance.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between every dataset built through this function.
    if ann_paths is None:
        ann_paths = ["data/cc_sbu_align/filter_cap.json"]

    return CcSbuAlignDataset(
        tokenizer=tokenizer,
        vis_processor=vis_processor,
        vis_root=vis_root,
        ann_paths=ann_paths,
    )