import json import os import random from collections import defaultdict from PIL import Image from .vqa_dataset import VQADataset QUESTIONS = [ "What do you think of the above sentence?", "Can you confirm this statement?", "How do you interpret the given information?", "What is your opinion on this matter?", "Could you provide your perspective on this statement?", "How would you respond to the provided claim?", "What are your thoughts regarding the mentioned subject?", "Can you elaborate on this idea in English?", "Do you have any insights or feedback on this topic?", "What's your take on the given statement?", "What is your perspective on the given statement?", "How would you interpret this remark?", "Could you please provide your opinion on this?", "Can you share your understanding of the above point?", "Would you mind elaborating on this topic?", "What are your views about the given statement?", "How do you feel about the presented information?", "Could you provide your perspective on this?", "What is your opinion regarding this statement?", "Can you share your thoughts about the mentioned claim?", "How would you interpret the above comment?", "Would you mind sharing your insights on this issue?", ] class SNLIVEDataset(VQADataset): """Visual Reasoning Dataset.""" def __init__(self, tokenizer, vis_processor, vis_root, ann_paths, **kwargs): super().__init__(tokenizer, vis_processor, vis_root, ann_paths=[], **kwargs) self.annotation = self.load_annotations(ann_paths) if self.sample_image: print("randomly sample one annotation for each image") self.annotation = self.parse_annotation(self.annotation) self._add_instance_ids() @staticmethod def load_annotations(ann_paths): annotation = [] for ann_path in ann_paths: with open(ann_path, "r") as f: for line in f.readlines(): line = line.strip() if len(line) != 0: ann = json.loads(line) annotation.append(ann) return annotation def parse_annotation(self, annotation): image_list = defaultdict(list) for ann in annotation: image_list[ann["Flickr30K_ID"]].append(ann) annotation = [] for ann_list in image_list.values(): annotation.append(random.choice(ann_list)) return annotation def process_text(self, ann): question = ann["sentence2"] + " " + random.choice(QUESTIONS) answer = ann["gold_label"] if random.random() < self.option_prob: instruction = self.prompter(question, ["entailment", "neutral", "contradiction"]) else: instruction = self.prompter(question) return dict(instruction=instruction, answer=answer) def process_image(self, ann): image_path = os.path.join(self.vis_root, ann["Flickr30K_ID"] + ".jpg") image = Image.open(image_path).convert("RGB") image = self.vis_processor(image) return image