# vqa_demo / extract_100_samples.py
# MinxuanQin
# add sample images and rewrite app.py
# c1b68f7
import os

import pandas as pd
import torch
from datasets import load_dataset, get_dataset_split_names
from PIL import Image
# Export the first `index_range` samples of the VQAv2 validation split:
# each sample's image is written to images/<image_id>.jpg and the question
# metadata is collected into vqa_samples.json.

# streaming=False materialises the split locally (under ./cache) so that
# samples can be fetched by integer position below.
dataset = load_dataset("HuggingFaceM4/VQAv2", split="validation", cache_dir="cache", streaming=False)

# Number of leading samples (positions 0 .. index_range - 1) to export.
# NOTE(review): the script name mentions 100 samples but this exports 1000 --
# confirm which is intended.
index_range = 1000

# Image.save() does not create missing parent directories.
os.makedirs('images', exist_ok=True)

rows = []
saved_img_ids = set()
# Clamp to the split size so an oversized index_range cannot raise IndexError.
for idx in range(min(index_range, len(dataset))):
    sample = dataset[idx]
    img_id = sample['image_id']

    # An image_id can recur across samples (one image, several questions);
    # encoding and writing the same JPEG again would only waste time.
    if img_id not in saved_img_ids:
        sample['image'].save(f'images/{img_id}.jpg')
        saved_img_ids.add(img_id)

    rows.append([
        sample['question'],
        sample['multiple_choice_answer'],
        sample['question_id'],
        # NOTE(review): the column is called 'img_path' but holds the bare
        # image id; kept as-is because consumers of the JSON may rely on it.
        img_id,
        sample['question_type'],
    ])

# Build the frame once from the collected rows instead of growing it row by
# row, which copies the frame repeatedly. The resulting index is 0..n-1, the
# same as the row-by-row version produced.
df = pd.DataFrame(rows, columns=['ques', 'label', 'q_id', 'img_path', 'question_type'])
df.to_json('vqa_samples.json', orient='columns')