|
|
import csv |
|
|
import json |
|
|
from pathlib import Path |
|
|
|
|
|
def csv_to_json(csv_path, json_path, file_stem):
    """Convert a caption-annotation CSV into a JSON list of benchmark entries.

    Every CSV row becomes one dictionary describing a video captioning
    sample; the dictionaries are written to ``json_path`` as a single JSON
    array, in row order.

    Args:
        csv_path: Path of the UTF-8 CSV file to read. Its header must provide
            the columns ``folder_name`` and ``caption_1`` .. ``caption_5``.
        json_path: Path of the JSON file to write (overwritten if it exists).
        file_stem: Path component placed between ``data`` and the row's
            ``folder_name`` when building the ``media_paths`` value.

    Raises:
        KeyError: If the CSV header lacks one of the required columns.
        OSError: If either file cannot be opened.
    """
    caption_columns = [f"caption_{number}" for number in range(1, 6)]
    json_data = []

    # newline='' is required by the csv module: without it, universal-newline
    # translation rewrites "\r\n" inside quoted fields to "\n" before the
    # reader ever sees them, silently altering multi-line captions.
    with open(csv_path, 'r', encoding='utf-8', newline='') as csv_file:
        csv_reader = csv.DictReader(csv_file)

        for index, row in enumerate(csv_reader):
            folder_name = row['folder_name']
            # as_posix() keeps forward slashes regardless of the host OS.
            media_path = "./" + (Path("data") / file_stem / folder_name).as_posix()

            answer = [row[column] for column in caption_columns]

            entry = {
                "index": index,
                "media_type": "Video",
                "media_paths": media_path,
                "description": "",
                "task_type": "Vision-Question-Answer",
                "question": ["Please generate descriptive captions for this multi-view video."],
                "question_type": "free-form",
                "annotations": {},
                "options": [],
                "answer": answer,
                "source": "4D-Bench",
                "domain": "Embodied_ai",
            }
            json_data.append(entry)

    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(json_data, json_file, indent=2)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: convert the human-annotation CSV of the
    # 4D_Object_Captioning data into the dataset JSON file.

    # Name used both for the output file and for the media sub-directory
    # embedded in each entry's "media_paths".
    FILE_STEM = "4D_Object_Captioning"

    # Destination of the generated JSON array.
    OUTPUT_JSON = "/mnt/data/users/zys/proj/vlm_reasoning/dataset/4D_Object_Captioning.json"

    # Source CSV holding one annotated sample per row.
    INPUT_CSV = "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/emb_ai/4d/4D_Object_Captioning/data/human_annotations.csv"

    csv_to_json(
        csv_path=INPUT_CSV,
        json_path=OUTPUT_JSON,
        file_stem=FILE_STEM,
    )