Adinosaur
/

tools

Model card Files Files and versions

tools / utils /csv /4D-BO.py

Adinosaur's picture

Upload folder using huggingface_hub

1c980b1 verified 9 months ago

history blame contribute delete

2.03 kB

	import csv
	import json
	from pathlib import Path

	def csv_to_json(csv_path, json_path, file_stem):
	    """Convert a caption-annotation CSV into a JSON list of QA entries.

	    Each CSV row becomes one dictionary. The row's ``folder_name`` column
	    is turned into a media path of the form
	    ``./data/<file_stem>/<folder_name>``, and the ``caption_1`` ..
	    ``caption_5`` columns are collected, in order, as the entry's answers.
	    The resulting list is written to ``json_path`` with 2-space indentation.

	    Args:
	        csv_path: Path of the input CSV file (UTF-8, with or without BOM).
	        json_path: Path of the JSON file to write (overwritten if present).
	        file_stem: Directory name inserted between ``data`` and the row's
	            folder name when building the media path.

	    Raises:
	        KeyError: If the CSV header lacks ``folder_name`` or any of the
	            five caption columns.
	        OSError: If either file cannot be opened.
	    """
	    caption_columns = (
	        'caption_1',
	        'caption_2',
	        'caption_3',
	        'caption_4',
	        'caption_5',
	    )
	    json_data = []

	    # newline='' is what the csv module requires: without it, line endings
	    # embedded in quoted fields are rewritten by universal-newline handling.
	    # 'utf-8-sig' strips a leading BOM (common in spreadsheet exports) so
	    # the first header name is not corrupted; BOM-less files read the same.
	    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as csv_file:
	        csv_reader = csv.DictReader(csv_file)

	        for index, row in enumerate(csv_reader):
	            # Build the media path relative to the dataset root.
	            folder_name = row['folder_name']
	            media_path = "./" + (Path("data") / file_stem / folder_name).as_posix()

	            # The five captions together form the answer list.
	            answer = [row[column] for column in caption_columns]

	            # Assemble the JSON entry for this row.
	            json_data.append({
	                "index": index,
	                "media_type": "Video",
	                "media_paths": media_path,
	                "description": "",
	                "task_type": "Vision-Question-Answer",
	                "question": ["Please generate descriptive captions for this multi-view video."],
	                "question_type": "free-form",
	                "annotations": {},
	                "options": [],
	                "answer": answer,
	                "source": "4D-Bench",
	                "domain": "Embodied_ai",
	            })

	    # Write all entries to the output JSON file.
	    with open(json_path, 'w', encoding='utf-8') as json_file:
	        json.dump(json_data, json_file, indent=2)

	# Usage example: run this file directly to convert the annotation CSV.
	if __name__ == "__main__":
	    # Edit the three values below to match your environment.
	    # Name of the directory placed between "data" and each folder name
	    # when media paths are built.
	    FILE_STEM = "4D_Object_Captioning"
	    # Path of the input CSV file.
	    INPUT_CSV = "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/emb_ai/4d/4D_Object_Captioning/data/human_annotations.csv"
	    # Path of the JSON file to write.
	    OUTPUT_JSON = "/mnt/data/users/zys/proj/vlm_reasoning/dataset/4D_Object_Captioning.json"

	    csv_to_json(
	        csv_path=INPUT_CSV,
	        json_path=OUTPUT_JSON,
	        file_stem=FILE_STEM,
	    )