miniGPT4-Video-Zero/test_benchmark/quantitative_evaluation/benchmark_dataset_generation/generate_temporal_qa.py
import openai
import os
import argparse
import warnings
import json
import ast
from multiprocessing.pool import Pool

warnings.filterwarnings('ignore')
def parse_args():
    parser = argparse.ArgumentParser(description="question-answer-generation-using-gpt-3.5")
    parser.add_argument("--gt_caption_folder", required=True, help="Path to the folder containing ground-truth caption files.")
    parser.add_argument("--output_dir", required=True, help="Path to save the per-video annotation JSON files.")
    parser.add_argument("--output_json", required=True, help="Path to save the final combined annotation JSON file.")
    parser.add_argument("--api_key", required=True, help="OpenAI API key.")
    parser.add_argument("--num_tasks", required=True, type=int, help="Number of parallel splits to divide the captions into.")
    args = parser.parse_args()
    return args
def annotate(gt_file, caption_files, output_dir):
    """
    Generate a temporal question-answer pair for each caption file using GPT-3.5.
    """
    for file in caption_files:
        key = file[:-5]  # Strip the '.json' extension to recover the video id.
        caption = gt_file[key]
        try:
            # Query the chat model (legacy openai<1.0 ChatCompletion API).
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "system",
                        "content":
                            "You play two roles: a human asking questions related to a video and an intelligent chatbot designed to help people find information from a given video. "
                            "Your task is to generate a question-answer pair specifically related to temporal understanding from the video content. "
                            "Your task is to first play the role of a human who asks a question about the temporal sequence or timing of events in the video and then play the role of an AI assistant that provides information based on the video content."
                            "------"
                            "##TASK: "
                            "Users will provide some information about a video, and you will generate a conversation-like question and answers pair specifically focusing on the temporal sequence of events in the video. "
                            "The question should be designed to extract temporal sequence information directly from the given information, so that the provided information or parts of it can serve as the answer. "
                            "Generate ONE descriptive and conversational style question and detailed answer based on the given information, specifically related to the temporal understanding in the video."
                            "------"
                            "##INSTRUCTIONS:"
                            "- The question must be like a human conversation and directly related to the temporal sequence of events in the video. "
                            "- The question should be designed to extract temporal sequence information DIRECTLY from the given information, so that it or parts of it can serve as the answer. "
                            "- The answer must be detailed and descriptive, and should directly reference the information provided with respect to the temporal sequence of events in the video."
                    },
                    {
                        "role": "user",
                        "content":
                            f"The user input is: {caption}. "
                            "Please generate the response in the form of a Python dictionary string with keys 'Q' for question and 'A' for answer. Each corresponding value should be the question and answer text respectively. "
                            "For example, your response should look like this: {'Q': 'Your question here...', 'A': 'Your answer here...'}. "
                    }
                ]
            )
            # Convert the model reply to a Python dictionary; ast.literal_eval
            # raises if the reply is not a valid dict literal, which the
            # except block below catches.
            response_message = completion["choices"][0]["message"]["content"]
            response_dict = ast.literal_eval(response_message)
            # Save the question-answer pair to a json file.
            with open(f"{output_dir}/{key}.json", "w") as f:
                json.dump(response_dict, f)
        except Exception as e:
            print(f"Error processing file '{key}': {e}")

def main():
    """
    Main function to control the flow of the program.
    """
    # Parse arguments.
    args = parse_args()

    # Read ground truth captions.
    gt_captions = {}
    gt_files = os.listdir(args.gt_caption_folder)
    for file in gt_files:
        with open(os.path.join(args.gt_caption_folder, file), mode='r', encoding='utf-8-sig') as f:
            caption = f.read().replace('\n', '').replace('‘', "'").replace('’', "'")
            video_id = file[:-4]  # Strip the 4-character extension (e.g. '.txt').
            gt_captions[video_id] = caption

    caption_files = [f"{video_id}.json" for video_id in gt_captions.keys()]
    output_dir = args.output_dir
    # Generate output directory if it does not exist.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Set the OpenAI API key.
    openai.api_key = args.api_key
    num_tasks = args.num_tasks
    # While loop to ensure that all captions are processed.
    while True:
        try:
            # Files that have already been processed.
            completed_files = os.listdir(output_dir)
            print(f"completed_files: {len(completed_files)}")

            # Files that have not been processed yet.
            incomplete_files = [f for f in caption_files if f not in completed_files]
            print(f"incomplete_files: {len(incomplete_files)}")

            if len(incomplete_files) == 0:
                break
            if len(incomplete_files) <= num_tasks:
                # Fewer remaining files than tasks: process them in one chunk.
                num_tasks = 1

            # Split tasks into parts.
            part_len = len(incomplete_files) // num_tasks
            all_parts = [incomplete_files[i:i + part_len] for i in range(0, len(incomplete_files), part_len)]
            task_args = [(gt_captions, part, args.output_dir) for part in all_parts]

            # Use a pool of workers to process the files in parallel.
            with Pool() as pool:
                pool.starmap(annotate, task_args)
        except Exception as e:
            print(f"Error: {e}")
    # Combine qa pairs into a single file once individual qa generation completes.
    all_data = {}
    for filename in os.listdir(output_dir):
        if filename.endswith(".json"):
            with open(os.path.join(output_dir, filename)) as f:
                key = filename[:-5]
                all_data[key] = json.load(f)

    with open(args.output_json, 'w') as f:
        json.dump(all_data, f, indent=4)


if __name__ == "__main__":
    main()
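
# Example invocation (the paths and values below are illustrative, not taken
# from the repository):
#   python generate_temporal_qa.py \
#       --gt_caption_folder ./gt_captions \
#       --output_dir ./temporal_qa \
#       --output_json ./temporal_qa.json \
#       --api_key "$OPENAI_API_KEY" \
#       --num_tasks 8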