# TEOChat — videollava/eval/ben_utils.py (initial commit 134cb11, author jirvin16)
import json
import numpy as np
from tqdm import tqdm
from pathlib import Path
from videollava.constants import DEFAULT_IMAGE_TOKEN
from infer_utils import run_inference_single
def run_ben_inference(
    model,
    dataset_path,
    processor,
    tokenizer,
    conv_mode,
    use_video_data=False,
    open_prompt=None,
    repeat_frames=None,
    prompt_strategy="interleave",
    chronological_prefix=True,
    data_frac=1,
    data_size=None,
    delete_system_prompt=False,
    last_image=False,
    start_ind=None,
    end_ind=None,
    print_prompt=False,
    **kwargs
):
    """Run single-image VQA inference over a BEN/LRBEN-style dataset.

    Loads ``<name>_questions.json``, ``<name>_answers.json`` and
    ``<name>_images.json`` located next to ``dataset_path``, then iterates
    over every active test image and each of its active questions, calling
    ``run_inference_single`` once per question.

    Args:
        model, processor, tokenizer, conv_mode: forwarded unchanged to
            ``run_inference_single``.
        use_video_data, repeat_frames, prompt_strategy,
        chronological_prefix, delete_system_prompt, last_image,
        print_prompt: forwarded unchanged to ``run_inference_single``.
        open_prompt: accepted for interface compatibility; not used here.
        data_frac: if < 1, randomly keep this fraction of the active images.
        data_size: if set, randomly keep at most this many active images
            (takes precedence over ``data_frac``).
        start_ind, end_ind: optional slice bounds on the (possibly
            subsampled) image-id list, for sharded evaluation.
        **kwargs: ignored; each unused key is reported with a warning.

    Returns:
        dict mapping ``"<image_id>_<question_id>"`` to a dict with keys
        ``"predicted"``, ``"ground_truth"`` and ``"task"``.
    """
    for k, v in kwargs.items():
        print("WARNING: Unused argument:", k, v)

    dataset_path = Path(dataset_path)
    data_dir = dataset_path.parent
    questions_path = data_dir / dataset_path.name.replace(".json", "_questions.json")
    answers_path = data_dir / dataset_path.name.replace(".json", "_answers.json")
    images_path = data_dir / dataset_path.name.replace(".json", "_images.json")

    with open(questions_path) as json_data:
        questionsJSON = json.load(json_data)
    with open(answers_path) as json_data:
        answersJSON = json.load(json_data)
    with open(images_path) as json_data:
        imagesJSON = json.load(json_data)

    # LRBEN stores its imagery under a different folder name.
    if 'LRBEN' in str(dataset_path):
        image_folder = 'Images_LR'
    else:
        image_folder = 'Data'

    # Get the image IDs of test images.
    # NOTE(review): downstream access imagesJSON['images'][image_id] assumes
    # the 'images' list is positionally indexed by image id — confirm against
    # the dataset JSON; this is why subsampling must act on the id list, not
    # on the JSON structures.
    images_ids = [img['id'] for img in imagesJSON['images'] if img['active']]

    # BUG FIX: the original subsampled `imagesJSON` itself with integer
    # indices (imagesJSON[i]) drawn from len(questionsJSON); imagesJSON is a
    # dict, so either branch crashed. Subsample the active image-id list
    # instead — questions/answers are reached through these ids, so this
    # consistently shrinks the evaluation set.
    if data_size is not None:
        data_size = min(data_size, len(images_ids))
        keep = np.random.choice(len(images_ids), data_size, replace=False)
        images_ids = [images_ids[i] for i in keep]
    elif data_frac < 1:
        keep = np.random.choice(len(images_ids), int(len(images_ids) * data_frac), replace=False)
        images_ids = [images_ids[i] for i in keep]

    if start_ind is not None and end_ind is not None:
        print("Subsetting data from index", start_ind, "to", end_ind)
        images_ids = images_ids[start_ind:end_ind]
    elif start_ind is not None:
        print("Subsetting data from index", start_ind, "to end")
        images_ids = images_ids[start_ind:]
    elif end_ind is not None:
        print("Subsetting data from start to index", end_ind)
        images_ids = images_ids[:end_ind]

    # Store all predicted answers
    answers = {}
    # Read image corresponding to each ID and get its associated question and answer
    for image_id in tqdm(images_ids):
        image_paths = [str(data_dir / image_folder / (str(image_id)+'.tif'))]
        for question_id in imagesJSON['images'][image_id]['questions_ids']:
            question = questionsJSON['questions'][question_id]
            if not question['active']:
                continue
            # Force short answers so exact-match scoring is meaningful.
            inp = question["question"] + " Answer with one word or number."
            inp = DEFAULT_IMAGE_TOKEN + '\n' + inp
            type_str = question["type"]
            # Only the first listed ground-truth answer is kept for scoring.
            answer_str = answersJSON['answers'][question["answers_ids"][0]]['answer']
            outputs = run_inference_single(
                model=model,
                processor=processor,
                tokenizer=tokenizer,
                conv_mode=conv_mode,
                inp=inp,
                image_paths=image_paths,
                metadata=None,
                use_video_data=use_video_data,
                repeat_frames=repeat_frames,
                prompt_strategy=prompt_strategy,
                chronological_prefix=chronological_prefix,
                delete_system_prompt=delete_system_prompt,
                last_image=last_image,
                print_prompt=print_prompt
            )
            answers[f"{image_id}_{question_id}"] = {
                "predicted": outputs,
                "ground_truth": answer_str,
                "task": type_str
            }
    return answers