import json
import numpy as np
from tqdm import tqdm
from pathlib import Path

from videollava.constants import DEFAULT_IMAGE_TOKEN

from infer_utils import run_inference_single


def run_ben_inference(
        model,
        dataset_path,
        processor,
        tokenizer,
        conv_mode,
        use_video_data=False,
        open_prompt=None,
        repeat_frames=None,
        prompt_strategy="interleave",
        chronological_prefix=True,
        data_frac=1,
        data_size=None,
        delete_system_prompt=False,
        last_image=False,
        start_ind=None,
        end_ind=None,
        print_prompt=False,
        **kwargs
    ):
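    """Run single-image VQA inference over an RSVQA-style benchmark split.

    ``dataset_path`` is expected to have three companion JSON files in the
    same directory: ``*_questions.json``, ``*_answers.json`` and
    ``*_images.json``. For every active test image, each of its active
    questions is sent to the model with an appended
    "Answer with one word or number." instruction, and the model output is
    stored alongside the ground-truth answer and the question type.

    Returns a dict mapping ``"{image_id}_{question_id}"`` to
    ``{"predicted": ..., "ground_truth": ..., "task": ...}``.
    """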
    for k, v in kwargs.items():
        print("WARNING: Unused argument:", k, v)

    dataset_path = Path(dataset_path)
    data_dir = dataset_path.parent
    questions_path = data_dir / dataset_path.name.replace(".json", "_questions.json")
    answers_path = data_dir / dataset_path.name.replace(".json", "_answers.json")
    images_path = data_dir / dataset_path.name.replace(".json", "_images.json")

    with open(questions_path) as json_data:
        questionsJSON = json.load(json_data)

    with open(answers_path) as json_data:
        answersJSON = json.load(json_data)

    with open(images_path) as json_data:
        imagesJSON = json.load(json_data)

    if 'LRBEN' in str(dataset_path):
        image_folder = 'Images_LR'
    else:
        image_folder = 'Data'

    # Get the image IDs of test images
    images_ids = [img['id'] for img in imagesJSON['images'] if img['active']]

    # Optionally evaluate on a random subset of the test images
    if data_size is not None:
        data_size = min(data_size, len(images_ids))
        idx = np.random.choice(len(images_ids), data_size, replace=False)
        images_ids = [images_ids[i] for i in idx]
    elif data_frac < 1:
        idx = np.random.choice(len(images_ids), int(len(images_ids) * data_frac), replace=False)
        images_ids = [images_ids[i] for i in idx]

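    # Optionally restrict evaluation to a contiguous slice of the image list
    # (e.g. to split a long run across several jobs)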
    if start_ind is not None and end_ind is not None:
        print("Subsetting data from index", start_ind, "to", end_ind)
        images_ids = images_ids[start_ind:end_ind]
    elif start_ind is not None:
        print("Subsetting data from index", start_ind, "to end")
        images_ids = images_ids[start_ind:]
    elif end_ind is not None:
        print("Subsetting data from start to index", end_ind)
        images_ids = images_ids[:end_ind]

    # Store all predicted answers
    answers = {}
    # Read image corresponding to each ID and get its associated question and answer
    for image_id in tqdm(images_ids):

        image_paths = [str(data_dir / image_folder / f"{image_id}.tif")]

        for question_id in imagesJSON['images'][image_id]['questions_ids']:
            question = questionsJSON['questions'][question_id]
            if not question['active']:
                continue
            inp = question["question"] + " Answer with one word or number."
            inp = DEFAULT_IMAGE_TOKEN + '\n' + inp
            type_str = question["type"]
            answer_str = answersJSON['answers'][question["answers_ids"][0]]['answer']

            outputs = run_inference_single(
                model=model,
                processor=processor,
                tokenizer=tokenizer,
                conv_mode=conv_mode,
                inp=inp,
                image_paths=image_paths,
                metadata=None,
                use_video_data=use_video_data,
                repeat_frames=repeat_frames,
                prompt_strategy=prompt_strategy,
                chronological_prefix=chronological_prefix,
                delete_system_prompt=delete_system_prompt,
                last_image=last_image,
                print_prompt=print_prompt
            )

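            # Key each prediction by "<image_id>_<question_id>" so it can be
            # matched back to the benchmark question during evaluation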
            answers[f"{image_id}_{question_id}"] = {
                "predicted": outputs,
                "ground_truth": answer_str,
                "task": type_str
            }

    return answers