|
|
import datetime |
|
|
import json |
|
|
import os |
|
|
import random |
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
import numpy as np |
|
|
import yaml |
|
|
from decord import VideoReader, cpu |
|
|
from loguru import logger as eval_logger |
|
|
|
|
|
import lmms_eval.tasks._task_utils.file_utils as file_utils |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Resolve the Hugging Face cache root (HF_HOME env var, with the standard default).
hf_home = os.getenv("HF_HOME", "~/.cache/huggingface/")
base_cache_dir = os.path.expanduser(hf_home)

# Load the task YAML, dropping lines that contain "!function" — those custom
# tags are not parseable by yaml.safe_load and are only meaningful to lmms-eval.
with open(Path(__file__).parent / "charades.yaml", "r") as f:
    parseable_lines = [line for line in f if "!function" not in line]

cache_name = yaml.safe_load("".join(parseable_lines))["dataset_kwargs"]["cache_dir"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def temporal_grounding_doc_to_visual(doc, lmms_eval_specific_kwargs=None):
    """Resolve the on-disk path of the video referenced by *doc*.

    Args:
        doc: dataset record; ``doc["video"]`` is the video filename.
        lmms_eval_specific_kwargs: unused; kept for the lmms-eval
            doc_to_visual signature.

    Returns:
        A single-element list containing the resolved video path under
        ``<HF cache>/<cache_name>/Charades_v1_480/``.

    Exits the process when the file does not exist locally and the path is
    not an ``s3://`` URI (remote paths are presumably resolved downstream).
    """
    cache_dir = os.path.join(base_cache_dir, cache_name)
    video_path = os.path.join(cache_dir, "Charades_v1_480", doc["video"])
    # Original code had a no-op `video_path = video_path` branch; a single
    # guard expresses the same logic: only abort for missing *local* files.
    if not os.path.exists(video_path) and "s3://" not in video_path:
        sys.exit(f"video path:{video_path} does not exist, please check")
    return [video_path]
|
|
|
|
|
|
|
|
|
|
|
def temporal_grounding_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the model prompt for a temporal-grounding example.

    Args:
        doc: dataset record; ``doc["caption"]`` is the query caption.
        lmms_eval_specific_kwargs: optional dict that may carry
            ``"pre_prompt"`` and/or ``"post_prompt"`` strings.

    Returns:
        ``f"{pre_prompt}{caption}. {post_prompt}"``.

    Bug fix: the original only assigned ``pre_prompt``/``post_prompt`` when
    the corresponding key was present, so a missing key (or ``None`` kwargs)
    raised ``UnboundLocalError`` at the return. Missing prompts now default
    to the empty string.
    """
    if lmms_eval_specific_kwargs is None:
        lmms_eval_specific_kwargs = {}

    pre_prompt = lmms_eval_specific_kwargs.get("pre_prompt", "")
    post_prompt = lmms_eval_specific_kwargs.get("post_prompt", "")

    question = doc["caption"]

    return f"{pre_prompt}{question}. {post_prompt}"
|
|
|
|
|
|
|
|
def temporal_grounding_doc_to_answer(doc):
    """Return the ground-truth timestamp annotation stored in *doc*."""
    answer = doc["timestamp"]
    return answer
|
|
|
|
|
|
|
|
|
|
|
def temporal_grounding_process_results_generation(doc, result):
    """Wrap the model's first prediction as a submission entry.

    The entry is keyed by ``video>>>caption>>>timestamp`` so aggregation can
    merge predictions across examples without collisions.
    """
    submission_key = f'{doc["video"]}>>>{doc["caption"]}>>>{doc["timestamp"]}'
    return {"submission": {submission_key: result[0]}}
|
|
|
|
|
|
|
|
def temporal_grounding_aggregate_charades(results, args):
    """Aggregate Charades predictions into a single submission file."""
    return temporal_grounding_aggregate_submissions(results, args, "charades")
|
|
|
|
|
|
|
|
def temporal_grounding_aggregate_submissions(results, args, task):
    """Merge per-example submission dicts and write them to a JSON file.

    Args:
        results: iterable of single-entry dicts produced by the
            process_results step.
        args: lmms-eval CLI namespace, forwarded to
            ``file_utils.generate_submission_file``.
        task: task label embedded in the output filename (e.g. "charades").

    Side effects:
        Writes the combined submission JSON and logs its location.
    """
    # Timestamp the filename so repeated runs never overwrite each other.
    now_date_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    submission_file_name = f"inference_results_temporal_grounding_{task}_{now_date_time}.json"
    out_path = file_utils.generate_submission_file(submission_file_name, args)

    merged = {}
    for entry in results:
        merged.update(entry)

    with open(out_path, "w") as f:
        json.dump(merged, f, indent=4)

    eval_logger.info(f"Submission file saved to {out_path}")
|
|
|