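"""
Inference handler for Aliayub1995/VideoLLaMA2-7B: loads the model once at
startup and, for each call, runs video-to-text inference on a preprocessed
video tensor, returning the generated description.
"""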
import logging
import sys
from typing import Any, Dict, List

import numpy as np

# Make the local VideoLLaMA2 package importable before importing from it.
sys.path.append('./')

from videollama2 import model_init, mm_infer
from videollama2.utils import disable_torch_init

class EndpointHandler:
    def __init__(self, path: str = ""):
        """
        Initialize the handler by loading the VideoLLaMA2 model, processor, and tokenizer.

        Args:
            path (str): Path to the model or other required files. Currently unused;
                the model is always loaded from the Aliayub1995/VideoLLaMA2-7B repository.
        """
        disable_torch_init()
        self.model_path = 'Aliayub1995/VideoLLaMA2-7B'
        self.model, self.processor, self.tokenizer = model_init(self.model_path)
    def __call__(self, video_tensor: np.ndarray) -> List[Dict[str, Any]]:
        """
        Run inference on a preprocessed video tensor and return the model output.

        Args:
            video_tensor (np.ndarray): Decoded video frames to describe.

        Returns:
            List[Dict[str, Any]]: A single-element list with the generated description.
        """
        logging.info("Received video tensor")  # Debugging: confirm the video tensor was received

        # Default modality and prompt
        modal = "video"
        instruct = "Can you explain each scene and provide the exact time of the video in which it happened in this format [start_time: end_time]: Description, [start_time: end_time]: Description ..."

        # Preprocess the frames for the chosen modality and run inference
        output = mm_infer(
            self.processor[modal](video_tensor),
            instruct,
            model=self.model,
            tokenizer=self.tokenizer,
            do_sample=False,
            modal=modal,
        )
        return [{"output": output}]