Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import AutoModel, AutoTokenizer | |
import torch | |
from decord import VideoReader, cpu | |
import os | |
import spaces | |
# Load the model and tokenizer | |
model_name = "openbmb/MiniCPM-V-2_6-int4" | |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) | |
model = AutoModel.from_pretrained(model_name, trust_remote_code=True, device_map="auto") | |
model.eval() | |
MAX_NUM_FRAMES = 64 | |
VIDEO_EXTENSIONS = {'.mp4', '.mkv', '.mov', '.avi', '.flv', '.wmv', '.webm', '.m4v'} | |
def get_file_extension(filename): | |
return os.path.splitext(filename)[1].lower() | |
def is_video(filename): | |
return get_file_extension(filename) in VIDEO_EXTENSIONS | |
def encode_video(video): | |
def uniform_sample(l, n): | |
gap = len(l) / n | |
idxs = [int(i * gap + gap / 2) for i in range(n)] | |
return [l[i] for i in idxs] | |
if hasattr(video, 'path'): | |
video_path = video.path | |
else: | |
video_path = video.file.path | |
vr = VideoReader(video_path, ctx=cpu(0)) | |
total_frames = len(vr) | |
if total_frames <= MAX_NUM_FRAMES: | |
frame_idxs = list(range(total_frames)) | |
else: | |
frame_idxs = uniform_sample(range(total_frames), MAX_NUM_FRAMES) | |
frames = vr.get_batch(frame_idxs).asnumpy() | |
return frames | |
def analyze_video(video, prompt): | |
if not is_video(video.name): | |
return "Please upload a valid video file." | |
frames = encode_video(video) | |
# Prepare the frames for the model | |
inputs = model.vpm(frames) | |
# Generate the caption with the user's prompt | |
with torch.no_grad(): | |
outputs = model.generate(inputs=inputs, tokenizer=tokenizer, max_new_tokens=50, prompt=prompt) | |
# Decode the output | |
caption = tokenizer.decode(outputs[0], skip_special_tokens=True) | |
return caption | |
# Create the Gradio interface using Blocks | |
with gr.Blocks(title="Video Analyzer using MiniCPM-V-2.6-int4") as iface: | |
gr.Markdown("# Video Analyzer using MiniCPM-V-2.6-int4") | |
gr.Markdown("Upload a video to get an analysis using the MiniCPM-V-2.6-int4 model.") | |
gr.Markdown("This model uses 4-bit quantization for improved efficiency. [Learn more](https://huggingface.co/openbmb/MiniCPM-V-2_6-int4)") | |
with gr.Row(): | |
video_input = gr.Video() | |
prompt_input = gr.Textbox(label="Prompt (optional)", placeholder="Enter a prompt to guide the analysis...") | |
analysis_output = gr.Textbox(label="Video Analysis") | |
analyze_button = gr.Button("Analyze Video") | |
analyze_button.click(fn=analyze_video, inputs=[video_input, prompt_input], outputs=analysis_output) | |
# Launch the interface | |
iface.launch() | |