# VideoAnalyzer / app.py
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch
from decord import VideoReader, cpu
from PIL import Image
import os
import spaces

# Load the model and tokenizer
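# Note: the int4 checkpoint ships custom modeling code, hence trust_remote_code=True;
# device_map="auto" lets accelerate place the quantized weights on the available device.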
model_name = "openbmb/MiniCPM-V-2_6-int4"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True, device_map="auto")
model.eval()
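
# Cap on how many frames are sampled per video, and the upload extensions accepted as video.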
MAX_NUM_FRAMES = 64
VIDEO_EXTENSIONS = {'.mp4', '.mkv', '.mov', '.avi', '.flv', '.wmv', '.webm', '.m4v'}

def get_file_extension(filename):
    return os.path.splitext(filename)[1].lower()

def is_video(filename):
    return get_file_extension(filename) in VIDEO_EXTENSIONS

def encode_video(video_path):
    """Sample up to MAX_NUM_FRAMES frames from the video and return them as PIL images."""
    def uniform_sample(l, n):
        # Pick n indices spread evenly across the sequence.
        gap = len(l) / n
        idxs = [int(i * gap + gap / 2) for i in range(n)]
        return [l[i] for i in idxs]

    vr = VideoReader(video_path, ctx=cpu(0))
    total_frames = len(vr)
    if total_frames <= MAX_NUM_FRAMES:
        frame_idxs = list(range(total_frames))
    else:
        frame_idxs = uniform_sample(range(total_frames), MAX_NUM_FRAMES)
    frames = vr.get_batch(frame_idxs).asnumpy()
    # MiniCPM-V's chat interface expects PIL images, so convert each sampled frame.
    return [Image.fromarray(frame.astype('uint8')) for frame in frames]

@spaces.GPU
def analyze_video(video, prompt):
    # gr.Video passes the upload as a filepath string; older Gradio versions may
    # hand over a file wrapper instead, so resolve both.
    video_path = video if isinstance(video, str) else getattr(video, 'path', None) or video.name
    if not is_video(video_path):
        return "Please upload a valid video file."
    frames = encode_video(video_path)
    # Fall back to a generic instruction when no prompt is given.
    question = prompt.strip() if prompt and prompt.strip() else "Describe this video in detail."
    # MiniCPM-V-2.6 takes the sampled frames and the prompt together through its
    # chat() interface, which decodes internally and returns a plain string.
    msgs = [{'role': 'user', 'content': frames + [question]}]
    with torch.no_grad():
        answer = model.chat(
            image=None,
            msgs=msgs,
            tokenizer=tokenizer,
            max_new_tokens=50,   # original response-length cap
            use_image_id=False,  # video settings from the model card example
            max_slice_nums=2,
        )
    return answer

# Create the Gradio interface using Blocks
with gr.Blocks(title="Video Analyzer using MiniCPM-V-2.6-int4") as iface:
    gr.Markdown("# Video Analyzer using MiniCPM-V-2.6-int4")
    gr.Markdown("Upload a video to get an analysis using the MiniCPM-V-2.6-int4 model.")
    gr.Markdown("This model uses 4-bit quantization for improved efficiency. [Learn more](https://huggingface.co/openbmb/MiniCPM-V-2_6-int4)")
    with gr.Row():
        video_input = gr.Video()
        prompt_input = gr.Textbox(label="Prompt (optional)", placeholder="Enter a prompt to guide the analysis...")
    analysis_output = gr.Textbox(label="Video Analysis")
    analyze_button = gr.Button("Analyze Video")
    analyze_button.click(fn=analyze_video, inputs=[video_input, prompt_input], outputs=analysis_output)
# Launch the interface
iface.launch()