Spaces:
Sleeping
Sleeping
File size: 1,416 Bytes
78b768f 741cc94 7aa5317 78b768f 741cc94 7aa5317 aed9794 78b768f 5e97fbe 78b768f 741cc94 3b1917e 78b768f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# app.py
import gradio as gr
from src.model_loader import load_model
from src.video_utils import process_video_for_internvl3
from src.ar_prompts import generate_conversation_questions
# Load the tokenizer and model when this module is executed; the
# evaluate_ar_multi_turn handler uses these module-level objects on each call.
tokenizer, model = load_model()
def evaluate_ar_multi_turn(video):
    """Run the multi-turn AR evaluation dialogue on a video and return its text.

    The video is preprocessed with ``process_video_for_internvl3``, then every
    question from ``generate_conversation_questions`` is sent to ``model.chat``
    in order, threading the returned history into the next turn. Only the
    answers from the third turn onward are returned.

    Args:
        video: Value supplied by the Gradio video input. A falsy value
            (``None`` or an empty string, i.e. nothing was uploaded) is
            rejected with a message instead of being processed.

    Returns:
        The visible answers joined by blank lines, or a short prompt asking
        the user to upload a video when no input was provided.
    """
    # Submitting the form without a clip yields no input value; bail out
    # early so the preprocessing step is never handed an empty input.
    if not video:
        return "Please upload a video before running the evaluation."

    pixel_values, num_patches_list, image_prefix = process_video_for_internvl3(video)
    conversation = generate_conversation_questions(include_descriptions=True)

    # Keep only the answers of the evaluation and extension parts, i.e. from
    # the third turn (index 2) onward; earlier turns are not shown.
    first_visible_turn = 2

    history = None
    visible_outputs = []
    for i, question in enumerate(conversation):
        # Only the opening turn is prefixed with image_prefix
        # (presumably the per-frame image placeholders -- confirm against
        # process_video_for_internvl3); later turns rely on the history.
        prompt = image_prefix + question if i == 0 else question
        output, history = model.chat(
            tokenizer,
            pixel_values,
            prompt,
            # A fresh dict is built for every turn, as in the original code.
            generation_config={"max_new_tokens": 1024},
            num_patches_list=num_patches_list,
            history=history,
            return_history=True,
        )
        if i >= first_visible_turn:
            visible_outputs.append(output)

    # Concatenate the visible answers into a single text for display.
    return "\n\n".join(visible_outputs)
# Build the single-function UI: one video input, one plain-text output.
demo = gr.Interface(
    fn=evaluate_ar_multi_turn,
    inputs=gr.Video(label="Upload your AR video"),
    outputs="text",
    title="InternVL3 AR Evaluation (Multi-turn)",
    description=(
        "Upload a short AR video clip. The model will sample frames and "
        "conduct a multi-turn dialogue to assess "
        "occlusion/rendering/placement/lighting."
    ),
)

# Start serving the interface as soon as the script runs.
demo.launch()
|