AR_Testing / app.py
XiaoyiYangRIT
update some files
78b768f
# app.py
import gradio as gr
from src.model_loader import load_model
from src.video_utils import process_video_for_internvl3
from src.ar_prompts import generate_conversation_questions
# Load the tokenizer and model once at import time; evaluate_ar_multi_turn
# reuses these module-level objects on every call.
tokenizer, model = load_model()
def evaluate_ar_multi_turn(video):
    """Run the multi-turn AR evaluation dialogue on a video and return the answers.

    The video is preprocessed with ``process_video_for_internvl3`` and the
    questions from ``generate_conversation_questions`` are asked one after
    another through ``model.chat``, threading the chat history from each turn
    into the next. Answers to the first two turns are discarded; the rest are
    returned as one text block.

    Args:
        video: The value supplied by the ``gr.Video`` input (presumably a path
            to the uploaded clip -- confirm against the video helper).

    Returns:
        The retained answers joined by blank lines. Empty string when the
        conversation has fewer than three turns.

    Raises:
        ValueError: If no video was provided.
    """
    # The UI callback receives None when the user submits without uploading a
    # clip; fail early with a clear message instead of deep inside the helper.
    if video is None:
        raise ValueError("No video provided; please upload an AR video clip.")

    pixel_values, num_patches_list, image_prefix = process_video_for_internvl3(video)
    conversation = generate_conversation_questions(include_descriptions=True)

    # Only answers from the third turn onward (evaluation and follow-up
    # questions) are shown to the user.
    first_visible_turn = 2

    history = None
    visible_outputs = []
    for i, question in enumerate(conversation):
        # The image prefix is attached to the first prompt only; later turns
        # rely on the accumulated history.
        prompt = image_prefix + question if i == 0 else question
        output, history = model.chat(
            tokenizer,
            pixel_values,
            prompt,
            generation_config={"max_new_tokens": 1024},
            num_patches_list=num_patches_list,
            history=history,
            return_history=True,
        )
        if i >= first_visible_turn:
            visible_outputs.append(output)

    # Join the retained answers into a single text block for display.
    return "\n\n".join(visible_outputs)
# Build the single-input, text-output web UI around the evaluation function,
# then start serving it.
demo = gr.Interface(
    title="InternVL3 AR Evaluation (Multi-turn)",
    description="Upload a short AR video clip. The model will sample frames and conduct a multi-turn dialogue to assess occlusion/rendering/placement/lighting.",
    fn=evaluate_ar_multi_turn,
    inputs=gr.Video(label="Upload your AR video"),
    outputs="text",
)
demo.launch()