import gradio as gr import argparse import datetime import json import os import time import gradio as gr import requests from PIL import Image from q_align.model.builder import load_pretrained_model from q_align.conversation import (default_conversation, conv_templates, SeparatorStyle) from q_align.constants import LOGDIR from q_align.utils import (build_logger, server_error_msg, violates_moderation, moderation_msg) from q_align.evaluate.scorer import QAlignScorer, QAlignAestheticScorer, QAlignVideoScorer import gradio as gr def load_video(video_file): from decord import VideoReader vr = VideoReader(video_file) # Get video frame rate fps = vr.get_avg_fps() # Calculate frame indices for 1fps frame_indices = [int(fps * i) for i in range(int(len(vr) / fps))] frames = vr.get_batch(frame_indices).asnumpy() return [Image.fromarray(frames[i]) for i in range(int(len(vr) / fps))] pretrained="q-future/one-align" device="cuda:0" tokenizer, model, image_processor, _ = load_pretrained_model(pretrained, None, "mplug_owl2", device=device) iqa_scorer = QAlignScorer(tokenizer=tokenizer, model=model, image_processor=image_processor) iaa_scorer = QAlignAestheticScorer(tokenizer=tokenizer, model=model, image_processor=image_processor) vqa_scorer = QAlignVideoScorer(tokenizer=tokenizer, model=model, image_processor=image_processor) scorers = {"Image Aesthetics (IAA)": iaa_scorer, "Image Quality (IQA)": iqa_scorer, "Video Quality (VQA)": vqa_scorer} LEVELS = ["excellent (5)", "good (4)", "fair (3)", "poor (2)", "bad (1)"] scores = [5,4,3,2,1] def image_classifier(input_img, input_vid, scorer_type): if scorer_type is None: scorer_type = "Image Quality (IQA)" this_scorer = scorers[scorer_type] if input_vid is not None: input_ = load_video(input_vid) elif input_img is not None: input_ = [input_img] if "Video" in scorer_type: input_ = [input_] probs = this_scorer(input_).mean(0).tolist() prob_dict = {LEVEL: prob for LEVEL, prob in zip(LEVELS, probs)} score = sum([prob * score for score, prob in zip(scores, probs)]) return prob_dict, score title_markdown = ("""