# Spaces:
# Runtime error
# Runtime error
import gradio as gr | |
import argparse | |
import datetime | |
import json | |
import os | |
import time | |
import gradio as gr | |
import requests | |
from PIL import Image | |
from q_align.model.builder import load_pretrained_model | |
from q_align.conversation import (default_conversation, conv_templates, | |
SeparatorStyle) | |
from q_align.constants import LOGDIR | |
from q_align.utils import (build_logger, server_error_msg, | |
violates_moderation, moderation_msg) | |
from q_align.evaluate.scorer import QAlignScorer, QAlignAestheticScorer, QAlignVideoScorer | |
import gradio as gr | |
def load_video(video_file):
    """Sample a video at roughly one frame per second as PIL images.

    Args:
        video_file: Video source, passed straight to ``decord.VideoReader``.

    Returns:
        A list of ``PIL.Image.Image`` objects: the frame at index
        ``int(fps * i)`` for every whole second ``i`` of the clip. A clip
        shorter than one second (or one reporting a non-positive average
        frame rate) yields its first frame instead of an empty list.
    """
    # Local import, so importing this module does not itself require decord.
    from decord import VideoReader

    vr = VideoReader(video_file)
    # Average frame rate as reported by decord.
    fps = vr.get_avg_fps()
    # Number of whole seconds in the clip; guard against a zero/invalid fps
    # so we never divide by zero.
    num_seconds = int(len(vr) / fps) if fps > 0 else 0
    # One index per second; fall back to the very first frame when the clip
    # is too short to contain a full second.
    frame_indices = [int(fps * i) for i in range(num_seconds)] or [0]
    frames = vr.get_batch(frame_indices).asnumpy()
    return [Image.fromarray(frame) for frame in frames]
# Checkpoint identifier and target device handed to the model loader.
pretrained = "q-future/one-align"
device = "cuda:0"

# The loader returns four values; the last one is not used by this script.
tokenizer, model, image_processor, _ = load_pretrained_model(
    pretrained,
    None,
    "mplug_owl2",
    device=device,
)

# All three scorers are built from the same tokenizer / model / processor.
_scorer_parts = dict(
    tokenizer=tokenizer,
    model=model,
    image_processor=image_processor,
)
iqa_scorer = QAlignScorer(**_scorer_parts)
iaa_scorer = QAlignAestheticScorer(**_scorer_parts)
vqa_scorer = QAlignVideoScorer(**_scorer_parts)

# Task name shown in the UI -> scorer instance.
scorers = {
    "Image Aesthetics (IAA)": iaa_scorer,
    "Image Quality (IQA)": iqa_scorer,
    "Video Quality (VQA)": vqa_scorer,
}

# Rating-level labels and the numeric value paired with each one
# (paired positionally via zip in image_classifier).
LEVELS = [
    "excellent (5)",
    "good (4)",
    "fair (3)",
    "poor (2)",
    "bad (1)",
]
scores = [5, 4, 3, 2, 1]
def image_classifier(input_img, input_vid, scorer_type):
    """Score an uploaded image or video with the selected scorer.

    Args:
        input_img: Uploaded image, or ``None``. Ignored when a video is given.
        input_vid: Uploaded video (handed to ``load_video``), or ``None``.
        scorer_type: A key of ``scorers``; ``None`` selects
            ``"Image Quality (IQA)"``.

    Returns:
        A ``(prob_dict, score)`` tuple: ``prob_dict`` maps each entry of
        ``LEVELS`` to its probability, and ``score`` is the sum of
        probability times the matching entry of ``scores``.

    Raises:
        gr.Error: If neither an image nor a video was uploaded, or if no
            frame could be read from the uploaded video.
    """
    if scorer_type is None:
        scorer_type = "Image Quality (IQA)"
    this_scorer = scorers[scorer_type]

    # A video takes precedence over an image when both are present.
    if input_vid is not None:
        input_ = load_video(input_vid)
    elif input_img is not None:
        input_ = [input_img]
    else:
        # Previously this path left ``input_`` unbound and crashed with an
        # UnboundLocalError; surface a readable message in the UI instead.
        raise gr.Error("Please upload an image or a video first.")

    if not input_:
        raise gr.Error("No frames could be read from the uploaded video.")

    # For a video scorer the whole frame list is wrapped in one more list
    # before being passed on.
    if "Video" in scorer_type:
        input_ = [input_]

    # Average over the first axis of the scorer output, then pair the values
    # with the level labels.
    probs = this_scorer(input_).mean(0).tolist()
    prob_dict = dict(zip(LEVELS, probs))
    score = sum(prob * level_score for level_score, prob in zip(scores, probs))
    return prob_dict, score
# HTML header (title, authors, affiliations, badges) passed to gr.Interface
# as the page description. Stray " | |" extraction residue has been removed
# from every line; the text itself is unchanged.
title_markdown = ("""
<h3 align="center">Q-Align: Teaching LMMs for Visual Scoring via Discrete Text-Defined Levels</h3>
<h3 align="center"> One Unified Model for Visual scoring. </h3>
<h5 align="center">
<a href="https://teowu.github.io/" target="_blank">Haoning Wu</a><sup>1</sup><sup>*</sup><sup>+</sup>,
<a href="https://github.com/zzc-1998" target="_blank">Zicheng Zhang</a><sup>2</sup><sup>*</sup>,
<a href="https://sites.google.com/view/r-panda" target="_blank">Weixia Zhang</a><sup>2</sup>,
<a href="https://chaofengc.github.io" target="_blank">Chaofeng Chen</a><sup>1</sup>,
<a href="https://liaoliang92.github.io" target="_blank">Liang Liao</a><sup>1</sup>,
<a href="https://github.com/lcysyzxdxc" target="_blank">Chunyi Li</a><sup>2</sup>,
</h5>
<h5 align="center">
<a href="https://github.com/YixuanGao98" target="_blank">Yixuan Gao</a><sup>2</sup>,
<a href="https://github.com/AnnanWangDaniel" target="_blank">Annan Wang</a><sup>1</sup>,
<a href="https://github.com/ZhangErliCarl/" target="_blank">Erli Zhang</a><sup>1</sup>,
<a href="https://wenxiusun.com" target="_blank">Wenxiu Sun</a><sup>3</sup>,
<a href="https://scholar.google.com/citations?user=uT9CtPYAAAAJ&hl=en" target="_blank">Qiong Yan</a><sup>3</sup>,
<a href="https://sites.google.com/site/minxiongkuo/" target="_blank">Xiongkuo Min</a><sup>2</sup>,
<a href="https://ee.sjtu.edu.cn/en/FacultyDetail.aspx?id=24&infoid=153&flag=153" target="_blank">Guangtao Zhai</a><sup>2</sup><sup>#</sup>,
<a href="https://personal.ntu.edu.sg/wslin/Home.html" target="_blank">Weisi Lin</a><sup>1</sup><sup>#</sup>
</h5>
<h5 align="center">
<sup>1</sup>Nanyang Technological University, <sup>2</sup>Shanghai Jiao Tong University, <sup>3</sup>Sensetime Research
</h5>
<h5 align="center">
<sup>*</sup>Equal contribution. <sup>+</sup>Project Lead. <sup>#</sup>Corresponding author(s).
</h5>
<h4 align="center"> If you like the OneScorer, please give us a star ✨ on <a href='https://github.com/Q-Future/Q-Align'>GitHub</a> for latest update. </h4>
<h5 align="center">
<div style="display:flex; gap: 0.25rem;" align="center">
<a href='https://q-align.github.io'><img src='https://img.shields.io/badge/Homepage-green'></a>
<a href='https://github.com/Q-Future/Q-Align'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
<a href="https://Q-Future.github.io/Q-Align/fig/Q_Align_v0_1_preview.pdf"><img src="https://img.shields.io/badge/Technical-Report-red"></a>
<a href='https://github.com/Q-Future/Q-Align/stargazers'><img src='https://img.shields.io/github/stars/Q-Future/Q-Align.svg?style=social'></a>
</div>
</h5>
""")
# --- Gradio UI -------------------------------------------------------------
# Inputs: an image (delivered to the callback as a PIL image) and a video.
input_img = gr.Image(type='pil', label="Upload an Image")
# NOTE(review): ``info=`` is deliberately not passed to gr.Video. As far as I
# can tell its constructor does not declare that keyword in Gradio 4.x, which
# makes the script fail at start-up with a TypeError; confirm against the
# pinned Gradio version. The label already carries the same message.
input_vid = gr.Video(label="Upload a Video (will IGNORE the image if a video is uploaded)")

# Outputs: per-level probabilities and the scalar score.
labels = gr.Label(label="Probabilities of rating levels:")
number = gr.Number(label="Output score:", info="Range in [1,5]. Higher is better.")

# Task selector; the choices are the keys image_classifier looks up in ``scorers``.
task_selector = gr.Radio(
    ["Image Aesthetics (IAA)", "Image Quality (IQA)", "Video Quality (VQA)"],
    label="Task",
    info="Which Scorer will you need?",
)

demo = gr.Interface(
    fn=image_classifier,
    inputs=[input_img, input_vid, task_selector],
    outputs=[labels, number],
    title="OneScorer",
    description=title_markdown,
)
demo.launch(share=True)