# OneAlign / app.py
# Author: haoning.wu — commit bba21a6 ("update format")
# NOTE(review): the lines above/around this header were scraped Hugging Face
# page chrome ("raw", "history blame", "5.6 kB") — converted to comments so
# the file is valid Python.
import gradio as gr
import argparse
import datetime
import json
import os
import time
import gradio as gr
import requests
from PIL import Image
from q_align.model.builder import load_pretrained_model
from q_align.conversation import (default_conversation, conv_templates,
SeparatorStyle)
from q_align.constants import LOGDIR
from q_align.utils import (build_logger, server_error_msg,
violates_moderation, moderation_msg)
from q_align.evaluate.scorer import QAlignScorer, QAlignAestheticScorer, QAlignVideoScorer
import gradio as gr
def load_video(video_file):
    """Decode a video into a list of PIL images sampled at roughly 1 frame/second.

    Args:
        video_file: Path to a video file readable by decord.

    Returns:
        list[PIL.Image.Image]: one frame per second of video. Clips shorter
        than one second yield their first frame, so callers always receive at
        least one image (the original returned an empty list here, which
        crashed the downstream scorer).
    """
    from decord import VideoReader
    vr = VideoReader(video_file)
    # Average frame rate of the clip (decord reports a float).
    fps = vr.get_avg_fps()
    # Number of 1-second samples; guarantee at least one for very short clips.
    num_samples = max(1, int(len(vr) / fps))
    # fps is an *average*, so fps * i could in principle round past the last
    # frame — clamp each index into range.
    frame_indices = [min(int(fps * i), len(vr) - 1) for i in range(num_samples)]
    frames = vr.get_batch(frame_indices).asnumpy()
    return [Image.fromarray(frame) for frame in frames]
# Hugging Face checkpoint of the unified OneAlign model and the device it runs on.
pretrained = "q-future/one-align"
device = "cuda:0"

# Load the mPLUG-Owl2 backbone once; every scorer below shares these weights.
tokenizer, model, image_processor, _ = load_pretrained_model(pretrained, None, "mplug_owl2", device=device)

# All three task-specific scorers wrap the same loaded components.
_shared_components = dict(tokenizer=tokenizer, model=model, image_processor=image_processor)
iqa_scorer = QAlignScorer(**_shared_components)
iaa_scorer = QAlignAestheticScorer(**_shared_components)
vqa_scorer = QAlignVideoScorer(**_shared_components)

# Task label (as shown in the UI) -> scorer instance.
scorers = {
    "Image Aesthetics (IAA)": iaa_scorer,
    "Image Quality (IQA)": iqa_scorer,
    "Video Quality (VQA)": vqa_scorer,
}

# Rating-level names paired index-wise with their numeric values (5 = best).
LEVELS = ["excellent (5)", "good (4)", "fair (3)", "poor (2)", "bad (1)"]
scores = [5, 4, 3, 2, 1]
def image_classifier(input_img, input_vid, scorer_type):
    """Score an image or video with the selected Q-Align scorer.

    Args:
        input_img: PIL image from the Gradio image widget, or None.
        input_vid: Path to the uploaded video, or None. A video takes
            precedence over the image when both are provided.
        scorer_type: One of the keys of the module-level ``scorers`` dict;
            defaults to "Image Quality (IQA)" when None.

    Returns:
        tuple: ({rating level: probability}, probability-weighted score in [1, 5]).

    Raises:
        ValueError: if neither an image nor a video was uploaded (the original
            code crashed with an opaque UnboundLocalError on ``input_``).
    """
    if scorer_type is None:
        scorer_type = "Image Quality (IQA)"
    # Validate the inputs up front, before touching the model.
    if input_vid is not None:
        input_ = load_video(input_vid)
    elif input_img is not None:
        input_ = [input_img]
    else:
        raise ValueError("Please upload an image or a video before scoring.")
    this_scorer = scorers[scorer_type]
    # The video scorer expects a batch of frame lists, not a bare frame list.
    if "Video" in scorer_type:
        input_ = [input_]
    probs = this_scorer(input_).mean(0).tolist()
    prob_dict = {level: prob for level, prob in zip(LEVELS, probs)}
    # Expected score: probability-weighted average of the level values.
    score = sum(prob * value for value, prob in zip(scores, probs))
    return prob_dict, score
title_markdown = ("""
<h3 align="center">Q-Align: Teaching LMMs for Visual Scoring via Discrete Text-Defined Levels</h3>
<h3 align="center"> One Unified Model for Visual scoring. </h3>
<h5 align="center">
<a href="https://teowu.github.io/" target="_blank">Haoning Wu</a><sup>1</sup><sup>*</sup><sup>+</sup>,
<a href="https://github.com/zzc-1998" target="_blank">Zicheng Zhang</a><sup>2</sup><sup>*</sup>,
<a href="https://sites.google.com/view/r-panda" target="_blank">Weixia Zhang</a><sup>2</sup>,
<a href="https://chaofengc.github.io" target="_blank">Chaofeng Chen</a><sup>1</sup>,
<a href="https://liaoliang92.github.io" target="_blank">Liang Liao</a><sup>1</sup>,
<a href="https://github.com/lcysyzxdxc" target="_blank">Chunyi Li</a><sup>2</sup>,
</h5>
<h5 align="center">
<a href="https://github.com/YixuanGao98" target="_blank">Yixuan Gao</a><sup>2</sup>,
<a href="https://github.com/AnnanWangDaniel" target="_blank">Annan Wang</a><sup>1</sup>,
<a href="https://github.com/ZhangErliCarl/" target="_blank">Erli Zhang</a><sup>1</sup>,
<a href="https://wenxiusun.com" target="_blank">Wenxiu Sun</a><sup>3</sup>,
<a href="https://scholar.google.com/citations?user=uT9CtPYAAAAJ&hl=en" target="_blank">Qiong Yan</a><sup>3</sup>,
<a href="https://sites.google.com/site/minxiongkuo/" target="_blank">Xiongkuo Min</a><sup>2</sup>,
<a href="https://ee.sjtu.edu.cn/en/FacultyDetail.aspx?id=24&infoid=153&flag=153" target="_blank">Guangtao Zhai</a><sup>2</sup><sup>#</sup>,
<a href="https://personal.ntu.edu.sg/wslin/Home.html" target="_blank">Weisi Lin</a><sup>1</sup><sup>#</sup>
</h5>
<h5 align="center">
<sup>1</sup>Nanyang Technological University, <sup>2</sup>Shanghai Jiao Tong University, <sup>3</sup>Sensetime Research
</h5>
<h5 align="center">
<sup>*</sup>Equal contribution. <sup>+</sup>Project Lead. <sup>#</sup>Corresponding author(s).
</h5>
<h4 align="center"> If you like the OneScorer, please give us a star ✨ on <a href='https://github.com/Q-Future/Q-Align'>GitHub</a> for latest update. </h4>
<h5 align="center">
<div style="display:flex; gap: 0.25rem;" align="center">
<a href='https://q-align.github.io'><img src='https://img.shields.io/badge/Homepage-green'></a>
<a href='https://github.com/Q-Future/Q-Align'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
<a href="https://Q-Future.github.io/Q-Align/fig/Q_Align_v0_1_preview.pdf"><img src="https://img.shields.io/badge/Technical-Report-red"></a>
<a href='https://github.com/Q-Future/Q-Align/stargazers'><img src='https://img.shields.io/github/stars/Q-Future/Q-Align.svg?style=social'></a>
</div>
</h5>
""")
# --- Gradio UI wiring ---
input_img = gr.Image(type='pil', label="Upload an Image")
# Fixed user-facing typo ("INGORE" -> "IGNORE"). The redundant `info` kwarg is
# dropped: gr.Video does not document an `info` parameter (it is not a form
# component), and the text merely repeated the label.
input_vid = gr.Video(label="Upload a Video (will IGNORE the image if a video is uploaded)")
labels = gr.Label(label="Probabilities of rating levels:")
number = gr.Number(label="Output score:", info="Range in [1,5]. Higher is better.")
demo = gr.Interface(
    fn=image_classifier,
    inputs=[
        input_img,
        input_vid,
        gr.Radio(
            ["Image Aesthetics (IAA)", "Image Quality (IQA)", "Video Quality (VQA)"],
            label="Task",
            info="Which Scorer will you need?",
        ),
    ],
    outputs=[labels, number],
    title="OneScorer",
    description=title_markdown,
)
# share=True creates a public tunnel URL in addition to the local server.
demo.launch(share=True)