import gradio as gr
import torch
import os
import sys
from PIL import Image
import uuid
import huggingface_hub
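
# Make the repository root importable so `hpsv3` resolves when the demo runs from this subdirectory.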
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from hpsv3.inference import HPSv3RewardInferencer

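# ImageReward and HPSv2 are optional baselines; if they are missing, selecting them in the UI reports a load error.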
try:
    import ImageReward as RM
    from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
except ImportError:
    RM = None
    create_model_and_transforms = None
    get_tokenizer = None
    print("ImageReward or HPSv2 dependencies not found. Skipping those models.")

from transformers import AutoProcessor, AutoModel

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DTYPE = torch.bfloat16 if DEVICE == 'cuda' else torch.float32

MODEL_CONFIGS = {
    "HPSv3_7B": {
        "name": "HPSv3 7B",
        "type": "hpsv3"
    },
    "HPSv2": {
        "name": "HPSv2",
        "checkpoint_path": "xswu/HPSv2/HPS_v2.1_compressed.pt",
        "type": "hpsv2"
    },
    "ImageReward": {
        "name": "ImageReward v1.0",
        "checkpoint_path": "ImageReward-v1.0",
        "type": "imagereward"
    },
    "PickScore": {
        "name": "PickScore",
        "checkpoint_path": "yuvalkirstain/PickScore_v1",
        "type": "pickscore"
    },
    "CLIP": {
        "name": "CLIP ViT-H-14",
        "checkpoint_path": "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
        "type": "clip"
    }
}

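# Simple one-slot cache: only a single model is kept in memory at a time to bound GPU usage.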
current_models = {}
current_model_name = None


def load_model(model_key, update_status_fn=None):
    """Load the specified model based on the model key."""
    global current_models, current_model_name

    if model_key == current_model_name and model_key in current_models:
        return current_models[model_key]

    if update_status_fn:
        update_status_fn(f"🔄 Loading {MODEL_CONFIGS[model_key]['name']}...")

    # Evict any previously loaded model, then release cached GPU memory.
    current_models.clear()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    config = MODEL_CONFIGS[model_key]

    try:
        if config["type"] == "hpsv3":
            checkpoint_path = huggingface_hub.hf_hub_download("MizzenAI/HPSv3", 'HPSv3.safetensors', repo_type='model')
            model = HPSv3RewardInferencer(
                device=DEVICE,
                checkpoint_path=checkpoint_path
            )
        elif config["type"] == "hpsv2":
            model_obj, preprocess_train, preprocess_val = create_model_and_transforms(
                'ViT-H-14',
                'laion2B-s32B-b79K',
                precision='amp',
                device=DEVICE,
                jit=False,
                force_quick_gelu=False,
                force_custom_text=False,
                force_patch_dropout=False,
                force_image_size=None,
                pretrained_image=False,
                image_mean=None,
                image_std=None,
                light_augmentation=True,
                aug_cfg={},
                output_dict=True,
                with_score_predictor=False,
                with_region_predictor=False
            )
            checkpoint_path = huggingface_hub.hf_hub_download("xswu/HPSv2", 'HPS_v2.1_compressed.pt', repo_type='model')
            checkpoint = torch.load(checkpoint_path, map_location=DEVICE, weights_only=False)
            model_obj.load_state_dict(checkpoint['state_dict'])
            model_obj = model_obj.to(DEVICE).eval()
            tokenizer = get_tokenizer('ViT-H-14')
            model = {"model": model_obj, "preprocess_val": preprocess_val, "tokenizer": tokenizer}
        elif config["type"] == "imagereward":
            model = RM.load(config["checkpoint_path"])
        elif config["type"] == "pickscore":
            # PickScore uses the CLIP ViT-H-14 processor; load it from the Hub rather than a hardcoded local path.
            processor = AutoProcessor.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
            model_obj = AutoModel.from_pretrained(config["checkpoint_path"]).eval().to(DEVICE)
            model = {"model": model_obj, "processor": processor}
        elif config["type"] == "clip":
            model_obj = AutoModel.from_pretrained(config["checkpoint_path"]).to(DEVICE)
            processor = AutoProcessor.from_pretrained(config["checkpoint_path"])
            model = {"model": model_obj, "processor": processor}
        else:
            raise ValueError(f"Unknown model type: {config['type']}")

        current_models[model_key] = model
        current_model_name = model_key

        if update_status_fn:
            update_status_fn(f"✅ {MODEL_CONFIGS[model_key]['name']} loaded successfully!")

        return model
    except Exception as e:
        error_msg = f"Error loading model {model_key}: {e}"
        print(error_msg)
        if update_status_fn:
            update_status_fn(f"❌ {error_msg}")
        return None


def score_with_model(model_key, image_paths, prompts):
    """Score images using the specified model."""
    model = load_model(model_key)
    if model is None:
        raise ValueError(f"Failed to load model {model_key}")

    config = MODEL_CONFIGS[model_key]

    if config["type"] == "hpsv3":
        rewards = model.reward(image_paths, prompts)
        return [reward[0].item() for reward in rewards]
    elif config["type"] == "hpsv2":
        return score_hpsv2_batch(model, image_paths, prompts)
    elif config["type"] == "imagereward":
        return [model.score(prompt, image_path) for prompt, image_path in zip(prompts, image_paths)]
    elif config["type"] == "pickscore":
        return score_pickscore_batch(prompts, image_paths, model["model"], model["processor"])
    elif config["type"] == "clip":
        return score_clip_batch(model["model"], model["processor"], image_paths, prompts)
    else:
        raise ValueError(f"Unknown model type: {config['type']}")
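
# Note: score scales differ across backends (e.g. CLIP cosine similarity vs. reward-model
# outputs), so values are comparable within a model but not across models.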


def score_hpsv2_batch(model_dict, image_paths, prompts):
    """Score a batch of images with the HPSv2 model."""
    model = model_dict['model']
    preprocess_val = model_dict['preprocess_val']
    tokenizer = model_dict['tokenizer']

    # Keep only the RGB channels in case an uploaded PNG carries an alpha channel.
    images = [preprocess_val(Image.open(p)).unsqueeze(0)[:, :3, :, :] for p in image_paths]
    images = torch.cat(images, dim=0).to(device=DEVICE)
    texts = tokenizer(prompts).to(device=DEVICE)
    with torch.no_grad():
        outputs = model(images, texts)
        image_features, text_features = outputs["image_features"], outputs["text_features"]
        logits_per_image = image_features @ text_features.T
        # The diagonal pairs image i with its own prompt i.
        hps_scores = torch.diagonal(logits_per_image).cpu()
    return [score.item() for score in hps_scores]


def score_pickscore_batch(prompts, image_paths, model, processor):
    """Score a batch of images with the PickScore model."""
    pil_images = [Image.open(p) for p in image_paths]
    image_inputs = processor(
        images=pil_images,
        padding=True,
        truncation=True,
        max_length=77,
        return_tensors="pt",
    ).to(DEVICE)

    text_inputs = processor(
        text=prompts,
        padding=True,
        truncation=True,
        max_length=77,
        return_tensors="pt",
    ).to(DEVICE)

    with torch.no_grad():
        # Cosine similarity of L2-normalized embeddings, scaled by the model's learned temperature.
        image_embs = model.get_image_features(**image_inputs)
        image_embs = image_embs / torch.norm(image_embs, dim=-1, keepdim=True)
        text_embs = model.get_text_features(**text_inputs)
        text_embs = text_embs / torch.norm(text_embs, dim=-1, keepdim=True)
        scores = model.logit_scale.exp() * (text_embs @ image_embs.T)
    return [scores[i, i].cpu().item() for i in range(len(prompts))]


def score_clip_batch(model, processor, image_paths, prompts):
    """Score a batch of images with the CLIP model."""
    pil_images = [Image.open(p) for p in image_paths]
    image_inputs = processor(
        images=pil_images,
        padding=True,
        truncation=True,
        max_length=77,
        return_tensors="pt",
    ).to(DEVICE)

    text_inputs = processor(
        text=prompts,
        padding=True,
        truncation=True,
        max_length=77,
        return_tensors="pt",
    ).to(DEVICE)

    with torch.no_grad():
        # Plain cosine similarity with no temperature scaling, so scores fall roughly in [-1, 1].
        image_embs = model.get_image_features(**image_inputs)
        image_embs = image_embs / torch.norm(image_embs, dim=-1, keepdim=True)
        text_embs = model.get_text_features(**text_inputs)
        text_embs = text_embs / torch.norm(text_embs, dim=-1, keepdim=True)
        scores = image_embs @ text_embs.T
    return [scores[i, i].cpu().item() for i in range(len(prompts))]


| | print("Loading default HPSv3 model...") |
| | load_model("HPSv3_7B") |
| | print("Model loaded successfully.") |
| |
|
| | |
def get_score_interpretation(score):
    """Return a color-coded qualitative interpretation of the score.

    Note: the thresholds below are tuned for HPSv3-style score ranges;
    other backends produce scores on different scales.
    """
    if score is None:
        return ""

    if score < 0:
        color = "#ef4444"
        bg_color = "rgba(239, 68, 68, 0.1)"
        icon = "❌"
        feedback = "Poor Quality"
        comment = "The image has significant quality issues or doesn't match the prompt well."
    elif score < 5:
        color = "#f59e0b"
        bg_color = "rgba(245, 158, 11, 0.1)"
        icon = "⚠️"
        feedback = "Needs Improvement"
        comment = "The image is acceptable but could be enhanced in quality or prompt alignment."
    elif score < 10:
        color = "#10b981"
        bg_color = "rgba(16, 185, 129, 0.1)"
        icon = "✅"
        feedback = "Good Quality"
        comment = "A well-crafted image that aligns nicely with the given prompt."
    else:
        color = "#06d6a0"
        bg_color = "rgba(6, 214, 160, 0.1)"
        icon = "⭐"
        feedback = "Excellent!"
        comment = "Outstanding quality and perfect alignment with the prompt."

    return f"""
    <div style='
        background: {bg_color};
        border: 2px solid {color};
        border-radius: 16px;
        padding: 20px;
        text-align: center;
        margin: 10px 0;
    '>
        <div style='font-size: 2rem; margin-bottom: 8px;'>{icon}</div>
        <h3 style='color: {color}; font-size: 1.4rem; font-weight: 700; margin: 8px 0;'>{feedback}</h3>
        <p style='color: #666; font-size: 0.95rem; margin: 0; line-height: 1.4;'>{comment}</p>
    </div>
    """


def handle_model_change(model_key):
    """Handle a model selection change, yielding status updates while loading."""
    global current_model_name

    if model_key != current_model_name:
        yield f"🔄 Loading {MODEL_CONFIGS[model_key]['name']}..."

        model = load_model(model_key)

        if model is not None:
            yield f"✅ Current model: {MODEL_CONFIGS[model_key]['name']}"
        else:
            yield f"❌ Failed to load {MODEL_CONFIGS[model_key]['name']}"
    else:
        yield f"✅ Current model: {MODEL_CONFIGS[model_key]['name']}"


def predict_score(image, prompt, model_name):
    """Take Gradio inputs and return the score, interpretation, and status."""
    if image is None:
        return None, "", "❌ Error: Please upload an image."
    if not prompt or not prompt.strip():
        return None, "", "❌ Error: Please enter a prompt."

    # Gradio hands us a numpy array; the scorers expect file paths, so write a temp file.
    temp_dir = "temp_images_for_gradio"
    os.makedirs(temp_dir, exist_ok=True)
    temp_path = os.path.join(temp_dir, f"{uuid.uuid4()}.png")

    try:
        Image.fromarray(image).save(temp_path)
        scores = score_with_model(model_name, [temp_path], [prompt])
        score = round(scores[0], 4)
        interpretation = get_score_interpretation(score)
        return score, interpretation, "✅ Analysis completed successfully!"
    except Exception as e:
        print(f"An error occurred during inference: {e}")
        return None, "", f"❌ Processing error: {e}"
    finally:
        if os.path.exists(temp_path):
            os.remove(temp_path)


def compare_images(image1, image2, prompt, model_name):
    """Compare two images and determine which better matches the prompt."""
    if image1 is None or image2 is None:
        return None, None, "", "❌ Error: Please upload both images."
    if not prompt or not prompt.strip():
        return None, None, "", "❌ Error: Please enter a prompt."

    temp_dir = "temp_images_for_gradio"
    os.makedirs(temp_dir, exist_ok=True)
    temp_path1 = os.path.join(temp_dir, f"{uuid.uuid4()}_img1.png")
    temp_path2 = os.path.join(temp_dir, f"{uuid.uuid4()}_img2.png")

    try:
        Image.fromarray(image1).save(temp_path1)
        Image.fromarray(image2).save(temp_path2)

        # Score both images against the same prompt in a single batch.
        scores = score_with_model(model_name, [temp_path1, temp_path2], [prompt, prompt])
        score1 = round(scores[0], 4)
        score2 = round(scores[1], 4)

        if score1 > score2:
            winner_text = f"🏆 **Image 1 is better!**\n\nImage 1 Score: **{score1}**\nImage 2 Score: **{score2}**\n\nDifference: **+{round(score1 - score2, 4)}**"
        elif score2 > score1:
            winner_text = f"🏆 **Image 2 is better!**\n\nImage 1 Score: **{score1}**\nImage 2 Score: **{score2}**\n\nDifference: **+{round(score2 - score1, 4)}**"
        else:
            winner_text = f"🤝 **It's a tie!**\n\nBoth images scored: **{score1}**"

        return score1, score2, winner_text, "✅ Comparison completed successfully!"

    except Exception as e:
        print(f"An error occurred during comparison: {e}")
        return None, None, "", f"❌ Processing error: {e}"
    finally:
        if os.path.exists(temp_path1):
            os.remove(temp_path1)
        if os.path.exists(temp_path2):
            os.remove(temp_path2)


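# Build the Gradio UI: one tab scores a single image, another compares two images against the same prompt.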
with gr.Blocks(theme=gr.themes.Soft(), title="HPSv3 - Human Preference Score v3") as demo:
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 20px;">
        <h1>🎨 HPSv3: Human Preference Score v3</h1>
        <p>Evaluate image quality and prompt alignment with multiple models.</p>
        <p><a href="https://mizzenai.github.io/HPSv3.project/" target="_blank">🌐 Project Website</a> |
           <a href="https://huggingface.co/papers/2508.03789" target="_blank">📄 Paper</a> |
           <a href="https://github.com/MizzenAI/HPSv3" target="_blank">💻 Code</a></p>
    </div>
    """)

    with gr.Row():
        model_selector = gr.Dropdown(
            choices=[(config["name"], key) for key, config in MODEL_CONFIGS.items()],
            value="HPSv3_7B",
            label="🤖 Select Model",
        )
        model_status = gr.Textbox(
            label="Model Status",
            value=f"✅ Current model: {MODEL_CONFIGS['HPSv3_7B']['name']}",
            interactive=False,
            scale=2
        )

    with gr.Tabs():
        with gr.TabItem("📊 Image Scoring"):
            with gr.Row(equal_height=False):
                with gr.Column(scale=2):
                    with gr.Group():
                        gr.Markdown("### 🖼️ **Upload & Describe**")
                        image_input = gr.Image(
                            type="numpy",
                            label="Upload Image",
                            height=450
                        )
                        prompt_input = gr.Textbox(
                            label="Prompt Description",
                            placeholder="Describe what the image should represent...",
                            lines=3,
                            max_lines=5
                        )

                with gr.Column(scale=1):
                    with gr.Group():
                        gr.Markdown("### 🎯 **Quality Assessment**")
                        score_output = gr.Number(
                            label="Score",
                            elem_id="score-output",
                            precision=4
                        )
                        interpretation_output = gr.Markdown(label="")
                        status_output = gr.Textbox(
                            label="Status",
                            interactive=False
                        )
                        submit_button = gr.Button(
                            "🚀 Run Evaluation",
                            variant="primary",
                            size="lg"
                        )

            submit_button.click(
                fn=predict_score,
                inputs=[image_input, prompt_input, model_selector],
                outputs=[score_output, interpretation_output, status_output]
            )

            with gr.Group():
                gr.Examples(
                    examples=[
                        ["assets/example1.png", "cute chibi anime cartoon fox, smiling wagging tail with a small cartoon heart above sticker, high resolution, vibrant colors"],
                        ["assets/example2.png", "cute chibi anime cartoon fox, smiling wagging tail with a small cartoon heart above sticker, high resolution, vibrant colors"],
                    ],
                    inputs=[image_input, prompt_input],
                    outputs=[score_output, interpretation_output, status_output],
                    fn=lambda img, prompt: predict_score(img, prompt, "HPSv3_7B"),
                    cache_examples=False
                )

        with gr.TabItem("⚖️ Image Comparison"):
            with gr.Row(equal_height=False):
                with gr.Column(scale=2):
                    with gr.Group():
                        gr.Markdown("### 🖼️ **Upload Images & Prompt**")
                        with gr.Row():
                            image1_input = gr.Image(
                                type="numpy",
                                label="Image 1",
                                height=300
                            )
                            image2_input = gr.Image(
                                type="numpy",
                                label="Image 2",
                                height=300
                            )
                        prompt_compare_input = gr.Textbox(
                            label="Prompt Description",
                            placeholder="Describe what the images should represent...",
                            lines=3,
                            max_lines=5
                        )

                with gr.Column(scale=1):
                    with gr.Group():
                        gr.Markdown("### 🎯 **Comparison Results**")
                        score1_output = gr.Number(
                            label="Image 1 Score",
                            precision=4
                        )
                        score2_output = gr.Number(
                            label="Image 2 Score",
                            precision=4
                        )
                        comparison_result = gr.Markdown(label="Winner")
                        status_compare_output = gr.Textbox(
                            label="Status",
                            interactive=False
                        )

                        compare_button = gr.Button(
                            "⚖️ Compare Images",
                            variant="primary",
                            size="lg"
                        )

            compare_button.click(
                fn=compare_images,
                inputs=[image1_input, image2_input, prompt_compare_input, model_selector],
                outputs=[score1_output, score2_output, comparison_result, status_compare_output]
            )

            with gr.Group():
                gr.Examples(
                    examples=[
                        ["assets/example1.png", "assets/example2.png", "cute chibi anime cartoon fox, smiling wagging tail with a small cartoon heart above sticker, high resolution, vibrant colors"],
                        ["assets/example2.png", "assets/example1.png", "cute chibi anime cartoon fox, smiling wagging tail with a small cartoon heart above sticker, high resolution, vibrant colors"],
                    ],
                    inputs=[image1_input, image2_input, prompt_compare_input],
                    outputs=[score1_output, score2_output, comparison_result, status_compare_output],
                    fn=lambda img1, img2, prompt: compare_images(img1, img2, prompt, "HPSv3_7B"),
                    cache_examples=False
                )

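    # handle_model_change is a generator, so the status textbox streams updates while a model loads.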
    model_selector.change(
        fn=handle_model_change,
        inputs=[model_selector],
        outputs=[model_status]
    )


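# The server binds to 0.0.0.0:7860, so the demo is reachable locally at http://localhost:7860.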
def main():
    """Launch the demo."""
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        favicon_path=None,
        show_error=True,
    )


if __name__ == "__main__":
    main()