Spaces:

city96
/

CityAesthetics-demo

Running

City

Sync with GitHub

9ac551a 12 months ago

3.33 kB

	import os
	import torch
	from safetensors.torch import load_file
	from huggingface_hub import hf_hub_download
	from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection

	from model import AestheticPredictorModel

	class CityAestheticsPipeline:
	"""
	Demo model pipeline for [image=>score] prediction
	Accepts a single model path on initialization.
	Resulting object can be called directly with a PIL image as the input
	Returns a single float value with the predicted score [0.0;1.0].
	"""
	clip_ver = "openai/clip-vit-large-patch14"
	def __init__(self, model_path, device="cpu", clip_dtype=torch.float32):
	self.device = device
	self.clip_dtype = clip_dtype
	self._init_clip()
	self.model = self._load_model(model_path)
	print("CityAesthetics: Pipeline init ok") # debug

	def __call__(self, raw):
	emb = self.get_clip_emb(raw)
	return self.get_model_pred(self.model, emb)

	def get_model_pred(self, model, emb):
	with torch.no_grad():
	pred = model(emb)
	return float(pred.detach().cpu().squeeze(0))

	def get_clip_emb(self, raw):
	img = self.proc(
	images = raw,
	return_tensors = "pt"
	)["pixel_values"].to(self.clip_dtype).to(self.device)
	with torch.no_grad():
	emb = self.clip(pixel_values=img)
	return emb["image_embeds"].detach().to(torch.float32)

	def _init_clip(self):
	self.proc = CLIPImageProcessor.from_pretrained(self.clip_ver)
	self.clip = CLIPVisionModelWithProjection.from_pretrained(
	self.clip_ver,
	device_map = self.device,
	torch_dtype = self.clip_dtype,
	)

	def _load_model(self, path):
	sd = load_file(path)
	assert tuple(sd["up.0.weight"].shape) == (1024, 768) # only allow CLIP ver
	model = AestheticPredictorModel()
	model.eval()
	model.load_state_dict(sd)
	model.to(self.device)
	return model

	class CityAestheticsMultiModelPipeline(CityAestheticsPipeline):
	"""
	Demo multi-model pipeline for [image=>score] prediction
	Accepts a list of model paths on initialization.
	Resulting object can be called directly with a PIL image as the input.
	Returns a dict with the model name as key and the score [0.0;1.0] as a value.
	"""
	def __init__(self, model_paths, device="cpu", clip_dtype=torch.float32):
	self.device = device
	self.clip_dtype = clip_dtype
	self._init_clip()
	self.models = {}
	for path in model_paths:
	name = os.path.splitext(os.path.basename(path))[0]
	self.models[name] = self._load_model(path)
	print("CityAesthetics: Pipeline init ok") # debug

	def __call__(self, raw):
	emb = self.get_clip_emb(raw)
	out = {}
	for name, model in self.models.items():
	pred = model(emb)
	out[name] = self.get_model_pred(model, emb)
	return out

	def get_model_path(name, repo, token=True):
	"""
	Returns local model path or falls back to HF hub if required.
	"""
	fname = f"{name}.safetensors"

	# local path: [models/AesPred-Anime-v1.8.safetensors]
	path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"models")
	if os.path.isfile(os.path.join(path, fname)):
	print("CityAesthetics: Using local model")
	return os.path.join(path, fname)

	# huggingface hub fallback
	print("CityAesthetics: Using HF Hub model")
	return str(hf_hub_download(
	token = token,
	repo_id = repo,
	filename = fname,
	))