shunk031
/

aesthetics-predictor-v1-vit-large-patch14

Feature Extraction

aesthetics_predictor

Model card Files Files and versions Community

aesthetics-predictor-v1-vit-large-patch14 / modeling_v1.py

shunk031's picture

Upload AestheticsPredictorV1

b7e635a verified 25 days ago

raw history blame

No virus

2.34 kB

	from typing import Dict, Final, Optional, Tuple, Union

	import torch
	import torch.nn as nn
	from transformers import CLIPVisionModelWithProjection, logging
	from transformers.modeling_outputs import ImageClassifierOutputWithNoAttention

	from .configuration_predictor import AestheticsPredictorConfig

	# Module-level side effect: set the transformers logging verbosity to the
	# error level as soon as this module is imported.
	logging.set_verbosity_error()

	# Maps a model identifier to the URL of a ``.pth`` checkpoint in the
	# LAION-AI/aesthetic-predictor GitHub repository. ``convert_from_openai_clip``
	# downloads the entry for the requested name and loads it into the
	# ``predictor`` layer of ``AestheticsPredictorV1``.
	# NOTE(review): the file names suggest one linear-head checkpoint per CLIP
	# variant (ViT-B/16, ViT-B/32, ViT-L/14) -- confirm against the upstream repo.
	URLS: Final[Dict[str, str]] = {
	"openai/clip-vit-base-patch16": "https://github.com/LAION-AI/aesthetic-predictor/raw/main/sa_0_4_vit_b_16_linear.pth",
	"openai/clip-vit-base-patch32": "https://github.com/LAION-AI/aesthetic-predictor/raw/main/sa_0_4_vit_b_32_linear.pth",
	"openai/clip-vit-large-patch14": "https://github.com/LAION-AI/aesthetic-predictor/raw/main/sa_0_4_vit_l_14_linear.pth",
	}


	class AestheticsPredictorV1(CLIPVisionModelWithProjection):
	    """CLIP vision model with projection plus a single-output linear head.

	    The projected image embedding produced by the parent model is
	    L2-normalized along its last dimension and passed through ``predictor``,
	    an ``nn.Linear(config.projection_dim, 1)`` layer, to produce one score
	    per embedding.
	    """

	    def __init__(self, config: AestheticsPredictorConfig) -> None:
	        """Build the parent CLIP vision model and attach the linear head.

	        Args:
	            config: Model configuration; ``config.projection_dim`` sets the
	                input width of the ``predictor`` layer.
	        """
	        super().__init__(config)
	        self.predictor = nn.Linear(config.projection_dim, 1)
	        # Run the transformers post-init hook after the head has been created.
	        # NOTE(review): presumably so the new layer is covered by the
	        # library's weight initialization -- confirm against
	        # ``PreTrainedModel.post_init``.
	        self.post_init()

	    def forward(
	        self,
	        pixel_values: Optional[torch.FloatTensor] = None,
	        output_attentions: Optional[bool] = None,
	        output_hidden_states: Optional[bool] = None,
	        return_dict: Optional[bool] = None,
	    ) -> Union[Tuple, ImageClassifierOutputWithNoAttention]:
	        """Compute the prediction for a batch of images.

	        Args:
	            pixel_values: Image tensor forwarded unchanged to the parent model.
	            output_attentions: Forwarded unchanged to the parent model.
	            output_hidden_states: Forwarded unchanged to the parent model.
	            return_dict: Whether to return an output object instead of a
	                tuple. Falls back to ``self.config.use_return_dict`` when
	                ``None``.

	        Returns:
	            When ``return_dict`` is false, the tuple
	            ``(None, prediction, image_embeds)``. Otherwise an
	            ``ImageClassifierOutputWithNoAttention`` with ``loss=None``,
	            ``logits=prediction`` and ``hidden_states=image_embeds``, where
	            ``image_embeds`` is the normalized embedding.
	        """
	        return_dict = (
	            return_dict if return_dict is not None else self.config.use_return_dict
	        )

	        outputs = super().forward(
	            pixel_values=pixel_values,
	            output_attentions=output_attentions,
	            output_hidden_states=output_hidden_states,
	            return_dict=return_dict,
	        )
	        # The first element of the parent output is the projected image
	        # embedding, for both the tuple and the output-object form.
	        image_embeds = outputs[0]

	        # Normalize out of place. ``norm()`` keeps its input for the backward
	        # pass, so dividing that same tensor in place (``/=``) invalidates the
	        # saved value and makes autograd fail when gradients flow through the
	        # backbone. It would also silently mutate the parent's output object.
	        image_embeds = image_embeds / image_embeds.norm(dim=-1, keepdim=True)

	        prediction = self.predictor(image_embeds)

	        if not return_dict:
	            # The leading ``None`` stands in for the (absent) loss so tuple
	            # positions line up with the fields of the output object below.
	            return (None, prediction, image_embeds)

	        return ImageClassifierOutputWithNoAttention(
	            loss=None,
	            logits=prediction,
	            hidden_states=image_embeds,
	        )


	def convert_from_openai_clip(openai_model_name: str) -> AestheticsPredictorV1:
	    """Build an ``AestheticsPredictorV1`` from a CLIP checkpoint and head weights.

	    The model is loaded with ``AestheticsPredictorV1.from_pretrained``; the
	    state dict downloaded from ``URLS[openai_model_name]`` is then loaded into
	    ``model.predictor`` and the model is switched to evaluation mode.

	    Args:
	        openai_model_name: Model identifier; must be one of the keys of
	            ``URLS``.

	    Returns:
	        The assembled model, in evaluation mode.

	    Raises:
	        KeyError: If ``openai_model_name`` has no entry in ``URLS``.
	    """
	    # Resolve the head checkpoint first so an unsupported name fails
	    # immediately, before the (much more expensive) model load below.
	    try:
	        url = URLS[openai_model_name]
	    except KeyError:
	        supported = ", ".join(sorted(URLS))
	        raise KeyError(
	            f"No aesthetics predictor weights registered for "
	            f"{openai_model_name!r}; supported models: {supported}"
	        ) from None

	    model = AestheticsPredictorV1.from_pretrained(openai_model_name)

	    # Map the downloaded tensors to CPU so loading does not depend on the
	    # device the checkpoint was saved from; ``load_state_dict`` copies the
	    # values into the existing parameters of ``model.predictor``.
	    state_dict = torch.hub.load_state_dict_from_url(url, map_location="cpu")
	    model.predictor.load_state_dict(state_dict)
	    model.eval()

	    return model