File size: 2,329 Bytes
c490bed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
from transformers import CLIPTokenizer
class SDXLTokenizer:
    """Wrapper around the two HuggingFace CLIP tokenizers used by SDXL.

    Tokenizes a prompt with both tokenizers and returns the joined output.

    Args:
        file_path_or_name (str): Name or path of the model whose ``tokenizer``
            and ``tokenizer_2`` subfolders are loaded. Defaults to
            'stabilityai/stable-diffusion-xl-base-1.0'.
        **kwargs: Additional keyword arguments forwarded unchanged to both
            ``CLIPTokenizer.from_pretrained`` calls.
    """

    def __init__(self, file_path_or_name='stabilityai/stable-diffusion-xl-base-1.0', **kwargs):
        # Both tokenizers come from the same checkpoint; only the subfolder differs.
        self.tokenizer = CLIPTokenizer.from_pretrained(
            file_path_or_name, subfolder='tokenizer', **kwargs)
        self.tokenizer_2 = CLIPTokenizer.from_pretrained(
            file_path_or_name, subfolder='tokenizer_2', **kwargs)

    @classmethod
    def from_pretrained(cls, file_path_or_name='stabilityai/stable-diffusion-xl-base-1.0', **kwargs):
        """Create a new instance of SDXLTokenizer from a pretrained checkpoint.

        Args:
            file_path_or_name (str): Name or path of the model's tokenizers to load.
            **kwargs: Additional keyword arguments passed on to the constructor,
                which forwards them to ``CLIPTokenizer.from_pretrained``.

        Returns:
            SDXLTokenizer: A new instance of SDXLTokenizer (or of the subclass
            this method is called on).
        """
        return cls(file_path_or_name=file_path_or_name, **kwargs)

    def __call__(self, prompt, padding, truncation, return_tensors, max_length=None):
        """Tokenize ``prompt`` with both tokenizers and join the results per key.

        Args:
            prompt: Prompt(s) handed unchanged to each tokenizer.
            padding: Passed through as each tokenizer's ``padding`` argument.
            truncation: Passed through as each tokenizer's ``truncation`` argument.
            return_tensors: Passed through as each tokenizer's ``return_tensors``
                argument.
            max_length: Maximum length given to both tokenizers. When None, each
                tokenizer uses its own ``model_max_length``.

        Returns:
            The output object of the first tokenizer, in which every key now maps
            to the two-element list ``[first_tokenizer_value, second_tokenizer_value]``.
        """
        outputs = []
        for tokenizer in (self.tokenizer, self.tokenizer_2):
            # Resolve the fallback per tokenizer: each one may have its own limit.
            length = tokenizer.model_max_length if max_length is None else max_length
            outputs.append(
                tokenizer(
                    prompt,
                    padding=padding,
                    max_length=length,
                    truncation=truncation,
                    return_tensors=return_tensors,
                )
            )
        tokenized_output, tokenized_output_2 = outputs

        # Add second tokenizer output to first tokenizer. Iterate over a snapshot
        # of the keys because the mapping is rewritten in place.
        for key in list(tokenized_output.keys()):
            tokenized_output[key] = [tokenized_output[key], tokenized_output_2[key]]
        return tokenized_output
|