# File size: 2,329 Bytes

# c490bed

from transformers import  CLIPTokenizer

class SDXLTokenizer:
    """Wrapper around HuggingFace tokenizers for SDXL.

    Tokenizes a prompt with two CLIP tokenizers and returns the joined output.

    Args:
        file_path_or_name (str): Name or path of the model whose tokenizers are loaded from its
            ``tokenizer`` and ``tokenizer_2`` subfolders. Defaults to
            'stabilityai/stable-diffusion-xl-base-1.0'.
        **kwargs: Additional keyword arguments forwarded unchanged to both
            ``CLIPTokenizer.from_pretrained`` calls. Must not contain ``subfolder``, which is
            set by this class.
    """

    def __init__(self, file_path_or_name='stabilityai/stable-diffusion-xl-base-1.0', **kwargs):
        # Both tokenizers receive the same extra options so they are loaded consistently.
        self.tokenizer = CLIPTokenizer.from_pretrained(file_path_or_name, subfolder='tokenizer', **kwargs)
        self.tokenizer_2 = CLIPTokenizer.from_pretrained(file_path_or_name, subfolder='tokenizer_2', **kwargs)

    @classmethod
    def from_pretrained(cls, file_path_or_name='stabilityai/stable-diffusion-xl-base-1.0', **kwargs):
        """Create a new instance of SDXLTokenizer from a pretrained model name or path.

        Args:
            file_path_or_name (str): Name or path of the model's tokenizers to load.
            **kwargs: Additional keyword arguments passed through to the constructor.

        Returns:
            SDXLTokenizer: A new instance of SDXLTokenizer.
        """
        return cls(file_path_or_name=file_path_or_name, **kwargs)

    def __call__(self, prompt, padding, truncation, return_tensors, max_length=None):
        """Tokenize ``prompt`` with both tokenizers and join the results.

        Args:
            prompt: Input passed as the first argument to each tokenizer.
            padding: Forwarded to both tokenizers as ``padding``.
            truncation: Forwarded to both tokenizers as ``truncation``.
            return_tensors: Forwarded to both tokenizers as ``return_tensors``.
            max_length: Forwarded to both tokenizers as ``max_length``. When ``None``, each
                tokenizer uses its own ``model_max_length``.

        Returns:
            The output object of the first tokenizer, in which every value has been replaced
            by a two-element list ``[first_tokenizer_value, second_tokenizer_value]``.

        Raises:
            KeyError: If the second tokenizer's output lacks a key present in the first's.
        """
        tokenized_output, tokenized_output_2 = (
            tokenizer(
                prompt,
                padding=padding,
                max_length=tokenizer.model_max_length if max_length is None else max_length,
                truncation=truncation,
                return_tensors=return_tensors,
            )
            for tokenizer in (self.tokenizer, self.tokenizer_2)
        )

        # Merge the second output into the first. Iterate over a snapshot of the keys because
        # the mapping is reassigned inside the loop.
        for key in list(tokenized_output.keys()):
            tokenized_output[key] = [tokenized_output[key], tokenized_output_2[key]]
        return tokenized_output