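"""
BatchAggregator: aggregate a batch of text chunks into a single summary
using a Hugging Face text2text-generation pipeline.
"""
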
import pprint as pp
import logging
import time

import torch
from transformers import GenerationConfig, Pipeline, pipeline

from utils import compare_model_size

# Setting up logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


class BatchAggregator:
    CONFIGURED_MODELS = [
        "pszemraj/bart-large-mnli-dolly_hhrlhf-v1"
    ]  # TODO: Add models here
    DEFAULT_INSTRUCTION = "Write a comprehensive yet concise summary that pulls together the main points of the following text:"
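    # Conservative beam-search defaults, applied when a model has no tuned
    # generation config of its own (see _set_default_generation_config).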
    GENERIC_CONFIG = GenerationConfig(
        num_beams=8,
        early_stopping=True,
        do_sample=False,
        min_new_tokens=32,
        max_new_tokens=256,
        repetition_penalty=1.1,
        length_penalty=1.4,
        no_repeat_ngram_size=4,
        encoder_no_repeat_ngram_size=5,
    )

    def __init__(
        self, model_name: str = "pszemraj/bart-large-mnli-dolly_hhrlhf-v1", **kwargs
    ):
        self.device = None
        self.is_compiled = False
        self.logger = logging.getLogger(__name__)
        self.init_model(model_name)

    def init_model(self, model_name: str) -> None:
        """
        Initialize the model.

        :param model_name: The name of the model to use.
        """
        # Free up memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        self.logger.info(f"Setting model to {model_name}")
        self.model_name = model_name
        self.aggregator = self._create_pipeline(model_name)
        self._configure_model()
        # update the generation config with the specific tokenizer
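        # T5 checkpoints conventionally use 0 for pad/decoder-start and 1 for EOS;
        # other seq2seq models fall back to the ids reported by their tokenizer.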
        tokenizer_params = {
            "decoder_start_token_id": 0
            if "t5" in model_name.lower()
            else self.aggregator.tokenizer.eos_token_id,
            "eos_token_id": 1
            if "t5" in model_name.lower()
            else self.aggregator.tokenizer.eos_token_id,
            "pad_token_id": 0
            if "t5" in model_name.lower()
            else self.aggregator.tokenizer.pad_token_id,
        }
        self.update_generation_config(**tokenizer_params)

    def _create_pipeline(
        self, model_name: str = "pszemraj/bart-large-mnli-dolly_hhrlhf-v1"
    ) -> Pipeline:
        """
        Create a text2text-generation pipeline for the given model.

        :param str model_name: model name to use, default: "pszemraj/bart-large-mnli-dolly_hhrlhf-v1"
        :return Pipeline: the pipeline for the model

        :raises Exception: if the pipeline cannot be created
        """
        self.device = 0 if torch.cuda.is_available() else -1
        try:
            self.logger.info(
                f"Creating pipeline with model {model_name} on device {self.device}"
            )
            return pipeline(
                "text2text-generation",
                model_name,
                device=self.device,
                torch_dtype=torch.float32,
            )
        except Exception as e:
            self.logger.error(f"Failed to create pipeline: {e}")
            raise

    def _configure_model(self):
        """
        Configure the model for generation.
        """
        try:
            self.aggregator.model = torch.compile(self.aggregator.model)
            self.is_compiled = True
        except Exception as e:
            self.logger.warning(f"Could not compile model with Torch 2.0: {e}")

        if self.model_name not in self.CONFIGURED_MODELS:
            self.logger.info("Setting generation config to general defaults")
            self._set_default_generation_config()
        else:
            try:
                self.logger.info("Loading generation config from hub")
                self.aggregator.model.generation_config = (
                    GenerationConfig.from_pretrained(self.model_name)
                )
            except Exception as e:
                self.logger.warning(
                    f"Could not load generation config, using defaults: {e}"
                )
                self._set_default_generation_config()

        self.logger.info(self.aggregator.model.generation_config.to_json_string())

    def _set_default_generation_config(self):
        """
        Set the default generation configuration for the model.
        """
        self.aggregator.model.generation_config = self.GENERIC_CONFIG

        if "bart" in self.model_name.lower():
            self.logger.info("Using BART model, updating generation config")
            upd = {
                "num_beams": 8,
                "repetition_penalty": 1.3,
                "length_penalty": 1.0,
                "_from_model_config": False,
                "max_new_tokens": 256,
                "min_new_tokens": 32,
                "no_repeat_ngram_size": 3,
                "encoder_no_repeat_ngram_size": 6,
            }  # TODO: clean up
            self.aggregator.model.generation_config.update(**upd)

        # Larger checkpoints get fewer beams to keep memory and latency in check.
        if (
            "large" in self.model_name.lower()
            or "xl" in self.model_name.lower()
            or compare_model_size(self.model_name, 500)
        ):
            upd = {"num_beams": 4}
            self.update_generation_config(**upd)

    def update_generation_config(self, **kwargs):
        """
        Update the generation configuration with the specified parameters.

        Args:
            **kwargs: The parameters to update in the generation configuration.
        """
        self.logger.info(f"Updating generation config with {pp.pformat(kwargs)}")

        self.aggregator.model.generation_config.update(**kwargs)

    def update_loglevel(self, level: str = "INFO"):
        """
        Update the log level.

        Args:
            level (str): The log level to set. Defaults to "INFO".
        """
        self.logger.setLevel(level)

    def infer_aggregate(
        self,
        text_list: list,
        instruction: str = DEFAULT_INSTRUCTION,
        **kwargs,
    ) -> str:
        f"""
        Generate a summary of the specified texts.

        Args:
            text_list (list): The texts to summarize.
            instruction (str): The instruction for the summary. Defaults to {self.DEFAULT_INSTRUCTION}.
            **kwargs: Additional parameters to update in the generation configuration.

        Returns:
            The generated summary.
        """
        joined_text = "\n".join(text_list)
        prompt = f"{instruction}\n\n{joined_text}\n"
        if kwargs:
            self.update_generation_config(**kwargs)
        st = time.perf_counter()
        self.logger.info(f"inference on {len(text_list)} texts ...")
        result = self.aggregator(
            prompt,
            generation_config=self.aggregator.model.generation_config,
        )[0]["generated_text"]
        self.logger.info(f"Done. runtime:\t{round(time.perf_counter() - st, 2)}s")
        self.logger.info(
            f"Input tokens:\t{self.count_tokens(prompt)}. Output tokens:\t{self.count_tokens(result)}"
        )
        return result

    def count_tokens(self, text: str) -> int:
        """count the number of tokens in a text"""
        return (
            len(self.aggregator.tokenizer.encode(text, truncation=False, padding=False))
            if text
            else 0
        )
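

if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the original interface):
    # assumes the default checkpoint is downloadable and fits in memory on the
    # current device.
    aggregator = BatchAggregator()
    summary = aggregator.infer_aggregate(
        [
            "The quarterly report shows revenue grew 12% year over year.",
            "Operating costs rose 8%, driven mostly by cloud infrastructure.",
        ]
    )
    print(summary)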