File size: 2,060 Bytes
10eca74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b66e7ec
10eca74
 
 
 
 
c42a084
 
 
 
 
 
10eca74
 
c42a084
10eca74
 
 
 
 
 
 
c42a084
 
10eca74
 
 
 
 
 
 
 
 
c42a084
10eca74
 
 
 
 
 
 
 
c42a084
10eca74
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from typing import Any
from llmlingua import PromptCompressor
from pydantic import BaseModel


class LLMLinguaParameters(BaseModel):
    """Validated schema for the ``parameters`` field of an incoming request.

    Every field is required and is forwarded verbatim (via ``**kwargs``) to
    ``PromptCompressor.compress_prompt`` in ``EndpointHandler.__call__``.
    """

    # Instruction text forwarded to compress_prompt (may be empty, per the
    # example payload in EndpointHandler.__call__).
    instruction: str
    # Question text forwarded to compress_prompt (may be empty).
    question: str
    # Target token count after compression (e.g. 200 in the example payload).
    target_token: int
    # Budget expression string, e.g. "*1.5" — semantics defined by llmlingua;
    # passed through unchanged.
    context_budget: str
    # Chunk size for compression, e.g. 100 — presumably the per-iteration
    # granularity used by llmlingua; verify against PromptCompressor docs.
    iterative_size: int


class EndpointHandler:
    """Hugging Face custom inference handler wrapping llmlingua prompt compression.

    Loads the compression model once at construction time; each call compresses
    one prompt according to the request's ``parameters``.
    """

    def __init__(self, path=None):
        # Fall back to the default Llama-2 base model when the endpoint
        # runtime does not supply a local model path.
        model_path = path or "NousResearch/Llama-2-7b-hf"
        self.llm_lingua = PromptCompressor(model_name=model_path)

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        data args:
            inputs: (:obj: `str`): input text (the key "prompt" is accepted
                as an alias, matching the example below)
            parameters: (:obj: `dict`): dictionary of parameters
                instruction: `str`
                question: `str`
                target_token: `int`
                context_budget: `str`
                iterative_size: `int`

        returns:
            :obj:`dict` containing the output of the model

        Raises:
            KeyError: if neither "inputs" nor "prompt" is present in ``data``.
            pydantic.ValidationError: if "parameters" is missing required
                fields or has wrongly-typed values.

        Examples:
        Data
         input:
            ```
            {
                "prompt": "Long prompt with verbose details to reduce tokens count...",
                "parameters": {
                    "instruction":"",
                    "question": "",
                    "target_token": 200,
                    "context_budget": "*1.5",
                    "iterative_size": 100,
                }
            }
            ```
         output:
            ```
            {
                "compressed_prompt": "Question: Sam bought a dozen boxes, each with 30 highlighter pens inside...",
                "origin_tokens": 2365,
                "compressed_tokens": 174,
                "ratio": "13.6x",
                "saving": ", Saving $0.1 in GPT-4."
            }
            ```
        """
        # Accept both the conventional HF inference key "inputs" and the
        # "prompt" key used by the documented example payload above.
        prompt = data.pop("inputs", None)
        if prompt is None:
            prompt = data.pop("prompt")
        # compress_prompt takes the context as a list of segments; split on
        # blank lines so paragraph boundaries are preserved.
        complex_prompt = prompt.split("\n\n")
        # Validate and normalize the parameters through the pydantic model.
        # NOTE(review): .dict() is the pydantic v1 API (deprecated in v2 in
        # favor of model_dump()) — confirm the pinned pydantic version.
        parameters = LLMLinguaParameters(**data.get("parameters", {})).dict()

        output = self.llm_lingua.compress_prompt(complex_prompt, **parameters)
        return output