from typing import Any

from llmlingua import PromptCompressor
from pydantic import BaseModel


class LLMLinguaParameters(BaseModel):
    """Tunable knobs forwarded verbatim to ``PromptCompressor.compress_prompt``.

    Defaults mirror the documented example payload below, so a request that
    omits ``parameters`` (or any single field) still validates instead of
    raising a ``ValidationError``.
    """

    instruction: str = ""
    question: str = ""
    target_token: int = 200
    context_budget: str = "*1.5"
    iterative_size: int = 100


class EndpointHandler:
    """Inference-endpoint handler that compresses prompts with LLMLingua."""

    def __init__(self, path=None):
        # Fall back to the reference Llama-2 7B weights when the endpoint
        # does not supply a local model path.
        model_path = path or "NousResearch/Llama-2-7b-hf"
        self.llm_lingua = PromptCompressor(model_name=model_path)

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """Compress the prompt in ``data`` and return LLMLingua's result dict.

        data args:
            inputs: (:obj: `str`): input text
            parameters: (:obj: `dict`, optional): dictionary of parameters
                instruction: `str`
                question: `str`
                target_token: `int`
                context_budget: `str`
                iterative_size: `int`

        returns:
            :obj:`dict` containing the output of the model

        Examples:
            Data input:
            ```
            {
                "inputs": "Long prompt with verbose details to reduce tokens count...",
                "parameters": {
                    "instruction": "",
                    "question": "",
                    "target_token": 200,
                    "context_budget": "*1.5",
                    "iterative_size": 100,
                }
            }
            ```
            output:
            ```
            {
                "compressed_prompt": "Question: Sam bought a dozen boxes, each with 30 highlighter pens inside...",
                "origin_tokens": 2365,
                "compressed_tokens": 174,
                "ratio": "13.6x",
                "saving": ", Saving $0.1 in GPT-4."
            }
            ```
        """
        prompt = data.pop("inputs")
        # compress_prompt accepts a list of context segments; split the raw
        # text into paragraph-level chunks on blank lines.
        complex_prompt = prompt.split("\n\n")
        # Validate/default the knobs via pydantic, then expand as kwargs.
        # NOTE(review): `.dict()` is the pydantic-v1 spelling; on pydantic v2
        # this emits a deprecation warning (`model_dump()` is the successor) —
        # confirm which major version the deployment pins.
        parameters = LLMLinguaParameters(**data.get("parameters", {})).dict()
        output = self.llm_lingua.compress_prompt(complex_prompt, **parameters)
        return output