|
from typing import Any |
|
from llmlingua import PromptCompressor |
|
from pydantic import BaseModel |
|
|
|
|
|
class LLMLinguaParameters(BaseModel):
    """Validated parameters forwarded to ``PromptCompressor.compress_prompt``.

    Defaults mirror the example request documented on
    ``EndpointHandler.__call__``, so a request that omits the ``parameters``
    key (or any individual field) still validates instead of raising a
    ``ValidationError``.
    """

    # Optional task instruction prepended for compression relevance scoring.
    instruction: str = ""
    # Optional question used to guide question-aware compression.
    question: str = ""
    # Target number of tokens for the compressed prompt.
    target_token: int = 200
    # Budget expression for the context portion (e.g. "*1.5").
    context_budget: str = "*1.5"
    # Chunk size used by the iterative token-level compression loop.
    iterative_size: int = 100
|
|
|
|
|
class EndpointHandler:
    """Inference-endpoint handler that compresses prompts with llmlingua."""

    def __init__(self, path=None):
        # Fall back to the default base model when no model path is supplied
        # by the serving environment.
        model_path = path or "NousResearch/Llama-2-7b-hf"
        self.llm_lingua = PromptCompressor(model_name=model_path)

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        Compress the prompt found in ``data["inputs"]``.

        data args:
            inputs: (:obj: `str`): input text to compress
            parameters: (:obj: `dict`): dictionary of parameters
                instruction: `str`
                question: `str`
                target_token: `int`
                context_budget: `str`
                iterative_size: `int`

        returns:
            :obj:`dict` with the compression result, e.g. keys
            ``compressed_prompt``, ``origin_tokens``, ``compressed_tokens``,
            ``ratio`` and ``saving``.

        raises:
            KeyError: if ``data`` has no ``"inputs"`` key.
            pydantic.ValidationError: if ``parameters`` fails validation.

        Examples:
            Data
            input:
            ```
            {
                "inputs": "Long prompt with verbose details to reduce tokens count...",
                "parameters": {
                    "instruction": "",
                    "question": "",
                    "target_token": 200,
                    "context_budget": "*1.5",
                    "iterative_size": 100,
                }
            }
            ```
            output:
            ```
            {
                "compressed_prompt": "Question: Sam bought a dozen boxes, each with 30 highlighter pens inside...",
                "origin_tokens": 2365,
                "compressed_tokens": 174,
                "ratio": "13.6x",
                "saving": ", Saving $0.1 in GPT-4."
            }
            ```
        """
        prompt = data.pop("inputs")
        # compress_prompt takes the context as a list of segments; split on
        # blank lines so each paragraph is scored/compressed independently.
        context_segments = prompt.split("\n\n")

        params_model = LLMLinguaParameters(**data.get("parameters", {}))
        # pydantic v2 renamed ``.dict()`` to ``.model_dump()``; support both
        # so the handler works regardless of the installed major version.
        if hasattr(params_model, "model_dump"):
            parameters = params_model.model_dump()
        else:
            parameters = params_model.dict()

        return self.llm_lingua.compress_prompt(context_segments, **parameters)
|
|
|
|