from typing import Any

from llmlingua import PromptCompressor
from pydantic import BaseModel


class LLMLinguaParameters(BaseModel):
    # Defaults mirror the example in the handler docstring below.
    instruction: str = ""
    question: str = ""
    target_token: int = 200
    context_budget: str = "*1.5"
    iterative_size: int = 100


class EndpointHandler:
    def __init__(self, path: str | None = None):
        model_path = path or "NousResearch/Llama-2-7b-hf"
        self.llm_lingua = PromptCompressor(model_name=model_path)

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        data args:
            inputs (:obj:`str`): input text to compress
            parameters (:obj:`dict`): dictionary of compression parameters
                instruction: `str`
                question: `str`
                target_token: `int`
                context_budget: `str`
                iterative_size: `int`
        returns:
            :obj:`dict` containing the output of the model

        Examples:
            input:
            ```
            {
                "inputs": "Long prompt with verbose details to reduce token count...",
                "parameters": {
                    "instruction": "",
                    "question": "",
                    "target_token": 200,
                    "context_budget": "*1.5",
                    "iterative_size": 100
                }
            }
            ```
            output:
            ```
            {
                "compressed_prompt": "Question: Sam bought a dozen boxes, each with 30 highlighter pens inside...",
                "origin_tokens": 2365,
                "compressed_tokens": 174,
                "ratio": "13.6x",
                "saving": ", Saving $0.1 in GPT-4."
            }
            ```
        """
        prompt = data.pop("inputs")
        # Split the prompt into paragraph-level contexts for LLMLingua.
        contexts = prompt.split("\n\n")
        # Validate the request parameters against the schema defined above.
        parameters = LLMLinguaParameters(**data.get("parameters", {})).dict()
        return self.llm_lingua.compress_prompt(contexts, **parameters)
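

# --- Usage sketch (not part of the deployed handler) ---
# A minimal local smoke test, assuming the LLMLingua model weights can be
# downloaded and loaded locally. The prompt string below is illustrative;
# the parameter values mirror the docstring example above.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler(
        {
            "inputs": "Long prompt with verbose details to reduce token count...",
            "parameters": {
                "instruction": "",
                "question": "",
                "target_token": 200,
                "context_budget": "*1.5",
                "iterative_size": 100,
            },
        }
    )
    print(result["compressed_prompt"])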