|
from typing import Any |
|
from llmlingua import PromptCompressor |
|
from pydantic import BaseModel |
|
|
|
|
|
class LLMLinguaParameters(BaseModel):
    """Validated parameters forwarded to ``PromptCompressor.compress_prompt``.

    Defaults mirror the example request documented on
    ``EndpointHandler.__call__``, so a request that omits the ``parameters``
    key (or any individual field) still validates instead of raising a
    ``ValidationError``.
    """

    # Optional task instruction prepended for compression relevance scoring.
    instruction: str = ""
    # Optional question used to guide question-aware compression.
    question: str = ""
    # Target number of tokens for the compressed prompt.
    target_token: int = 200
    # Budget expression for the context portion (e.g. "*1.5").
    context_budget: str = "*1.5"
    # Chunk size used by the iterative token-level compression loop.
    iterative_size: int = 100
|
|
|
|
|
class EndpointHandler:
    """Inference-endpoint handler that compresses prompts with llmlingua."""

    def __init__(self, path=None):
        # Fall back to the default base model when no model path is supplied
        # by the serving environment.
        model_path = path or "NousResearch/Llama-2-7b-hf"
        self.llm_lingua = PromptCompressor(model_name=model_path)

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        Compress the prompt found in ``data["inputs"]``.

        data args:
            inputs: (:obj: `str`): input text to compress
            parameters: (:obj: `dict`): dictionary of parameters
                instruction: `str`
                question: `str`
                target_token: `int`
                context_budget: `str`
                iterative_size: `int`

        returns:
            :obj:`dict` with the compression result, e.g. keys
            ``compressed_prompt``, ``origin_tokens``, ``compressed_tokens``,
            ``ratio`` and ``saving``.

        raises:
            KeyError: if ``data`` has no ``"inputs"`` key.
            pydantic.ValidationError: if ``parameters`` fails validation.

        Examples:
            Data
            input:
            ```
            {
                "inputs": "Long prompt with verbose details to reduce tokens count...",
                "parameters": {
                    "instruction": "",
                    "question": "",
                    "target_token": 200,
                    "context_budget": "*1.5",
                    "iterative_size": 100,
                }
            }
            ```
            output:
            ```
            {
                "compressed_prompt": "Question: Sam bought a dozen boxes, each with 30 highlighter pens inside...",
                "origin_tokens": 2365,
                "compressed_tokens": 174,
                "ratio": "13.6x",
                "saving": ", Saving $0.1 in GPT-4."
            }
            ```
        """
        prompt = data.pop("inputs")
        # compress_prompt takes the context as a list of segments; split on
        # blank lines so each paragraph is scored/compressed independently.
        context_segments = prompt.split("\n\n")

        params_model = LLMLinguaParameters(**data.get("parameters", {}))
        # pydantic v2 renamed ``.dict()`` to ``.model_dump()``; support both
        # so the handler works regardless of the installed major version.
        if hasattr(params_model, "model_dump"):
            parameters = params_model.model_dump()
        else:
            parameters = params_model.dict()

        return self.llm_lingua.compress_prompt(context_segments, **parameters)
|
|
|
|