shredder-31
/

GA_model_Gemma_2b

Text Generation

Inference Endpoints

Model card Files Files and versions Community

GA_model_Gemma_2b / handler.py

shredder-31's picture

Update handler.py

09957e8 verified 7 months ago

1.37 kB

	from typing import Dict, List, Any

	class EndpointHandler:
	    """Custom inference handler wrapping a 4-bit quantized causal language model.

	    The instance loads the model and tokenizer once at construction time and
	    is then called once per request with the request payload.
	    """

	    # Hub repository loaded when the caller does not supply a path.
	    DEFAULT_MODEL_ID = "shredder-31/GA_model_Gemma_2b"
	    # Maximum number of tokens generated per request.
	    MAX_NEW_TOKENS = 100

	    def __init__(self, path=""):
	        """Load the quantized model and its tokenizer.

	        Args:
	            path: Directory or Hub id to load the model from. When empty,
	                ``DEFAULT_MODEL_ID`` is used instead.
	        """
	        # Imported locally because the module's import section only pulls in
	        # `typing`; without these the names below raise NameError.
	        import torch
	        from transformers import (
	            AutoModelForCausalLM,
	            AutoTokenizer,
	            BitsAndBytesConfig,
	        )

	        bnb_config = BitsAndBytesConfig(
	            load_in_4bit=True,
	            bnb_4bit_use_double_quant=True,
	            bnb_4bit_quant_type="nf4",
	            bnb_4bit_compute_dtype=torch.bfloat16,
	        )

	        # Honour the path handed in by the caller instead of silently
	        # overwriting it; fall back to the Hub id only when none is given.
	        model_source = path or self.DEFAULT_MODEL_ID

	        self.model = AutoModelForCausalLM.from_pretrained(
	            model_source,
	            quantization_config=bnb_config,
	            device_map={"": 0},
	        )
	        # NOTE(review): add_eos_token=True appends EOS to every encoded
	        # prompt; presumably this mirrors the fine-tuning setup - confirm
	        # that it is also wanted at inference time.
	        self.tokenizer = AutoTokenizer.from_pretrained(
	            model_source, add_eos_token=True
	        )

	    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
	        """Generate a greedy continuation for the prompt in ``data["inputs"]``.

	        Args:
	            data: Request payload; must contain the prompt string under the
	                ``"inputs"`` key.

	        Returns:
	            A dict ``{"output": <generated text>}`` holding only the newly
	            generated text, without the prompt.

	        Raises:
	            KeyError: If ``data`` has no ``"inputs"`` entry.
	        """
	        prompt = data["inputs"]

	        encoded = self.tokenizer(
	            prompt, return_tensors="pt", add_special_tokens=True
	        )
	        # Inputs must live on the same device as the model weights.
	        encoded = encoded.to(self.model.device)
	        prompt_length = len(encoded["input_ids"][0])

	        # Only max_new_tokens is passed: combining it with max_length is
	        # contradictory and makes the library pick one and warn.
	        generated_ids = self.model.generate(
	            **encoded,
	            max_new_tokens=self.MAX_NEW_TOKENS,
	            do_sample=False,
	        )

	        # Drop the prompt by token count rather than by character count: the
	        # decoded text is not guaranteed to reproduce the prompt verbatim, so
	        # slicing the decoded string at len(prompt) can cut in the wrong place.
	        completion_ids = generated_ids[0][prompt_length:]
	        completion = self.tokenizer.decode(
	            completion_ids, skip_special_tokens=True
	        )

	        return {"output": completion}