gabrielearmento
/

MiniCPM-llama3-v-2_5-int4

Visual Question Answering

feature-extraction

Inference Endpoints

4-bit precision

Model card Files Files and versions Community

MiniCPM-llama3-v-2_5-int4 / handler.py

gabrielearmento's picture

gabrielearmento

Update handler.py

964ef5d verified 2 months ago

1.03 kB

	from typing import Dict, List, Any
	from transformers import AutoModel, AutoTokenizer
	from PIL import Image

	class EndpointHandler():
	    """Custom inference handler answering a question about a single image.

	    The handler loads the MiniCPM-Llama3-V 2.5 (int4) model and tokenizer once
	    at construction time and then serves requests through ``__call__``.

	    Expected request payload::

	        {"image_url": "<http(s) URL or local file path>", "message": "<question>"}
	    """

	    # Hub id of the checkpoint; shared by the model and the tokenizer.
	    MODEL_ID = 'openbmb/MiniCPM-Llama3-V-2_5-int4'

	    def __init__(self, path=""):
	        """Preload everything needed at inference time.

	        Args:
	            path: Accepted for compatibility with the handler calling
	                convention but not used; the checkpoint is always loaded from
	                ``MODEL_ID``.
	        """
	        # trust_remote_code=True lets the checkpoint's own modelling code run.
	        self.model = AutoModel.from_pretrained(self.MODEL_ID, trust_remote_code=True)
	        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_ID, trust_remote_code=True)

	    def _load_image(self, source):
	        """Return the image at *source* as an RGB ``PIL.Image``.

	        Args:
	            source: An ``http://`` / ``https://`` URL, or anything
	                ``Image.open`` accepts (local path or file object).
	        """
	        # Local imports keep this block self-contained.
	        import io
	        import urllib.request

	        if isinstance(source, str) and source.lower().startswith(("http://", "https://")):
	            # Image.open cannot read URLs, so download the bytes first.
	            with urllib.request.urlopen(source, timeout=30) as response:
	                source = io.BytesIO(response.read())
	        # convert() returns a fully loaded copy, so the handle can be closed.
	        with Image.open(source) as img:
	            return img.convert("RGB")

	    def __call__(self, data: Dict[str, Any]) -> Any:
	        """Run one chat turn about the image referenced in *data*.

	        Args:
	            data: Request payload with the keys ``"image_url"`` and
	                ``"message"``. The mapping is not modified.

	        Returns:
	            Whatever ``self.model.chat`` returns, unmodified (presumably the
	            generated answer text -- confirm against the model card).

	        Raises:
	            KeyError: If ``"image_url"`` or ``"message"`` is missing.
	        """
	        for key in ("image_url", "message"):
	            if key not in data:
	                raise KeyError(f"request payload is missing required key {key!r}")

	        image = self._load_image(data["image_url"])
	        messages = [{'role': 'user', 'content': data["message"]}]
	        return self.model.chat(
	            image=image,
	            msgs=messages,
	            tokenizer=self.tokenizer,
	            sampling=True,  # if sampling=False, beam_search will be used by default
	            temperature=0.7,
	            # system_prompt=''  # pass system_prompt if needed
	        )