|
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration |
|
import torch |
|
from PIL import Image |
|
|
|
class EndpointHandler():
    """Custom inference handler serving a LLaVA-NeXT vision-language model.

    Loads the processor and a 4-bit quantized model once at construction,
    then answers image+prompt generation requests via ``__call__``.
    """

    def __init__(self, path=""):
        """Load processor and model from *path* (a local model directory).

        Bug fix: the original called ``disable_torch_init()``, which is never
        imported or defined in this file and raised ``NameError`` at startup;
        the call has been removed.
        """
        # Processor handles both image preprocessing and tokenization.
        self.processor = LlavaNextProcessor.from_pretrained(path, use_fast=False)
        # Bug fix: with load_in_4bit=True, bitsandbytes/accelerate owns weight
        # placement, and calling .to("cuda:0") on a 4-bit model raises
        # ValueError in transformers. Use device_map for placement instead.
        self.model = LlavaNextForConditionalGeneration.from_pretrained(
            path,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            load_in_4bit=True,
            device_map="cuda:0",
        )

    def __call__(self, data):
        """Handle one inference request.

        Args:
            data: request payload dict. ``data["inputs"]`` is expected to be a
                base64-encoded image (if the key is absent, the whole dict is
                passed through — NOTE(review): that fallback looks dubious,
                confirm against the actual caller payload) and ``data["text"]``
                is the prompt.

        Returns:
            str: the generated text, special tokens stripped.
        """
        image_encoded = data.pop("inputs", data)
        prompt = data["text"]

        image = self.decode_base64_image(image_encoded)
        # The model expects 3-channel RGB input.
        if image.mode != "RGB":
            image = image.convert("RGB")

        inputs = self.processor(prompt, image, return_tensors="pt").to("cuda:0")

        output = self.model.generate(**inputs, max_new_tokens=500)

        # Bug fix: original said `processor.decode(...)` — an undefined
        # module-level name (NameError on every request). Use the instance.
        return self.processor.decode(output[0], skip_special_tokens=True)

    def decode_base64_image(self, image_string):
        """Decode a base64-encoded image payload into a PIL Image.

        Bug fix: this helper was called from ``__call__`` but never defined,
        so every request failed with AttributeError.
        """
        image_bytes = base64.b64decode(image_string)
        return Image.open(BytesIO(image_bytes))
|
|