wealthcoders
/

qwen-vl-2.5

Image-Text-to-Text

text-generation-inference

Model card Files Files and versions

qwen-vl-2.5 / handler.py

wealthcoders's picture

Create handler.py

53772d5 verified 5 months ago

history blame contribute delete

1.61 kB

	from transformers import GenerationConfig, AutoProcessor, AutoTokenizer, AutoModelForImageTextToText, Qwen2_5_VLForConditionalGeneration
	from qwen_vl_utils import process_vision_info

	# Hugging Face Hub identifier of the checkpoint that EndpointHandler loads.
	model_name = "Qwen/Qwen2.5-VL-7B-Instruct"
	# NOTE(review): the next line reads like a leftover prompt instruction; nothing
	# in the visible code uses it -- confirm and remove if obsolete.
	# If it is any form of ID, return only a list of keys and values.

	class EndpointHandler:
	    """Custom inference handler that answers chat requests with Qwen2.5-VL.

	    The model and its processor are loaded once at construction time; each
	    call renders the chat template, extracts vision inputs, generates a
	    completion and returns the decoded text of the first (only) sequence.
	    """

	    def __init__(self, path=""):
	        """Load the model, the processor and the generation settings.

	        Args:
	            path: Directory handed in by the serving runtime. It is accepted
	                so the handler can be constructed as ``EndpointHandler(path)``
	                but is not used: weights are always pulled from ``model_name``.
	        """
	        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
	            model_name, torch_dtype="auto", device_map="cuda"
	        )
	        self.processor = AutoProcessor.from_pretrained(model_name)

	        # Built once: these settings do not depend on the request.
	        self._generation_config = GenerationConfig(
	            max_new_tokens=2048,
	            no_repeat_ngram_size=3,
	            # The keyword is ``repetition_penalty``; the previous spelling
	            # ``repeat_penalty`` is not a GenerationConfig field, so the
	            # penalty was never applied.
	            repetition_penalty=1.2,
	            # NOTE(review): per the transformers docs this flag only matters
	            # for beam search; kept to avoid changing configuration further.
	            early_stopping=True,
	        )

	    # NOTE(review): kept ``async`` to preserve the existing interface; the
	    # caller must await the result -- confirm the serving runtime does so.
	    async def __call__(self, data):
	        """Generate a reply for the chat conversation in ``data``.

	        Args:
	            data: Request payload; must contain a non-empty ``"messages"``
	                entry in the format expected by the processor's chat template.

	        Returns:
	            The decoded completion text for the single input conversation.

	        Raises:
	            ValueError: If ``"messages"`` is missing or empty.
	        """
	        messages = data.get("messages")
	        if not messages:
	            raise ValueError('Request payload must contain a non-empty "messages" list.')

	        prompt = self.processor.apply_chat_template(
	            messages, tokenize=False, add_generation_prompt=True
	        )
	        image_inputs, video_inputs = process_vision_info(messages)
	        inputs = self.processor(
	            text=[prompt],
	            images=image_inputs,
	            videos=video_inputs,
	            padding=True,
	            return_tensors="pt",
	        )
	        # The processor returns CPU tensors while the model lives on the GPU;
	        # generate() needs both on the same device.
	        inputs = inputs.to(self.model.device)

	        generated_ids = self.model.generate(
	            **inputs, generation_config=self._generation_config
	        )
	        # generate() returns prompt + completion; strip the prompt tokens so
	        # only the newly generated part is decoded.
	        completion_ids = [
	            out_ids[len(in_ids):]
	            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
	        ]
	        output_text = self.processor.batch_decode(
	            completion_ids,
	            skip_special_tokens=True,
	            clean_up_tokenization_spaces=False,
	        )

	        return output_text[0]