# easyanimate/video_caption/utils/image_captioner_awq.py
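"""Image captioners that wrap multimodal chat models (Qwen-VL-Chat and InternLM-XComposer2-VL)."""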
from pathlib import Path
from typing import Tuple

import auto_gptq
import torch
from auto_gptq.modeling import BaseGPTQForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer

class QwenVLChat:
    def __init__(self, device: str = "cuda:0", quantized: bool = False) -> None:
        if quantized:
            # 4-bit (GPTQ) checkpoint.
            self.model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen-VL-Chat-Int4", device_map=device, trust_remote_code=True
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
        else:
            # Full checkpoint loaded in fp16.
            self.model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen-VL-Chat", device_map=device, trust_remote_code=True, fp16=True
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat", trust_remote_code=True)

    def __call__(self, prompt: str, image: str) -> Tuple[str, str]:
        # Qwen-VL-Chat's tokenizer builds a multimodal query from an (image, text) list.
        query = self.tokenizer.from_list_format([{"image": image}, {"text": prompt}])
        response, history = self.model.chat(self.tokenizer, query=query, history=[])
        return response, history
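
# auto_gptq wrapper describing the InternLM-XComposer2 architecture: modules listed in
# outside_layer_modules stay unquantized, while inside_layer_modules names the linear
# layers inside each transformer block that GPTQ quantizes.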
class InternLMXComposer2QForCausalLM(BaseGPTQForCausalLM):
    layers_block_name = "model.layers"
    outside_layer_modules = [
        "vit",
        "vision_proj",
        "model.tok_embeddings",
        "model.norm",
        "output",
    ]
    inside_layer_modules = [
        ["attention.wqkv.linear"],
        ["attention.wo.linear"],
        ["feed_forward.w1.linear", "feed_forward.w3.linear"],
        ["feed_forward.w2.linear"],
    ]
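
# Captions images with InternLM-XComposer2-VL-7B, using either the 4-bit GPTQ
# checkpoint or the full model cast to fp16.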
class InternLMXComposer2:
    def __init__(self, device: str = "cuda:0", quantized: bool = True):
        if quantized:
            # Allow-list the internlm architecture so auto_gptq accepts the checkpoint.
            auto_gptq.modeling._base.SUPPORTED_MODELS = ["internlm"]
            self.model = InternLMXComposer2QForCausalLM.from_quantized(
                "internlm/internlm-xcomposer2-vl-7b-4bit", trust_remote_code=True, device=device
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained(
                "internlm/internlm-xcomposer2-vl-7b-4bit", trust_remote_code=True
            )
        else:
            # Setting fp16=True does not work. See https://huggingface.co/internlm/internlm-xcomposer2-vl-7b/discussions/1.
            self.model = (
                AutoModelForCausalLM.from_pretrained(
                    "internlm/internlm-xcomposer2-vl-7b", device_map=device, trust_remote_code=True
                )
                .eval()
                .to(torch.float16)
            )
            self.tokenizer = AutoTokenizer.from_pretrained(
                "internlm/internlm-xcomposer2-vl-7b", trust_remote_code=True
            )

    def __call__(self, prompt: str, image: str):
        # InternLM-XComposer2 expects the image placeholder token at the start of the query.
        if not prompt.startswith("<ImageHere>"):
            prompt = "<ImageHere>" + prompt
        with torch.cuda.amp.autocast(), torch.no_grad():
            response, history = self.model.chat(self.tokenizer, query=prompt, image=image, history=[], do_sample=False)
        return response, history
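
# Demo: caption every *.jpg / *.png image under demo/ with both captioners.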
if __name__ == "__main__":
    image_folder = "demo/"
    wildcard_list = ["*.jpg", "*.png"]
    image_list = []
    for wildcard in wildcard_list:
        image_list.extend([str(image_path) for image_path in Path(image_folder).glob(wildcard)])

    qwen_vl_chat = QwenVLChat(device="cuda:0", quantized=True)
    qwen_vl_prompt = "Please describe this image in detail."
    for image in image_list:
        response, _ = qwen_vl_chat(qwen_vl_prompt, image)
        print(image, response)

    internlm2_vl = InternLMXComposer2(device="cuda:0", quantized=False)
    internlm2_vl_prompt = "Please describe this image in detail."
    for image in image_list:
        response, _ = internlm2_vl(internlm2_vl_prompt, image)
        print(image, response)