from .mm_constants import IMAGE_TOKEN_INDEX, IMAGE_PAD_TOKEN_INDEX | |
def tokenizer_image_token_qwen(prompt, tokenizer, image_token_index, image_token_num=256):
    """Expand every image placeholder in ``prompt`` into a fixed-width span.

    Each element of ``prompt`` that compares equal to ``image_token_index``
    is replaced by ``IMAGE_TOKEN_INDEX`` followed by ``image_token_num - 1``
    copies of ``IMAGE_PAD_TOKEN_INDEX``. All other elements are copied
    through unchanged and in order.

    Args:
        prompt: Iterable of already-tokenized ids (presumably ints; only
            equality against ``image_token_index`` is required).
        tokenizer: Unused; kept so the signature stays compatible with
            existing callers.
        image_token_index: Placeholder value in ``prompt`` that marks where
            an image goes. It is only matched on input; the emitted marker
            is always the module-level ``IMAGE_TOKEN_INDEX``.
        image_token_num: Total number of ids emitted per image. Values
            below 1 still emit the single ``IMAGE_TOKEN_INDEX`` marker,
            because a non-positive list repeat yields no padding.

    Returns:
        A new list of ids with every placeholder expanded.
    """
    # The expansion is identical for every image, so build it once.
    image_span = [IMAGE_TOKEN_INDEX] + [IMAGE_PAD_TOKEN_INDEX] * (image_token_num - 1)

    input_ids = []
    for token in prompt:
        if token == image_token_index:
            # Expand in place. This also covers a placeholder at the very end
            # of the prompt, which a split-then-join approach loses when the
            # empty trailing chunk is discarded.
            input_ids.extend(image_span)
        else:
            input_ids.append(token)
    return input_ids