File size: 621 Bytes
8155cef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from .mm_constants import IMAGE_TOKEN_INDEX, IMAGE_PAD_TOKEN_INDEX

def tokenizer_image_token_qwen(prompt, tokenizer, image_token_index, image_token_num=256):
    """Expand every image placeholder in a token-id sequence into an image slot.

    Each element of ``prompt`` equal to ``image_token_index`` is replaced by
    one ``IMAGE_TOKEN_INDEX`` followed by ``image_token_num - 1`` copies of
    ``IMAGE_PAD_TOKEN_INDEX``. All other elements are copied through unchanged
    and in their original order.

    Placeholders are expanded wherever they occur, including at the very end
    of ``prompt`` and when several appear back to back. (Splitting the prompt
    into chunks and discarding an empty final chunk would silently drop a
    trailing placeholder.)

    Args:
        prompt: Iterable of token ids that may contain ``image_token_index``.
        tokenizer: Not used by this function; kept so existing call sites
            keep working.
        image_token_index: Value in ``prompt`` that marks an image position.
        image_token_num: Number of ids each image slot occupies. For values
            below 1 the slot still contains the single ``IMAGE_TOKEN_INDEX``,
            because a list multiplied by a non-positive count is empty.

    Returns:
        A new list of token ids with every placeholder expanded.
    """
    input_ids = []
    for token in prompt:
        if token == image_token_index:
            # One marker id, then padding ids filling the rest of the slot.
            input_ids.append(IMAGE_TOKEN_INDEX)
            input_ids.extend([IMAGE_PAD_TOKEN_INDEX] * (image_token_num - 1))
        else:
            input_ids.append(token)

    return input_ids