File size: 4,401 Bytes
cbde782
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import argparse
import time

import torch
from datasets import load_dataset
from transformers import AutoProcessor, VisionEncoderDecoderModel


def speedometer(
    model: torch.nn.Module,
    pixel_values: torch.Tensor,
    decoder_input_ids: torch.Tensor,
    processor: "AutoProcessor",
    bad_words_ids: list,
    warmup_iters: int = 100,
    timing_iters: int = 100,
    num_tokens: int = 10,
) -> float:
    """Measure the mean wall time of one ``model.generate`` call.

    Runs ``warmup_iters`` untimed calls followed by ``timing_iters`` timed
    calls with identical arguments, prints the mean and returns it.

    Args:
        model: Module exposing ``generate`` and a ``device`` attribute.
        pixel_values: Encoder input; moved to the model's device once,
            before any iteration.
        decoder_input_ids: Decoder prompt ids; moved to the model's device
            once, before any iteration.
        processor: Object whose ``tokenizer`` supplies ``pad_token_id`` and
            ``eos_token_id``.
        bad_words_ids: Forwarded unchanged to ``generate``.
        warmup_iters: Number of untimed calls made before measuring.
        timing_iters: Number of timed calls; must be at least 1.
        num_tokens: Passed as both ``min_length`` and ``max_length``, which
            is intended to make every call produce the same sequence length.

    Returns:
        Mean time per timed call, in milliseconds.

    Raises:
        ValueError: If ``timing_iters`` is smaller than 1.
    """
    if timing_iters < 1:
        # Fail before the (potentially long) warmup instead of dividing by
        # zero after it.
        raise ValueError("timing_iters must be at least 1")

    device = torch.device(model.device)
    on_cuda = device.type == "cuda"

    # The inputs never change between iterations, so transfer them once;
    # this keeps host-to-device copies out of the measured region.
    pixel_values = pixel_values.to(device)
    decoder_input_ids = decoder_input_ids.to(device)
    tokenizer = processor.tokenizer

    def run_generate() -> None:
        # Single definition of the benchmarked call so that warmup and
        # timing runs cannot drift apart.
        model.generate(
            pixel_values,
            decoder_input_ids=decoder_input_ids,
            early_stopping=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
            num_beams=1,
            bad_words_ids=bad_words_ids,
            return_dict_in_generate=True,
            min_length=num_tokens,
            max_length=num_tokens,
        )

    # Warmup runs
    if on_cuda:
        torch.cuda.synchronize(device)
    for _ in range(warmup_iters):
        run_generate()

    # Timing runs
    if on_cuda:
        # CUDA events measure time on the GPU stream itself.
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        for _ in range(timing_iters):
            run_generate()
        end.record()
        # elapsed_time is only valid once both events have completed.
        torch.cuda.synchronize(device)
        total_ms = start.elapsed_time(end)
    else:
        # No CUDA device: CUDA events are unavailable, use the host clock.
        t0 = time.perf_counter()
        for _ in range(timing_iters):
            run_generate()
        total_ms = (time.perf_counter() - t0) * 1000.0

    mean = total_ms / timing_iters
    print(f"Mean time: {mean} ms")

    return mean


def get_ja_list_of_lists(processor):
    """Return the ids of all Japanese-only vocabulary tokens.

    Each token in ``processor.tokenizer.vocab`` has its leading "▁" markers
    stripped; the token is selected when what remains is non-empty and every
    character falls in one of the code-point ranges listed below.

    Args:
        processor: Object whose ``tokenizer.vocab`` maps token strings to ids.

    Returns:
        A list of single-element lists ``[[id], [id], ...]``, one per selected
        token, in vocabulary iteration order.
    """
    # Inclusive Unicode code-point ranges treated as "Japanese".
    # Range list originally produced with GPT-4:
    # https://chat.openai.com/share/a795b15c-8534-40b9-9699-c8c1319f5f25
    ja_ranges = (
        (0x3040, 0x309F),  # Hiragana
        (0x30A0, 0x30FF),  # Katakana
        (0x4E00, 0x9FFF),  # CJK Unified Ideographs
        (0x3400, 0x4DBF),  # CJK Unified Ideographs Extension A
        (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
        (0x31F0, 0x31FF),  # Katakana Phonetic Extensions
        (0xFF00, 0xFFEF),  # Halfwidth and Fullwidth Forms
        (0x3000, 0x303F),  # CJK Symbols and Punctuation
        (0x3200, 0x32FF),  # Enclosed CJK Letters and Months
    )

    def is_japanese(s):
        """Return True if ``s`` is non-empty and made only of listed ranges."""
        if not s:
            # Without this guard an empty string would pass vacuously, so a
            # token consisting solely of "▁" would be reported as Japanese.
            return False
        return all(
            any(low <= ord(char) <= high for low, high in ja_ranges)
            for char in s
        )

    return [
        [token_id]
        for token, token_id in processor.tokenizer.vocab.items()
        if is_japanese(token.lstrip("▁"))
    ]


def main():
    """Command-line entry point for the generation speed test.

    Loads the processor and model from ``--model_path``, builds the inputs
    from one image of the ``hf-internal-testing/example-documents`` test
    split, optionally extends ``bad_words_ids`` with the Japanese token ids,
    and hands everything to ``speedometer``, which prints the mean time.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Measure the mean generate() latency of a "
            "VisionEncoderDecoderModel, optionally with Japanese tokens "
            "added to bad_words_ids"
        )
    )
    parser.add_argument(
        "--model_path",
        help="Model name or path passed to from_pretrained for the processor and the model",
        required=True,
    )
    parser.add_argument(
        "--ja_bad_words", help="Use ja bad_words_ids", action="store_true"
    )
    parser.add_argument(
        "--warmup_iters",
        help="Number of untimed warmup generate() calls",
        type=int,
        default=100,
    )
    parser.add_argument(
        "--timing_iters",
        help="Number of timed generate() calls",
        type=int,
        default=100,
    )
    parser.add_argument(
        "--num_tokens",
        help="Value used for both min_length and max_length of generate()",
        type=int,
        default=10,
    )
    args = parser.parse_args()

    print("Running speed test on model: ", args.model_path, "with ja_bad_words: ", args.ja_bad_words)

    processor = AutoProcessor.from_pretrained(args.model_path)
    model = VisionEncoderDecoderModel.from_pretrained(args.model_path)

    # Use the first CUDA device when one is available, otherwise the CPU.
    device = 0 if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    dataset = load_dataset("hf-internal-testing/example-documents", split="test")
    image = dataset[1]["image"]

    # The decoder is prompted with the task token only, no special tokens.
    task_prompt = "<s_synthdog>"
    decoder_input_ids = processor.tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt"
    ).input_ids

    pixel_values = processor(image, return_tensors="pt").pixel_values

    # The unknown token is always banned; Japanese tokens only on request.
    bad_words_ids = [[processor.tokenizer.unk_token_id]]
    if args.ja_bad_words:
        bad_words_ids += get_ja_list_of_lists(processor)

    print("Length of bad_words_ids: ", len(bad_words_ids))

    speedometer(
        model,
        pixel_values,
        decoder_input_ids,
        processor,
        bad_words_ids=bad_words_ids,
        warmup_iters=args.warmup_iters,
        timing_iters=args.timing_iters,
        num_tokens=args.num_tokens,
    )


# Run the speed test only when this file is executed as a script.
if __name__ == "__main__":
    main()