File size: 3,797 Bytes
4d423a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr


def get_process_config():
    """Create the Gradio inputs for the ``process.*`` options.

    Returns:
        A dict mapping each option name to its Gradio component, in
        insertion order: a ``process.numactl`` checkbox (unchecked by
        default) and a ``process.numactl_kwargs`` textbox pre-filled with
        a Python dict written as text.
    """
    # Components are instantiated in the same order as the returned keys.
    numactl_toggle = gr.Checkbox(
        label="process.numactl",
        info="Runs the model with numactl",
        value=False,
    )
    numactl_kwargs_box = gr.Textbox(
        value="{'cpunodebind': 0, 'membind': 0}",
        label="process.numactl_kwargs",
        info="Additional python dict of kwargs to pass to numactl",
    )
    return {
        "process.numactl": numactl_toggle,
        "process.numactl_kwargs": numactl_kwargs_box,
    }


def get_inference_config():
    """Create the Gradio inputs for the ``inference.*`` options.

    Returns:
        A dict mapping each option name to its Gradio component, in
        insertion order: three integer sliders (``warmup_runs``,
        ``duration``, ``iterations``; each 0-10 with step 1 and default
        10) followed by the ``latency`` (checked) and ``memory``
        (unchecked) checkboxes.
    """
    # The three sliders differ only in their name and help text.
    slider_specs = (
        ("inference.warmup_runs", "Number of warmup runs"),
        ("inference.duration", "Minimum duration of the benchmark in seconds"),
        ("inference.iterations", "Minimum number of iterations of the benchmark"),
    )
    config = {
        key: gr.Slider(
            minimum=0,
            maximum=10,
            value=10,
            step=1,
            label=key,
            info=help_text,
        )
        for key, help_text in slider_specs
    }

    # Checkboxes are created after the sliders, matching the key order.
    config["inference.latency"] = gr.Checkbox(
        label="inference.latency",
        info="Measures the latency of the model",
        value=True,
    )
    config["inference.memory"] = gr.Checkbox(
        label="inference.memory",
        info="Measures the peak memory consumption",
        value=False,
    )
    return config


def get_pytorch_config():
    """Create the Gradio inputs for the ``pytorch.*`` options.

    Returns:
        A dict mapping each option name to its Gradio component, in
        insertion order: a ``pytorch.torch_dtype`` dropdown (default
        ``"float32"``) and a ``pytorch.torch_compile`` checkbox
        (unchecked by default).
    """
    dtype_choices = ["bfloat16", "float16", "float32", "auto"]

    # Dropdown first, then checkbox: same creation order as the keys.
    dtype_dropdown = gr.Dropdown(
        choices=dtype_choices,
        value="float32",
        label="pytorch.torch_dtype",
        info="The dtype to use for the model",
    )
    compile_toggle = gr.Checkbox(
        label="pytorch.torch_compile",
        info="Compiles the model with torch.compile",
        value=False,
    )
    return {
        "pytorch.torch_dtype": dtype_dropdown,
        "pytorch.torch_compile": compile_toggle,
    }


def get_onnxruntime_config():
    """Create the Gradio inputs for the ``onnxruntime.*`` options.

    Returns:
        A dict mapping each option name to its Gradio component, in
        insertion order: three checkboxes (``export``, ``use_cache``,
        ``use_merged``; all checked by default) followed by the
        ``onnxruntime.torch_dtype`` dropdown (default ``"float32"``).
    """
    # All three checkboxes default to True; only name and help text vary.
    checkbox_help = {
        "onnxruntime.export": "Exports the model to ONNX",
        "onnxruntime.use_cache": "Uses cached ONNX model if available",
        "onnxruntime.use_merged": "Uses merged ONNX model if available",
    }

    config = {}
    for key, help_text in checkbox_help.items():
        config[key] = gr.Checkbox(label=key, info=help_text, value=True)

    # The dropdown is created last, matching its position in the mapping.
    config["onnxruntime.torch_dtype"] = gr.Dropdown(
        choices=["bfloat16", "float16", "float32", "auto"],
        value="float32",
        label="onnxruntime.torch_dtype",
        info="The dtype to use for the model",
    )
    return config


def get_openvino_config():
    """Create the Gradio inputs for the ``openvino.*`` options.

    Returns:
        A dict mapping each option name to a Gradio checkbox, in insertion
        order: ``export``, ``use_cache`` and ``use_merged`` (checked by
        default), then ``reshape`` and ``half`` (unchecked by default).
    """
    # NOTE(review): the first three help texts mention ONNX although these
    # are OpenVINO options; they look copied from the ONNX Runtime section.
    # Confirm the intended wording before changing the user-facing text.
    checkbox_specs = (
        ("openvino.export", True, "Exports the model to ONNX"),
        ("openvino.use_cache", True, "Uses cached ONNX model if available"),
        ("openvino.use_merged", True, "Uses merged ONNX model if available"),
        ("openvino.reshape", False, "Reshapes the model to the input shape"),
        ("openvino.half", False, "Converts model to half precision"),
    )

    config = {}
    for key, default, help_text in checkbox_specs:
        config[key] = gr.Checkbox(value=default, label=key, info=help_text)
    return config


def get_ipex_config():
    """Return the inputs for the ``ipex.*`` options.

    Returns:
        A new, empty dict on every call; no options are defined here.
    """
    ipex_config = dict()
    return ipex_config