IlyasMoutawwakil committed
Commit eabde51 • 1 Parent(s): a533468

remove tgi

Files changed (2):
  1. app.py +3 -6
  2. config_store.py +204 -60
app.py CHANGED
@@ -8,14 +8,13 @@ from run import run_benchmark
 from config_store import (
     get_training_config,
     get_inference_config,
-    get_text_generation_inference_config,
     get_neural_compressor_config,
     get_onnxruntime_config,
     get_openvino_config,
     get_pytorch_config,
 )

-BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor", "text-generation-inference"]
+BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
 BENCHMARKS = ["inference", "training"]
 DEVICES = ["cpu", "cuda"]

@@ -25,14 +24,14 @@ with gr.Blocks() as demo:
     gr.HTML("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
     # explanation text
     gr.Markdown(
-        "This is a demo space of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark.git):"
+        "This is a demo space of [`optimum-Benchmark`](https://github.com/huggingface/optimum-benchmark.git):"
         "<br>A unified multi-backend utility for benchmarking `transformers`, `diffusers`, `peft` and `timm` models with "
         "Optimum's optimizations & quantization, for inference & training, on different backends & hardwares."
     )

     model = gr.Textbox(
         label="model",
-        value="bert-base-uncased",
+        value="optimum/distilbert-base-uncased-finetuned-sst-2-english",
         info="Model to run the benchmark on. In the particular case of this space, only models that are hosted on huggingface.co/models can be benchmarked.",
     )
     task = gr.Dropdown(
@@ -73,8 +72,6 @@ with gr.Blocks() as demo:
         openvino_config = get_openvino_config()
     with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
         neural_compressor_config = get_neural_compressor_config()
-    with gr.Accordion(label="Text Generation Inference Config", open=False, visible=False):
-        text_generation_inference_config = get_text_generation_inference_config()

     # hide backend configs based on backend
     backend.change(
config_store.py CHANGED
@@ -105,79 +105,223 @@ def get_pytorch_config():
         # info="Uses DistributedDataParallel for multi-gpu training",
         # ),
         # peft_strategy
-        gr.Textbox(
+        gr.Dropdown(
             value="null",
+            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
             label="pytorch.peft_strategy",
+            info="Use null for no PEFT",
         ),
     ]


 def get_onnxruntime_config():
-    return get_base_backend_config(backend_name="onnxruntime")
-    # no_weights
-
-    # no_weights: bool = False
-
-    # # export options
-    # export: bool = True
-    # use_cache: bool = True
-    # use_merged: bool = False
-    # torch_dtype: Optional[str] = None
-
-    # # provider options
-    # provider: str = "${infer_provider:${device}}"
-    # device_id: Optional[int] = "${oc.deprecated:backend.provider_options.device_id}"
-    # provider_options: Dict[str, Any] = field(default_factory=lambda: {"device_id": "${infer_device_id:${device}}"})
-
-    # # inference options
-    # use_io_binding: bool = "${is_gpu:${device}}"
-    # enable_profiling: bool = "${oc.deprecated:backend.session_options.enable_profiling}"
-    # session_options: Dict[str, Any] = field(
-    #     default_factory=lambda: {"enable_profiling": "${is_profiling:${benchmark.name}}"}
-    # )
-
-    # # optimization options
-    # optimization: bool = False
-    # optimization_config: Dict[str, Any] = field(default_factory=dict)
-
-    # # quantization options
-    # quantization: bool = False
-    # quantization_config: Dict[str, Any] = field(default_factory=dict)
-
-    # # calibration options
-    # calibration: bool = False
-    # calibration_config: Dict[str, Any] = field(default_factory=dict)
-
-    # # null, O1, O2, O3, O4
-    # auto_optimization: Optional[str] = None
-    # auto_optimization_config: Dict[str, Any] = field(default_factory=dict)
-
-    # # null, arm64, avx2, avx512, avx512_vnni, tensorrt
-    # auto_quantization: Optional[str] = None
-    # auto_quantization_config: Dict[str, Any] = field(default_factory=dict)
-
-    # # ort-training is basically a different package so we might need to seperate these two backends in the future
-    # use_inference_session: bool = "${is_inference:${benchmark.name}}"
-
-    # # training options
-    # use_ddp: bool = False
-    # ddp_config: Dict[str, Any] = field(default_factory=dict)
-
-    # # peft options
-    # peft_strategy: Optional[str] = None
-    # peft_config: Dict[str, Any] = field(default_factory=dict)
+    return get_base_backend_config(backend_name="onnxruntime") + [
+        # no_weights
+        gr.Checkbox(
+            value=False,
+            label="pytorch.no_weights",
+            info="Generates random weights instead of downloading pretrained ones",
+        ),
+        # export
+        gr.Checkbox(
+            value=True,
+            label="onnxruntime.export",
+            info="Exports the model to ONNX",
+        ),
+        # use_cache
+        gr.Checkbox(
+            value=True,
+            label="onnxruntime.use_cache",
+            info="Uses cached ONNX model if available",
+        ),
+        # use_merged
+        gr.Checkbox(
+            value=False,
+            label="onnxruntime.use_merged",
+            info="Uses merged ONNX model if available",
+        ),
+        # torch_dtype
+        gr.Dropdown(
+            value="null",
+            label="onnxruntime.torch_dtype",
+            choices=["null", "bfloat16", "float16", "float32", "auto"],
+            info="Use null for default and `auto` for automatic dtype selection",
+        ),
+        # use_io_binding
+        gr.Checkbox(
+            value=True,
+            label="onnxruntime.use_io_binding",
+            info="Uses IO binding for inference",
+        ),
+        # auto_optimization
+        gr.Dropdown(
+            value="null",
+            label="onnxruntime.auto_optimization",
+            choices=["null", "O1", "O2", "O3", "O4"],
+            info="Use null for default",
+        ),
+        # auto_quantization
+        gr.Dropdown(
+            value="null",
+            label="onnxruntime.auto_quantization",
+            choices=["null", "arm64", "avx2", "avx512", "avx512_vnni", "tensorrt"],
+            info="Use null for default",
+        ),
+        # optimization
+        gr.Checkbox(
+            value=False,
+            label="onnxruntime.optimization",
+            info="Enables manual optimization",
+        ),
+        # optimization_config
+        gr.Dataframe(
+            type="array",
+            value=[["optimization_level"]],
+            headers=["1"],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="onnxruntime.optimization_config",
+        ),
+        # quantization
+        gr.Checkbox(
+            value=False,
+            label="onnxruntime.quantization",
+            info="Enables manual quantization",
+        ),
+        # quantization_config
+        gr.Dataframe(
+            type="array",
+            value=[["is_static"]],
+            headers=[False],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="onnxruntime.quantization_config",
+            info="Use null for default",
+        ),
+        # calibration
+        gr.Checkbox(
+            value=False,
+            label="onnxruntime.calibration",
+            info="Enables calibration",
+        ),
+        # calibration_config
+        gr.Dataframe(
+            type="array",
+            value=[["glue"]],
+            headers=["dataset_name"],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="onnxruntime.calibration_config",
+            info="Use null for default",
+        ),
+        # peft_strategy
+        gr.Dropdown(
+            value="null",
+            label="onnxruntime.peft_strategy",
+            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
+            info="Use null for full parameters fine-tuning",
+        ),
+    ]


 def get_openvino_config():
-    return get_base_backend_config(backend_name="openvino")
+    return get_base_backend_config(backend_name="openvino") + [
+        # export
+        gr.Checkbox(
+            value=True,
+            label="openvino.export",
+            info="Exports the model to ONNX",
+        ),
+        # use_cache
+        gr.Checkbox(
+            value=True,
+            label="openvino.use_cache",
+            info="Uses cached ONNX model if available",
+        ),
+        # use_merged
+        gr.Checkbox(
+            value=False,
+            label="openvino.use_merged",
+            info="Uses merged ONNX model if available",
+        ),
+        # reshape
+        gr.Checkbox(
+            value=False,
+            label="openvino.reshape",
+            info="Reshapes the model to the input shape",
+        ),
+        # half
+        gr.Checkbox(
+            value=False,
+            label="openvino.half",
+            info="Converts model to half precision",
+        ),
+        # quantization
+        gr.Checkbox(
+            value=False,
+            label="openvino.quantization",
+            info="Enables quantization",
+        ),
+        # quantization_config
+        gr.Dataframe(
+            type="array",
+            headers=["compression", "input_info", "save_onnx_model"],
+            value=[[None, None, None]],
+            row_count=(1, "static"),
+            col_count=(3, "dynamic"),
+            label="openvino.quantization_config",
+        ),
+        # calibration
+        gr.Checkbox(
+            value=False,
+            label="openvino.calibration",
+            info="Enables calibration",
+        ),
+        # calibration_config
+        gr.Dataframe(
+            type="array",
+            headers=["dataset_name"],
+            value=[["glue"]],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="openvino.calibration_config",
+        ),
+    ]


 def get_neural_compressor_config():
-    return get_base_backend_config(backend_name="neural-compressor")
-
-
-def get_text_generation_inference_config():
-    return get_base_backend_config(backend_name="text-generation-inference")
+    return get_base_backend_config(backend_name="neural-compressor") + [
+        # ptq_quantization
+        gr.Checkbox(
+            value=False,
+            label="neural-compressor.ptq_quantization",
+            info="Enables post-training quantization",
+        ),
+        # ptq_quantization_config
+        gr.Dataframe(
+            type="array",
+            headers=["device"],
+            value=[["cpu"]],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="neural-compressor.ptq_quantization_config",
+        ),
+        # calibration
+        gr.Checkbox(
+            value=False,
+            label="neural-compressor.calibration",
+            info="Enables calibration",
+        ),
+        # calibration_config
+        gr.Dataframe(
+            type="array",
+            headers=["dataset_name"],
+            value=[["glue"]],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="neural-compressor.calibration_config",
+        ),
+    ]


 def get_inference_config():
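
For context, a minimal sketch of how app.py consumes these per-backend helpers after this commit, mirroring the accordion pattern visible in the app.py hunks above. The ONNX Runtime and OpenVINO accordion labels and the dropdown defaults here are assumptions for illustration, not taken from this commit:

import gradio as gr

from config_store import (
    get_neural_compressor_config,
    get_onnxruntime_config,
    get_openvino_config,
)

BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]

with gr.Blocks() as demo:
    # backend selector; label and default value are assumed for this sketch
    backend = gr.Dropdown(label="backend", choices=BACKENDS, value="pytorch")

    # each backend gets its own, initially hidden, accordion of config components
    with gr.Accordion(label="OnnxRuntime Config", open=False, visible=False):  # label assumed
        onnxruntime_config = get_onnxruntime_config()
    with gr.Accordion(label="OpenVINO Config", open=False, visible=False):  # label assumed
        openvino_config = get_openvino_config()
    with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
        neural_compressor_config = get_neural_compressor_config()

    # in the Space itself, app.py then wires backend.change(...) to show/hide
    # these accordions and launches the demo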