auto-benchmark / config_store.py
IlyasMoutawwakil's picture
remove tgi
eabde51
raw
history blame
12.5 kB
import gradio as gr
def get_base_backend_config(backend_name="pytorch"):
    """Build the Gradio inputs shared by every backend.

    Args:
        backend_name: Prefix placed in front of each option name to form the
            component label (``"<backend_name>.<option>"``).

    Returns:
        A list of six components, in this order: seed, inter_op_num_threads,
        intra_op_num_threads, initial_isolation_check,
        continous_isolation_check, delete_cache.
    """
    # Components are instantiated in the same order they are returned.
    seed_box = gr.Textbox(
        label=f"{backend_name}.seed",
        info="Sets seed for reproducibility",
        value=42,
    )
    inter_op_box = gr.Textbox(
        label=f"{backend_name}.inter_op_num_threads",
        info="Use null for default and -1 for cpu_count()",
        value="null",
    )
    intra_op_box = gr.Textbox(
        label=f"{backend_name}.intra_op_num_threads",
        info="Use null for default and -1 for cpu_count()",
        value="null",
    )
    initial_check = gr.Checkbox(
        label=f"{backend_name}.initial_isolation_check",
        info="Makes sure that initially, no other process is running on the target device",
        value=True,
    )
    # NOTE(review): "continous" spelling is kept on purpose; the label is a
    # runtime string and may be consumed as a config key elsewhere - confirm
    # before renaming.
    continuous_check = gr.Checkbox(
        label=f"{backend_name}.continous_isolation_check",
        info="Makes sure that throughout the benchmark, no other process is running on the target device",
        value=True,
    )
    delete_cache_box = gr.Checkbox(
        label=f"{backend_name}.delete_cache",
        info="Deletes model cache (weights & configs) after benchmark is done",
        value=False,
    )

    return [
        seed_box,
        inter_op_box,
        intra_op_box,
        initial_check,
        continuous_check,
        delete_cache_box,
    ]
def get_pytorch_config():
    """Build the Gradio inputs for the PyTorch backend.

    Returns:
        The components from ``get_base_backend_config(backend_name="pytorch")``
        followed by the PyTorch-specific ones: no_weights, torch_dtype,
        amp_autocast, amp_dtype, torch_compile, bettertransformer,
        quantization_scheme and peft_strategy.
    """
    return get_base_backend_config(backend_name="pytorch") + [
        # no_weights
        gr.Checkbox(
            value=False,
            label="pytorch.no_weights",
            info="Generates random weights instead of downloading pretrained ones",
        ),
        # device_map (currently disabled)
        # gr.Dropdown(
        #     value="null",
        #     label="pytorch.device_map",
        #     choices=["null", "auto", "sequential"],
        #     info="Use null for default and `auto` or `sequential` the same way as in `from_pretrained`",
        # ),
        # torch_dtype
        gr.Dropdown(
            value="null",
            label="pytorch.torch_dtype",
            choices=["null", "bfloat16", "float16", "float32", "auto"],
            info="Use null for default and `auto` for automatic dtype selection",
        ),
        # amp_autocast
        gr.Checkbox(
            value=False,
            label="pytorch.amp_autocast",
            info="Enables Pytorch's native Automatic Mixed Precision",
        ),
        # amp_dtype
        gr.Dropdown(
            value="null",
            label="pytorch.amp_dtype",
            info="Use null for default",
            choices=["null", "bfloat16", "float16"],
        ),
        # torch_compile
        gr.Checkbox(
            value=False,
            label="pytorch.torch_compile",
            info="Compiles the model with torch.compile",
        ),
        # bettertransformer
        gr.Checkbox(
            value=False,
            label="pytorch.bettertransformer",
            info="Applies optimum.BetterTransformer for fastpath and optimized attention",
        ),
        # quantization_scheme
        gr.Dropdown(
            value="null",
            choices=["null", "gptq", "bnb"],
            label="pytorch.quantization_scheme",
            info="Use null for no quantization",
        ),
        # use_ddp (currently disabled)
        # gr.Checkbox(
        #     value=False,
        #     label="pytorch.use_ddp",
        #     info="Uses DistributedDataParallel for multi-gpu training",
        # ),
        # peft_strategy
        gr.Dropdown(
            value="null",
            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
            label="pytorch.peft_strategy",
            info="Use null for no PEFT",
        ),
    ]
def get_onnxruntime_config():
    """Build the Gradio inputs for the ONNX Runtime backend.

    Returns:
        The components from
        ``get_base_backend_config(backend_name="onnxruntime")`` followed by the
        ONNX Runtime-specific ones. Every label in this list is prefixed with
        ``onnxruntime.``.
    """
    return get_base_backend_config(backend_name="onnxruntime") + [
        # no_weights
        gr.Checkbox(
            value=False,
            # Label belongs to this backend's namespace, not pytorch's.
            label="onnxruntime.no_weights",
            info="Generates random weights instead of downloading pretrained ones",
        ),
        # export
        gr.Checkbox(
            value=True,
            label="onnxruntime.export",
            info="Exports the model to ONNX",
        ),
        # use_cache
        gr.Checkbox(
            value=True,
            label="onnxruntime.use_cache",
            info="Uses cached ONNX model if available",
        ),
        # use_merged
        gr.Checkbox(
            value=False,
            label="onnxruntime.use_merged",
            info="Uses merged ONNX model if available",
        ),
        # torch_dtype
        gr.Dropdown(
            value="null",
            label="onnxruntime.torch_dtype",
            choices=["null", "bfloat16", "float16", "float32", "auto"],
            info="Use null for default and `auto` for automatic dtype selection",
        ),
        # use_io_binding
        gr.Checkbox(
            value=True,
            label="onnxruntime.use_io_binding",
            info="Uses IO binding for inference",
        ),
        # auto_optimization
        gr.Dropdown(
            value="null",
            label="onnxruntime.auto_optimization",
            choices=["null", "O1", "O2", "O3", "O4"],
            info="Use null for default",
        ),
        # auto_quantization
        gr.Dropdown(
            value="null",
            label="onnxruntime.auto_quantization",
            choices=["null", "arm64", "avx2", "avx512", "avx512_vnni", "tensorrt"],
            info="Use null for default",
        ),
        # optimization
        gr.Checkbox(
            value=False,
            label="onnxruntime.optimization",
            info="Enables manual optimization",
        ),
        # optimization_config
        # Header names the option and the single row holds its value, the same
        # layout calibration_config uses further down.
        gr.Dataframe(
            type="array",
            value=[["1"]],
            headers=["optimization_level"],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.optimization_config",
        ),
        # quantization
        gr.Checkbox(
            value=False,
            label="onnxruntime.quantization",
            info="Enables manual quantization",
        ),
        # quantization_config
        # Header names the option and the single row holds its value.
        gr.Dataframe(
            type="array",
            value=[[False]],
            headers=["is_static"],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.quantization_config",
            info="Use null for default",
        ),
        # calibration
        gr.Checkbox(
            value=False,
            label="onnxruntime.calibration",
            info="Enables calibration",
        ),
        # calibration_config
        gr.Dataframe(
            type="array",
            value=[["glue"]],
            headers=["dataset_name"],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.calibration_config",
            info="Use null for default",
        ),
        # peft_strategy
        gr.Dropdown(
            value="null",
            label="onnxruntime.peft_strategy",
            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
            info="Use null for full parameters fine-tuning",
        ),
    ]
def get_openvino_config():
    """Build the Gradio inputs for the OpenVINO backend.

    Returns:
        The components from ``get_base_backend_config(backend_name="openvino")``
        followed by: export, use_cache, use_merged, reshape, half,
        quantization, quantization_config, calibration, calibration_config.
    """
    # Shared inputs are created first so overall creation order is unchanged.
    shared_inputs = get_base_backend_config(backend_name="openvino")

    export_box = gr.Checkbox(
        label="openvino.export",
        info="Exports the model to ONNX",
        value=True,
    )
    use_cache_box = gr.Checkbox(
        label="openvino.use_cache",
        info="Uses cached ONNX model if available",
        value=True,
    )
    use_merged_box = gr.Checkbox(
        label="openvino.use_merged",
        info="Uses merged ONNX model if available",
        value=False,
    )
    reshape_box = gr.Checkbox(
        label="openvino.reshape",
        info="Reshapes the model to the input shape",
        value=False,
    )
    half_box = gr.Checkbox(
        label="openvino.half",
        info="Converts model to half precision",
        value=False,
    )
    quantization_box = gr.Checkbox(
        label="openvino.quantization",
        info="Enables quantization",
        value=False,
    )
    # One fixed row; three columns to start with, column count is "dynamic".
    quantization_table = gr.Dataframe(
        label="openvino.quantization_config",
        type="array",
        headers=["compression", "input_info", "save_onnx_model"],
        value=[[None, None, None]],
        row_count=(1, "static"),
        col_count=(3, "dynamic"),
    )
    calibration_box = gr.Checkbox(
        label="openvino.calibration",
        info="Enables calibration",
        value=False,
    )
    calibration_table = gr.Dataframe(
        label="openvino.calibration_config",
        type="array",
        headers=["dataset_name"],
        value=[["glue"]],
        row_count=(1, "static"),
        col_count=(1, "dynamic"),
    )

    return shared_inputs + [
        export_box,
        use_cache_box,
        use_merged_box,
        reshape_box,
        half_box,
        quantization_box,
        quantization_table,
        calibration_box,
        calibration_table,
    ]
def get_neural_compressor_config():
    """Build the Gradio inputs for the Neural Compressor backend.

    Returns:
        The components from
        ``get_base_backend_config(backend_name="neural-compressor")`` followed
        by: ptq_quantization, ptq_quantization_config, calibration,
        calibration_config.
    """
    components = get_base_backend_config(backend_name="neural-compressor")

    # Post-training quantization toggle and its one-row option table.
    components.append(
        gr.Checkbox(
            label="neural-compressor.ptq_quantization",
            info="Enables post-training quantization",
            value=False,
        )
    )
    components.append(
        gr.Dataframe(
            label="neural-compressor.ptq_quantization_config",
            type="array",
            headers=["device"],
            value=[["cpu"]],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
        )
    )

    # Calibration toggle and its one-row option table.
    components.append(
        gr.Checkbox(
            label="neural-compressor.calibration",
            info="Enables calibration",
            value=False,
        )
    )
    components.append(
        gr.Dataframe(
            label="neural-compressor.calibration_config",
            type="array",
            headers=["dataset_name"],
            value=[["glue"]],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
        )
    )

    return components
def get_inference_config():
    """Build the Gradio inputs for the inference benchmark.

    Returns:
        A list of six components, in this order: duration, warmup_runs,
        memory, energy, input_shapes, forward_kwargs.
    """
    # (component class, keyword arguments) pairs, instantiated in order below.
    component_specs = (
        (
            gr.Textbox,
            {
                "label": "inference.duration",
                "info": "Minimum duration of benchmark in seconds",
                "value": 10,
            },
        ),
        (
            gr.Textbox,
            {
                "label": "inference.warmup_runs",
                "info": "Number of warmup runs before measurements",
                "value": 10,
            },
        ),
        (
            gr.Checkbox,
            {
                "label": "inference.memory",
                "info": "Measures the peak memory footprint",
                "value": False,
            },
        ),
        (
            gr.Checkbox,
            {
                "label": "inference.energy",
                "info": "Measures energy consumption and carbon emissions",
                "value": False,
            },
        ),
        (
            gr.Dataframe,
            {
                "label": "inference.input_shapes",
                "info": "Controllable input shapes, add more columns for more inputs",
                "type": "array",
                "headers": ["batch_size", "sequence_length"],
                "value": [[2, 16]],
                "row_count": (1, "static"),
                "col_count": (2, "dynamic"),
            },
        ),
        (
            gr.Dataframe,
            {
                "label": "inference.forward_kwargs",
                "info": "Keyword arguments for the forward pass, add more columns for more arguments",
                "type": "array",
                "headers": ["return_dict"],
                "value": [[False]],
                "row_count": (1, "static"),
                "col_count": (1, "dynamic"),
            },
        ),
    )

    return [make_component(**options) for make_component, options in component_specs]
def get_training_config():
    """Build the Gradio inputs for the training benchmark.

    Returns:
        A list of three components, in this order: warmup_steps,
        dataset_shapes, training_arguments.
    """
    warmup_steps_box = gr.Textbox(
        label="training.warmup_steps",
        value=40,
    )
    # Each table has a single fixed row; the column count is "dynamic".
    dataset_shapes_table = gr.Dataframe(
        label="training.dataset_shapes",
        type="array",
        headers=["dataset_size", "sequence_length"],
        value=[[500, 16]],
        row_count=(1, "static"),
        col_count=(2, "dynamic"),
    )
    training_arguments_table = gr.Dataframe(
        label="training.training_arguments",
        type="array",
        headers=["per_device_train_batch_size"],
        value=[[2]],
        row_count=(1, "static"),
        col_count=(1, "dynamic"),
    )

    return [warmup_steps_box, dataset_shapes_table, training_arguments_table]