Spaces:

optimum
/

auto-benchmark

Running

App Files Files Community

auto-benchmark / config_store.py

IlyasMoutawwakil HF staff

remove tgi

eabde51 over 1 year ago

raw

history blame

12.5 kB

	import gradio as gr


	def get_base_backend_config(backend_name="pytorch"):
	    """Build the Gradio inputs that are common to every backend.

	    Args:
	        backend_name: Prefix placed in front of each component label
	            (``"<backend_name>.<option>"``).

	    Returns:
	        A new list of six components, in this order: seed,
	        inter_op_num_threads, intra_op_num_threads,
	        initial_isolation_check, continous_isolation_check, delete_cache.
	    """
	    prefix = f"{backend_name}."
	    # Both thread-count fields share the same help text.
	    threads_info = "Use null for default and -1 for cpu_count()"
	    seed = gr.Textbox(
	        label=prefix + "seed",
	        value=42,
	        info="Sets seed for reproducibility",
	    )
	    inter_op_threads = gr.Textbox(
	        label=prefix + "inter_op_num_threads",
	        value="null",
	        info=threads_info,
	    )
	    intra_op_threads = gr.Textbox(
	        label=prefix + "intra_op_num_threads",
	        value="null",
	        info=threads_info,
	    )
	    initial_check = gr.Checkbox(
	        label=prefix + "initial_isolation_check",
	        value=True,
	        info="Makes sure that initially, no other process is running on the target device",
	    )
	    # NOTE(review): the "continous" spelling is part of the label and is
	    # kept verbatim; presumably it matches a downstream option name.
	    continuous_check = gr.Checkbox(
	        label=prefix + "continous_isolation_check",
	        value=True,
	        info="Makes sure that throughout the benchmark, no other process is running on the target device",
	    )
	    delete_cache = gr.Checkbox(
	        label=prefix + "delete_cache",
	        value=False,
	        info="Deletes model cache (weights & configs) after benchmark is done",
	    )
	    return [
	        seed,
	        inter_op_threads,
	        intra_op_threads,
	        initial_check,
	        continuous_check,
	        delete_cache,
	    ]


	def get_pytorch_config():
	    """Build the Gradio inputs for the PyTorch backend.

	    Returns:
	        The shared backend components (labelled with the ``pytorch``
	        prefix) followed by the PyTorch-specific ones: no_weights,
	        torch_dtype, amp_autocast, amp_dtype, torch_compile,
	        bettertransformer, quantization_scheme and peft_strategy.
	    """
	    return get_base_backend_config(backend_name="pytorch") + [
	        # no_weights
	        gr.Checkbox(
	            value=False,
	            label="pytorch.no_weights",
	            info="Generates random weights instead of downloading pretrained ones",
	        ),
	        # device_map (currently disabled)
	        # gr.Dropdown(
	        #     value="null",
	        #     label="pytorch.device_map",
	        #     choices=["null", "auto", "sequential"],
	        #     info="Use null for default and `auto` or `sequential` the same way as in `from_pretrained`",
	        # ),
	        # torch_dtype
	        gr.Dropdown(
	            value="null",
	            label="pytorch.torch_dtype",
	            choices=["null", "bfloat16", "float16", "float32", "auto"],
	            info="Use null for default and `auto` for automatic dtype selection",
	        ),
	        # amp_autocast
	        gr.Checkbox(
	            value=False,
	            label="pytorch.amp_autocast",
	            info="Enables Pytorch's native Automatic Mixed Precision",
	        ),
	        # amp_dtype
	        gr.Dropdown(
	            value="null",
	            label="pytorch.amp_dtype",
	            info="Use null for default",
	            choices=["null", "bfloat16", "float16"],
	        ),
	        # torch_compile
	        gr.Checkbox(
	            value=False,
	            label="pytorch.torch_compile",
	            info="Compiles the model with torch.compile",
	        ),
	        # bettertransformer (help text typo "anf" corrected to "and")
	        gr.Checkbox(
	            value=False,
	            label="pytorch.bettertransformer",
	            info="Applies optimum.BetterTransformer for fastpath and optimized attention",
	        ),
	        # quantization_scheme
	        gr.Dropdown(
	            value="null",
	            choices=["null", "gptq", "bnb"],
	            label="pytorch.quantization_scheme",
	            info="Use null for no quantization",
	        ),
	        # use_ddp (currently disabled)
	        # gr.Checkbox(
	        #     value=False,
	        #     label="pytorch.use_ddp",
	        #     info="Uses DistributedDataParallel for multi-gpu training",
	        # ),
	        # peft_strategy
	        gr.Dropdown(
	            value="null",
	            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
	            label="pytorch.peft_strategy",
	            info="Use null for no PEFT",
	        ),
	    ]


	def get_onnxruntime_config():
	    """Build the Gradio inputs for the ONNX Runtime backend.

	    Returns:
	        The shared backend components (labelled with the ``onnxruntime``
	        prefix) followed by the ONNX Runtime-specific ones. Dataframe
	        inputs carry option names in ``headers`` and the matching option
	        values in their single row.
	    """
	    return get_base_backend_config(backend_name="onnxruntime") + [
	        # no_weights
	        # The label previously read "pytorch.no_weights" (copy-paste slip),
	        # which filed this option under the wrong backend.
	        gr.Checkbox(
	            value=False,
	            label="onnxruntime.no_weights",
	            info="Generates random weights instead of downloading pretrained ones",
	        ),
	        # export
	        gr.Checkbox(
	            value=True,
	            label="onnxruntime.export",
	            info="Exports the model to ONNX",
	        ),
	        # use_cache
	        gr.Checkbox(
	            value=True,
	            label="onnxruntime.use_cache",
	            info="Uses cached ONNX model if available",
	        ),
	        # use_merged
	        gr.Checkbox(
	            value=False,
	            label="onnxruntime.use_merged",
	            info="Uses merged ONNX model if available",
	        ),
	        # torch_dtype
	        gr.Dropdown(
	            value="null",
	            label="onnxruntime.torch_dtype",
	            choices=["null", "bfloat16", "float16", "float32", "auto"],
	            info="Use null for default and `auto` for automatic dtype selection",
	        ),
	        # use_io_binding
	        gr.Checkbox(
	            value=True,
	            label="onnxruntime.use_io_binding",
	            info="Uses IO binding for inference",
	        ),
	        # auto_optimization
	        gr.Dropdown(
	            value="null",
	            label="onnxruntime.auto_optimization",
	            choices=["null", "O1", "O2", "O3", "O4"],
	            info="Use null for default",
	        ),
	        # auto_quantization
	        gr.Dropdown(
	            value="null",
	            label="onnxruntime.auto_quantization",
	            choices=["null", "arm64", "avx2", "avx512", "avx512_vnni", "tensorrt"],
	            info="Use null for default",
	        ),
	        # optimization
	        gr.Checkbox(
	            value=False,
	            label="onnxruntime.optimization",
	            info="Enables manual optimization",
	        ),
	        # optimization_config
	        # Header/value were swapped (header "1", cell "optimization_level");
	        # the option name belongs in the header like every other Dataframe.
	        gr.Dataframe(
	            type="array",
	            headers=["optimization_level"],
	            value=[[1]],
	            row_count=(1, "static"),
	            col_count=(1, "dynamic"),
	            label="onnxruntime.optimization_config",
	        ),
	        # quantization
	        gr.Checkbox(
	            value=False,
	            label="onnxruntime.quantization",
	            info="Enables manual quantization",
	        ),
	        # quantization_config
	        # Same swap fixed here: the header was the boolean False and the
	        # cell held the option name "is_static".
	        gr.Dataframe(
	            type="array",
	            headers=["is_static"],
	            value=[[False]],
	            row_count=(1, "static"),
	            col_count=(1, "dynamic"),
	            label="onnxruntime.quantization_config",
	            info="Use null for default",
	        ),
	        # calibration
	        gr.Checkbox(
	            value=False,
	            label="onnxruntime.calibration",
	            info="Enables calibration",
	        ),
	        # calibration_config
	        gr.Dataframe(
	            type="array",
	            headers=["dataset_name"],
	            value=[["glue"]],
	            row_count=(1, "static"),
	            col_count=(1, "dynamic"),
	            label="onnxruntime.calibration_config",
	            info="Use null for default",
	        ),
	        # peft_strategy
	        gr.Dropdown(
	            value="null",
	            label="onnxruntime.peft_strategy",
	            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
	            info="Use null for full parameters fine-tuning",
	        ),
	    ]


	def get_openvino_config():
	    """Build the Gradio inputs for the OpenVINO backend.

	    Returns:
	        The shared backend components (labelled with the ``openvino``
	        prefix) followed by export, use_cache, use_merged, reshape, half,
	        quantization, quantization_config, calibration and
	        calibration_config.
	    """
	    # Every Dataframe here is a single fixed row.
	    single_row = (1, "static")
	    components = get_base_backend_config(backend_name="openvino")
	    components.extend(
	        [
	            gr.Checkbox(
	                label="openvino.export",
	                info="Exports the model to ONNX",
	                value=True,
	            ),
	            gr.Checkbox(
	                label="openvino.use_cache",
	                info="Uses cached ONNX model if available",
	                value=True,
	            ),
	            gr.Checkbox(
	                label="openvino.use_merged",
	                info="Uses merged ONNX model if available",
	                value=False,
	            ),
	            gr.Checkbox(
	                label="openvino.reshape",
	                info="Reshapes the model to the input shape",
	                value=False,
	            ),
	            gr.Checkbox(
	                label="openvino.half",
	                info="Converts model to half precision",
	                value=False,
	            ),
	            gr.Checkbox(
	                label="openvino.quantization",
	                info="Enables quantization",
	                value=False,
	            ),
	            # Three option columns, all left unset by default.
	            gr.Dataframe(
	                label="openvino.quantization_config",
	                type="array",
	                headers=["compression", "input_info", "save_onnx_model"],
	                value=[[None, None, None]],
	                row_count=single_row,
	                col_count=(3, "dynamic"),
	            ),
	            gr.Checkbox(
	                label="openvino.calibration",
	                info="Enables calibration",
	                value=False,
	            ),
	            gr.Dataframe(
	                label="openvino.calibration_config",
	                type="array",
	                headers=["dataset_name"],
	                value=[["glue"]],
	                row_count=single_row,
	                col_count=(1, "dynamic"),
	            ),
	        ]
	    )
	    return components


	def get_neural_compressor_config():
	    """Build the Gradio inputs for the Neural Compressor backend.

	    Returns:
	        The shared backend components (labelled with the
	        ``neural-compressor`` prefix) followed by ptq_quantization,
	        ptq_quantization_config, calibration and calibration_config.
	    """
	    backend = "neural-compressor"
	    return [
	        *get_base_backend_config(backend_name=backend),
	        # Post-training quantization toggle and its options.
	        gr.Checkbox(
	            label=f"{backend}.ptq_quantization",
	            info="Enables post-training quantization",
	            value=False,
	        ),
	        gr.Dataframe(
	            label=f"{backend}.ptq_quantization_config",
	            type="array",
	            headers=["device"],
	            value=[["cpu"]],
	            row_count=(1, "static"),
	            col_count=(1, "dynamic"),
	        ),
	        # Calibration toggle and its options.
	        gr.Checkbox(
	            label=f"{backend}.calibration",
	            info="Enables calibration",
	            value=False,
	        ),
	        gr.Dataframe(
	            label=f"{backend}.calibration_config",
	            type="array",
	            headers=["dataset_name"],
	            value=[["glue"]],
	            row_count=(1, "static"),
	            col_count=(1, "dynamic"),
	        ),
	    ]


	def get_inference_config():
	    """Build the Gradio inputs for the inference benchmark.

	    Returns:
	        A new list of six components, in this order: duration,
	        warmup_runs, memory, energy, input_shapes, forward_kwargs.
	    """
	    fields = []
	    # Timing controls.
	    fields.append(
	        gr.Textbox(
	            label="inference.duration",
	            info="Minimum duration of benchmark in seconds",
	            value=10,
	        )
	    )
	    fields.append(
	        gr.Textbox(
	            label="inference.warmup_runs",
	            info="Number of warmup runs before measurements",
	            value=10,
	        )
	    )
	    # Optional extra measurements, both off by default.
	    fields.append(
	        gr.Checkbox(
	            label="inference.memory",
	            info="Measures the peak memory footprint",
	            value=False,
	        )
	    )
	    fields.append(
	        gr.Checkbox(
	            label="inference.energy",
	            info="Measures energy consumption and carbon emissions",
	            value=False,
	        )
	    )
	    # Single-row tables whose columns can be extended.
	    fields.append(
	        gr.Dataframe(
	            label="inference.input_shapes",
	            info="Controllable input shapes, add more columns for more inputs",
	            type="array",
	            headers=["batch_size", "sequence_length"],
	            value=[[2, 16]],
	            row_count=(1, "static"),
	            col_count=(2, "dynamic"),
	        )
	    )
	    fields.append(
	        gr.Dataframe(
	            label="inference.forward_kwargs",
	            info="Keyword arguments for the forward pass, add more columns for more arguments",
	            type="array",
	            headers=["return_dict"],
	            value=[[False]],
	            row_count=(1, "static"),
	            col_count=(1, "dynamic"),
	        )
	    )
	    return fields


	def get_training_config():
	    """Build the Gradio inputs for the training benchmark.

	    Returns:
	        A new list of three components, in this order: warmup_steps,
	        dataset_shapes, training_arguments.
	    """
	    warmup_steps = gr.Textbox(
	        label="training.warmup_steps",
	        value=40,
	    )
	    dataset_shapes = gr.Dataframe(
	        label="training.dataset_shapes",
	        type="array",
	        headers=["dataset_size", "sequence_length"],
	        value=[[500, 16]],
	        row_count=(1, "static"),
	        col_count=(2, "dynamic"),
	    )
	    training_arguments = gr.Dataframe(
	        label="training.training_arguments",
	        type="array",
	        headers=["per_device_train_batch_size"],
	        value=[[2]],
	        row_count=(1, "static"),
	        col_count=(1, "dynamic"),
	    )
	    return [warmup_steps, dataset_shapes, training_arguments]