import gradio as gr


def get_base_backend_config(backend_name="pytorch"):
    return [
        # seed
        gr.Textbox(
            value=42,
            label=f"{backend_name}.seed",
            info="Sets seed for reproducibility",
        ),
        # inter_op_num_threads
        gr.Textbox(
            value="null",
            label=f"{backend_name}.inter_op_num_threads",
            info="Use null for default and -1 for cpu_count()",
        ),
        # intra_op_num_threads
        gr.Textbox(
            value="null",
            label=f"{backend_name}.intra_op_num_threads",
            info="Use null for default and -1 for cpu_count()",
        ),
        # initial_isolation_check
        gr.Checkbox(
            value=True,
            label=f"{backend_name}.initial_isolation_check",
            info="Makes sure that initially, no other process is running on the target device",
        ),
        # continous_isolation_check
        gr.Checkbox(
            value=True,
            label=f"{backend_name}.continous_isolation_check",
            info="Makes sure that throughout the benchmark, no other process is running on the target device",
        ),
        # delete_cache
        gr.Checkbox(
            value=False,
            label=f"{backend_name}.delete_cache",
            info="Deletes model cache (weights & configs) after benchmark is done",
        ),
    ]


def get_pytorch_config():
    return get_base_backend_config(backend_name="pytorch") + [
        # no_weights
        gr.Checkbox(
            value=False,
            label="pytorch.no_weights",
            info="Generates random weights instead of downloading pretrained ones",
        ),
        # # device_map
        # gr.Dropdown(
        #     value="null",
        #     label="pytorch.device_map",
        #     choices=["null", "auto", "sequential"],
        #     info="Use null for default and `auto` or `sequential` the same way as in `from_pretrained`",
        # ),
        # torch_dtype
        gr.Dropdown(
            value="null",
            label="pytorch.torch_dtype",
            choices=["null", "bfloat16", "float16", "float32", "auto"],
            info="Use null for default and `auto` for automatic dtype selection",
        ),
        # amp_autocast
        gr.Checkbox(
            value=False,
            label="pytorch.amp_autocast",
            info="Enables PyTorch's native Automatic Mixed Precision",
        ),
        # amp_dtype
        gr.Dropdown(
            value="null",
            label="pytorch.amp_dtype",
            choices=["null", "bfloat16", "float16"],
            info="Use null for default",
        ),
        # torch_compile
        gr.Checkbox(
            value=False,
            label="pytorch.torch_compile",
            info="Compiles the model with torch.compile",
        ),
        # bettertransformer
        gr.Checkbox(
            value=False,
            label="pytorch.bettertransformer",
            info="Applies optimum.BetterTransformer for fastpath and optimized attention",
        ),
        # quantization_scheme
        gr.Dropdown(
            value="null",
            choices=["null", "gptq", "bnb"],
            label="pytorch.quantization_scheme",
            info="Use null for no quantization",
        ),
        # # use_ddp
        # gr.Checkbox(
        #     value=False,
        #     label="pytorch.use_ddp",
        #     info="Uses DistributedDataParallel for multi-gpu training",
        # ),
        # peft_strategy
        gr.Dropdown(
            value="null",
            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
            label="pytorch.peft_strategy",
            info="Use null for no PEFT",
        ),
    ]


def get_onnxruntime_config():
    return get_base_backend_config(backend_name="onnxruntime") + [
        # no_weights
        gr.Checkbox(
            value=False,
            label="onnxruntime.no_weights",
            info="Generates random weights instead of downloading pretrained ones",
        ),
        # export
        gr.Checkbox(
            value=True,
            label="onnxruntime.export",
            info="Exports the model to ONNX",
        ),
        # use_cache
        gr.Checkbox(
            value=True,
            label="onnxruntime.use_cache",
            info="Uses cached ONNX model if available",
        ),
        # use_merged
        gr.Checkbox(
            value=False,
            label="onnxruntime.use_merged",
            info="Uses merged ONNX model if available",
        ),
        # torch_dtype
        gr.Dropdown(
            value="null",
            label="onnxruntime.torch_dtype",
            choices=["null", "bfloat16", "float16", "float32", "auto"],
            info="Use null for default and `auto` for automatic dtype selection",
        ),
        # use_io_binding
        gr.Checkbox(
            value=True,
label="onnxruntime.use_io_binding", info="Uses IO binding for inference", ), # auto_optimization gr.Dropdown( value="null", label="onnxruntime.auto_optimization", choices=["null", "O1", "O2", "O3", "O4"], info="Use null for default", ), # auto_quantization gr.Dropdown( value="null", label="onnxruntime.auto_quantization", choices=["null", "arm64", "avx2", "avx512", "avx512_vnni", "tensorrt"], info="Use null for default", ), # optimization gr.Checkbox( value=False, label="onnxruntime.optimization", info="Enables manual optimization", ), # optimization_config gr.Dataframe( type="array", value=[["optimization_level"]], headers=["1"], row_count=(1, "static"), col_count=(1, "dynamic"), label="onnxruntime.optimization_config", ), # quantization gr.Checkbox( value=False, label="onnxruntime.quantization", info="Enables manual quantization", ), # quantization_config gr.Dataframe( type="array", value=[["is_static"]], headers=[False], row_count=(1, "static"), col_count=(1, "dynamic"), label="onnxruntime.quantization_config", info="Use null for default", ), # calibration gr.Checkbox( value=False, label="onnxruntime.calibration", info="Enables calibration", ), # calibration_config gr.Dataframe( type="array", value=[["glue"]], headers=["dataset_name"], row_count=(1, "static"), col_count=(1, "dynamic"), label="onnxruntime.calibration_config", info="Use null for default", ), # peft_strategy gr.Dropdown( value="null", label="onnxruntime.peft_strategy", choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"], info="Use null for full parameters fine-tuning", ), ] def get_openvino_config(): return get_base_backend_config(backend_name="openvino") + [ # export gr.Checkbox( value=True, label="openvino.export", info="Exports the model to ONNX", ), # use_cache gr.Checkbox( value=True, label="openvino.use_cache", info="Uses cached ONNX model if available", ), # use_merged gr.Checkbox( value=False, label="openvino.use_merged", info="Uses merged ONNX model if available", ), # reshape gr.Checkbox( value=False, label="openvino.reshape", info="Reshapes the model to the input shape", ), # half gr.Checkbox( value=False, label="openvino.half", info="Converts model to half precision", ), # quantization gr.Checkbox( value=False, label="openvino.quantization", info="Enables quantization", ), # quantization_config gr.Dataframe( type="array", headers=["compression", "input_info", "save_onnx_model"], value=[[None, None, None]], row_count=(1, "static"), col_count=(3, "dynamic"), label="openvino.quantization_config", ), # calibration gr.Checkbox( value=False, label="openvino.calibration", info="Enables calibration", ), # calibration_config gr.Dataframe( type="array", headers=["dataset_name"], value=[["glue"]], row_count=(1, "static"), col_count=(1, "dynamic"), label="openvino.calibration_config", ), ] def get_neural_compressor_config(): return get_base_backend_config(backend_name="neural-compressor") + [ # ptq_quantization gr.Checkbox( value=False, label="neural-compressor.ptq_quantization", info="Enables post-training quantization", ), # ptq_quantization_config gr.Dataframe( type="array", headers=["device"], value=[["cpu"]], row_count=(1, "static"), col_count=(1, "dynamic"), label="neural-compressor.ptq_quantization_config", ), # calibration gr.Checkbox( value=False, label="neural-compressor.calibration", info="Enables calibration", ), # calibration_config gr.Dataframe( type="array", headers=["dataset_name"], value=[["glue"]], row_count=(1, "static"), col_count=(1, "dynamic"), 
label="neural-compressor.calibration_config", ), ] def get_inference_config(): return [ # duration gr.Textbox( value=10, label="inference.duration", info="Minimum duration of benchmark in seconds", ), # warmup runs gr.Textbox( value=10, label="inference.warmup_runs", info="Number of warmup runs before measurements", ), # memory gr.Checkbox( value=False, label="inference.memory", info="Measures the peak memory footprint", ), # energy gr.Checkbox( value=False, label="inference.energy", info="Measures energy consumption and carbon emissions", ), # input_shapes gr.Dataframe( type="array", value=[[2, 16]], row_count=(1, "static"), col_count=(2, "dynamic"), label="inference.input_shapes", headers=["batch_size", "sequence_length"], info="Controllable input shapes, add more columns for more inputs", ), # forward kwargs gr.Dataframe( type="array", value=[[False]], headers=["return_dict"], row_count=(1, "static"), col_count=(1, "dynamic"), label="inference.forward_kwargs", info="Keyword arguments for the forward pass, add more columns for more arguments", ), ] def get_training_config(): return [ # warmup steps gr.Textbox( value=40, label="training.warmup_steps", ), # dataset_shapes gr.Dataframe( type="array", value=[[500, 16]], headers=["dataset_size", "sequence_length"], row_count=(1, "static"), col_count=(2, "dynamic"), label="training.dataset_shapes", ), # training_arguments gr.Dataframe( value=[[2]], type="array", row_count=(1, "static"), col_count=(1, "dynamic"), label="training.training_arguments", headers=["per_device_train_batch_size"], ), ]