import os import time import traceback from typing import Optional from config_store import ( get_process_config, get_inference_config, get_openvino_config, get_pytorch_config, ) import gradio as gr from huggingface_hub import whoami from huggingface_hub.errors import GatedRepoError from gradio_huggingfacehub_search import HuggingfaceHubSearch from optimum_benchmark.launchers.device_isolation_utils import * # noqa from optimum_benchmark.backends.openvino.utils import TASKS_TO_OVMODEL from optimum_benchmark.backends.transformers_utils import TASKS_TO_MODEL_LOADERS from optimum_benchmark import ( Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig, OVConfig, ) from optimum_benchmark.logging_utils import setup_logging from optimum_benchmark.task_utils import infer_task_from_model_name_or_path DEVICE = "cpu" LAUNCHER = "process" SCENARIO = "inference" BACKENDS = ["pytorch", "openvino"] BENCHMARKS_HF_TOKEN = os.getenv("BENCHMARKS_HF_TOKEN") BENCHMARKS_REPO_ID = "optimum-benchmark/OpenVINO-Benchmarks" TASKS = set(TASKS_TO_OVMODEL.keys()) & set(TASKS_TO_MODEL_LOADERS.keys()) def parse_configs(inputs): configs = {"process": {}, "inference": {}, "pytorch": {}, "openvino": {}} for key, value in inputs.items(): if key.label == "model": model = value elif key.label == "task": task = value elif "." in key.label: backend, argument = key.label.split(".") configs[backend][argument] = value else: continue for key in configs.keys(): for k, v in configs[key].items(): if k in ["input_shapes", "generate_kwargs", "numactl_kwargs"]: configs[key][k] = eval(v) configs["process"] = ProcessConfig(**configs.pop("process")) configs["inference"] = InferenceConfig(**configs.pop("inference")) configs["pytorch"] = PyTorchConfig( task=task, model=model, device=DEVICE, **configs["pytorch"], ) configs["openvino"] = OVConfig( task=task, model=model, device=DEVICE, **configs["openvino"], ) return configs def run_benchmark(inputs, oauth_token: Optional[gr.OAuthToken]): if oauth_token is None: raise gr.Error("Please login to be able to run the benchmark.") timestamp = time.strftime("%Y-%m-%d-%H-%M-%S") use_name = whoami(oauth_token.token)["name"] folder = f"{use_name}/{timestamp}" gr.Info(f"📩 Benchmark will be saved under {BENCHMARKS_REPO_ID}/{folder}") outputs = {backend: "Running..." for backend in BACKENDS} configs = parse_configs(inputs) yield tuple(outputs[b] for b in BACKENDS) for backend in BACKENDS: try: benchmark_name = f"{folder}/{backend}" benchmark_config = BenchmarkConfig( name=benchmark_name, backend=configs[backend], launcher=configs[LAUNCHER], scenario=configs[SCENARIO], ) benchmark_report = Benchmark.launch(benchmark_config) benchmark_config.push_to_hub( repo_id=BENCHMARKS_REPO_ID, subfolder=benchmark_name, token=BENCHMARKS_HF_TOKEN, ) benchmark_report.push_to_hub( repo_id=BENCHMARKS_REPO_ID, subfolder=benchmark_name, token=BENCHMARKS_HF_TOKEN, ) except GatedRepoError: outputs[backend] = f"🔒 Model {configs[backend].model} is gated." yield tuple(outputs[b] for b in BACKENDS) gr.Info("🔒 Gated Repo Error while trying to access the model.") except Exception: outputs[backend] = f"\n```python-traceback\n{traceback.format_exc()}```\n" yield tuple(outputs[b] for b in BACKENDS) gr.Info(f"❌ Error while running benchmark for {backend} backend.") else: outputs[backend] = f"\n{benchmark_report.to_markdown_text()}\n" yield tuple(outputs[b] for b in BACKENDS) gr.Info(f"✅ Benchmark for {backend} backend ran successfully.") def update_task(model_id): try: inferred_task = infer_task_from_model_name_or_path(model_id) except GatedRepoError: raise gr.Error( f"Model {model_id} is gated, please use optimum-benchmark locally to benchmark it." ) except Exception: raise gr.Error( f"Error while inferring task for {model_id}, please select a task manually." ) if inferred_task not in TASKS: raise gr.Error( f"Task {inferred_task} is not supported by OpenVINO, please select a task manually." ) return inferred_task with gr.Blocks() as demo: # add login button gr.LoginButton() # add image gr.HTML( """""" "
"
"This Space uses Optimum-Benchmark to automatically benchmark a model from the Hub on different backends."
"
The results (config and report) will be pushed under your namespace in a benchmark repository on the Hub."
"