"""Gradio demo app: predict deep-learning model inference latency with nn-Meter.

The app loads one nn-Meter latency predictor per supported hardware backend,
accepts an uploaded model file plus a device choice, and renders the predicted
total latency and a per-kernel breakdown as an HTML table.
"""
import json
import os

import gradio as gr
from nn_meter import load_latency_predictor

# One predictor per supported device; loading is slow (deserializes
# kernel-level regression models), so do it once at startup.
cortexA76cpu_predictor = load_latency_predictor("cortexA76cpu_tflite21")
adreno640gpu_predictor = load_latency_predictor("adreno640gpu_tflite21")
adreno630gpu_predictor = load_latency_predictor("adreno630gpu_tflite21")
myriadvpu_predictor = load_latency_predictor("myriadvpu_openvino2019r2")

# Maps the device name shown in the UI to its loaded predictor.
predictor_map = {
    "cortexA76cpu_tflite21": cortexA76cpu_predictor,
    "adreno640gpu_tflite21": adreno640gpu_predictor,
    "adreno630gpu_tflite21": adreno630gpu_predictor,
    "myriadvpu_openvino2019r2": myriadvpu_predictor,
}

# Feature names (in order) expected for each kernel type's configuration.
feature_for_kernel = {
    # remove the last two float
    "conv": ["HW", "CIN", "COUT", "KERNEL_SIZE", "STRIDES"],
    "dwconv": ["HW", "CIN", "COUT", "KERNEL_SIZE", "STRIDES"],
    "fc": ["CIN", "COUT"],
    # support up to 4 cin, if less than 4, the latter cin will be set to 0
    "concat": ["HW", "CIN1", "CIN2", "CIN3", "CIN4"],
    #
    "maxpool": ["HW", "CIN", "COUT", "KERNEL_SIZE", "POOL_STRIDES"],
    "avgpool": ["HW", "CIN", "COUT", "KERNEL_SIZE", "POOL_STRIDES"],
    "split": ["HW", "CIN"],
    "channelshuffle": ["HW", "CIN"],
    "se": ["HW", "CIN"],
    "global-avgpool": ["HW", "CIN"],
    "bnrelu": ["HW", "CIN"],
    "bn": ["HW", "CIN"],
    "hswish": ["HW", "CIN"],
    "relu": ["HW", "CIN"],
    "addrelu": ["HW", "CIN1", "CIN2"],
    "add": ["HW", "CIN1", "CIN2"],
}


def get_type(kernel_name):
    """Map a kernel name like ``"conv-bn-relu"`` to its feature-table key.

    The type is the text before the first ``-``; ``"global-avgpool"`` (and
    the alias ``"gap"``) would otherwise truncate to ``"global"``, so it is
    special-cased back to the full key.
    """
    operate_type = kernel_name.split("-")[0]
    if operate_type in ("global", "gap"):
        operate_type = "global-avgpool"
    return operate_type


def get_configuration(operate_type, value_arr):
    """Render ``FEATURE=value`` pairs for one kernel as a comma-joined string.

    For ``concat`` kernels the trailing zero CIN values are placeholders for
    unused inputs, so rendering stops at the first 0.  ``zip`` truncates to
    the shorter of the two sequences, so a short ``value_arr`` never raises.
    """
    feature_arr = feature_for_kernel[operate_type]
    if operate_type == "concat":
        configuration_arr = []
        for feature, value in zip(feature_arr, value_arr):
            if value == 0:
                break
            configuration_arr.append(f"{feature}={value}")
    else:
        configuration_arr = [
            f"{feature}={value}" for feature, value in zip(feature_arr, value_arr)
        ]
    return ", ".join(configuration_arr)


def data_process(data):
    """Convert raw per-kernel records into dicts for the HTML table.

    Each record is a 5-tuple/list:
    ``(execution order, kernel name, feature values, latency, detail name)``.
    """
    new_data = []
    for order, kernel_name, values, latency, detail in data:
        operate_type = get_type(kernel_name)
        new_data.append({
            "order": order,
            "type": operate_type,
            "configuration": get_configuration(operate_type, values),
            "latency": latency,
            "name": detail,
        })
    return new_data


def generate_html(hardware, latency, block_detail):
    """Build the HTML result view: total latency plus a per-kernel table.

    NOTE(review): the original HTML template was garbled in this copy of the
    file; the markup below reconstructs the visible column structure
    (Execution Order | Operator Type | Configuration | Latency (ms) |
    Detail Operator) — confirm styling against the deployed app.
    """
    rows = data_process(block_detail)
    header = (
        "<tr><th>Execution Order</th><th>Operator Type</th>"
        "<th>Configuration</th><th>Latency (ms)</th><th>Detail Operator</th></tr>"
    )
    body = "".join(
        "<tr><td>{order}</td><td>{type}</td><td>{configuration}</td>"
        "<td>{latency}</td><td>{name}</td></tr>".format(**row)
        for row in rows
    )
    return (
        f"<h3>Predicted latency on {hardware}: {latency} ms</h3>"
        f"<table>{header}{body}</table>"
    )


def get_latency(model_file, hardware):
    """Predict the uploaded model's latency on the chosen device.

    NOTE(review): this function was missing from this copy of the file but is
    referenced by the Gradio interface below (``fn=get_latency``);
    reconstructed from the nn-Meter predictor API — verify against the
    original app.
    """
    # Infer nn-Meter's model_type from the uploaded file's extension,
    # matching the three example formats (.pb / .onnx / .json).
    ext = os.path.splitext(model_file.name)[1].lower()
    model_type = {".pb": "tensorflow", ".onnx": "onnx", ".json": "nn-meter-ir"}[ext]
    predictor = predictor_map[hardware]
    latency = predictor.predict(model_file.name, model_type=model_type)
    # TODO(review): the code that produced the per-kernel breakdown was lost
    # from this copy; an empty list keeps the app functional with only the
    # total latency shown.
    block_detail = []
    return generate_html(hardware, latency, block_detail)


# NOTE(review): title/description/article were lost in this copy; text
# reconstructed from the surviving article fragment — confirm wording.
title = "nn-Meter"
description = (
    "Predict the inference latency of a DNN model on diverse edge devices "
    "with nn-Meter."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://github.com/microsoft/nn-Meter' target='_blank'>"
    "nn-Meter: towards accurate latency prediction of deep-learning model "
    "inference on diverse edge devices | Github Repo</a></p>"
)

examples = [
    ["samples/mobilenetv3small_0.pb", "cortexA76cpu_tflite21"],
    ["samples/mobilenetv3small_0.onnx", "adreno640gpu_tflite21"],
    ["samples/mobilenetv3small_0.json", "adreno630gpu_tflite21"],
]

inputs = [
    gr.inputs.File(label="Model File"),
    gr.inputs.Radio(
        choices=[
            "cortexA76cpu_tflite21",
            "adreno640gpu_tflite21",
            "adreno630gpu_tflite21",
            "myriadvpu_openvino2019r2",
        ],
        label="Device",
    ),
]

outputs = gr.outputs.HTML()

iface = gr.Interface(
    fn=get_latency,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
    allow_flagging="auto",
    css="""
    div[id="6"] { flex-direction: column; }
    div[id="12"] { margin-left: 0px !important; margin-top: 0.75em !important; }
    div[id="12"] iframe{ height: 80vh !important; }
    """,
)

iface.launch()