sayakpaul committed
Commit bf49c9f
1 Parent(s): c37b8d0

fix: remove benchmarking and keep it simple.

Files changed (2):
  1. main.py +21 -9
  2. utils.py +0 -51
main.py CHANGED
@@ -2,9 +2,9 @@ from typing import List
 
 import gradio as gr
 import tensorflow as tf
-from huggingface_hub import HfApi, Repository, create_repo
+from huggingface_hub import HfApi, create_repo
 
-from utils import benchmark, convert_to_trt
+from utils import convert_to_trt
 
 DESCRIPTION = """
 This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
@@ -12,8 +12,8 @@ This Space does the following things:
 
 * Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
 * Performs optimizations with TensorRT.
-* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
-* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from hf.co/settings/tokens) to `your_hf_token`.
+* Displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
+* Pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from hf.co/settings/tokens) to `your_hf_token`.
 
 As a consequence, you might have to wait for a few minutes to note the results.
 
@@ -34,16 +34,16 @@ model = tf.keras.applications.ResNet50(weights="imagenet")
 def push_to_hub(hf_token: str, push_dir: str) -> str:
     try:
         if hf_token == "":
-            return "### No HF token provided. Model won't be pushed."
+            return "No HF token provided. Model won't be pushed."
         else:
             hf_api = HfApi(token=hf_token)
             user = hf_api.whoami()["name"]
             repo_id = f"{user}/{push_dir}"
             _ = create_repo(repo_id=repo_id, token=hf_token)
             url = hf_api.upload_folder(folder_path=push_dir, repo_id=repo_id)
-            return f"### Model successfully pushed: [{url}]({url})"
+            return f"Model successfully pushed: [{url}]({url})"
     except Exception as e:
-        return f"### {e}"
+        return f"{e}"
 
 
 def post_optimization(list_of_strs: List[str]) -> str:
@@ -57,6 +57,18 @@ def post_optimization(list_of_strs: List[str]) -> str:
 
 {tf_trt_throughput}
 
+### Benchmarking information
+
+| Parameter | Value |
+|:---:|:---:|
+| OS | Ubuntu 20.04.5 |
+| Python | 3.8.10 |
+| CUDA | 11.8 |
+| TensorFlow | 2.10.1 |
+| TensorRT | 8.5.1 |
+| GPU | T4 |
+| Benchmarking Script | [Link](https://gist.github.com/sayakpaul/ba4a4c47fcc661b9d18ea3b53e51f82e) |
+
 ### (TensorRT) model push
 """
     return benchamrk_str
@@ -71,8 +83,8 @@ def run(hf_token: str) -> str:
     tensorrt_path = "trt_resnet50_keras"
     convert_to_trt(saved_model_path, tensorrt_path)
 
-    tf_throughput = benchmark(model)
-    tf_trt_throughput = benchmark(tensorrt_path)
+    tf_throughput = "Throughput: 89 images/s"
+    tf_trt_throughput = "Throughput: 497 images/s"
 
     benchmark_str = post_optimization([tf_throughput, tf_trt_throughput])
     benchmark_str += "\n"
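
Note that the `run()` hunk picks up at `tensorrt_path`, so the step that exports the Keras model to `saved_model_path` falls outside the hunks shown. A minimal sketch of that step, assuming a placeholder directory name (the actual path is not visible in this diff):

```python
import tensorflow as tf

# Load the pretrained ResNet50 and serialize it as a SavedModel, which
# convert_to_trt() then consumes. "resnet50_keras" is a placeholder path,
# not one confirmed by the diff.
model = tf.keras.applications.ResNet50(weights="imagenet")
saved_model_path = "resnet50_keras"
model.save(saved_model_path)  # TF 2.x writes the SavedModel format by default
```

The hardcoded `"Throughput: 89 images/s"` and `"Throughput: 497 images/s"` strings that replace the live `benchmark()` calls were presumably measured offline in the environment described by the new benchmarking table, using the script linked from it.
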
utils.py CHANGED
@@ -1,15 +1,4 @@
-import time
-from typing import Union
-
-import numpy as np
-import tensorflow as tf
 from tensorflow.python.compiler.tensorrt import trt_convert as trt
-from tensorflow.python.saved_model import tag_constants
-
-BATCH_SIZE = 8
-BATCH_INPUT = tf.random.normal((BATCH_SIZE, 224, 224, 3))
-N_WARMUP_RUN = 25
-N_RUN = 100
 
 
 def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
@@ -27,43 +16,3 @@ def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
     converter.convert()
     converter.save(output_saved_model_dir=trt_model_path)
     print("Done Converting to TF-TRT FP32")
-
-
-def benchmark(model: Union[tf.keras.Model, str]) -> str:
-    """Benchmarking utility for a TensorFlow model and its optimized
-    TRT version.
-
-    Args:
-        model: Either a TensorFlow model of instance `tf.keras.Model` or a path to
-            the Saved TensorRT model.
-
-    Returns:
-        a string containing throughput information for the given model.
-
-    References:
-        * https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb
-    """
-    elapsed_time = []
-
-    if isinstance(model, tf.keras.Model):
-        predict_fn = model.predict
-    else:
-        saved_model_loaded = tf.saved_model.load(model, tags=[tag_constants.SERVING])
-        predict_fn = saved_model_loaded.signatures["serving_default"]
-
-    for i in range(N_WARMUP_RUN):
-        _ = predict_fn(BATCH_INPUT)
-
-    for i in range(N_RUN):
-        start_time = time.time()
-        _ = predict_fn(BATCH_INPUT)
-        end_time = time.time()
-        elapsed_time = np.append(elapsed_time, end_time - start_time)
-        if i % 50 == 0:
-            print("Step {}: {:4.1f}ms".format(i, (elapsed_time[-50:].mean()) * 1000))
-
-    return_str = "Throughput: {:.0f} images/s".format(
-        N_RUN * BATCH_SIZE / elapsed_time.sum()
-    )
-    print(return_str)
-    return return_str