Spaces:

sayakpaul
/

tensorrt-tf

Running on T4

App Files Files Community

sayakpaul HF staff committed on Dec 21, 2022

Commit

80c09f1

•

1 Parent(s): 9be97d2

add: initial files.

Browse files

Files changed (5) hide show

Dockerfile +29 -0
README.md +3 -3
app.py +88 -0
requirements.txt +2 -0
utils.py +69 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+# Base image: the TensorFlow container from NVIDIA's registry (tag 22.12-tf2-py3).
+FROM nvcr.io/nvidia/tensorflow:22.12-tf2-py3
+# Set the working directory to /code
+WORKDIR /code
+# Copy only requirements.txt into /code; the rest of the source is copied further down
+COPY ./requirements.txt /code/requirements.txt
+# Install (and upgrade) the packages listed in requirements.txt, without keeping pip's cache
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Set up a new user named "user" with user ID 1000 and a home directory (-m)
+RUN useradd -m -u 1000 user
+# Switch to the "user" user for every instruction that follows
+USER user
+# Set HOME to the user's home directory and put its .local/bin first on PATH
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Set the working directory to $HOME/app (a subdirectory of the user's home)
+WORKDIR $HOME/app
+# Copy the current directory contents into the container at $HOME/app setting the owner to the user
+COPY --chown=user . $HOME/app
+# Default command: run app.py with Python from the working directory.
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-title: Tensorrt Tf
-emoji: 💻
-colorFrom: purple
 colorTo: blue
 sdk: docker
 pinned: false

 ---
+title: TensorRT
+emoji: 🐬
+colorFrom: pink
 colorTo: blue
 sdk: docker
 pinned: false

app.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import os
+import gradio as gr
+import tensorflow as tf
+from huggingface_hub import Repository
+from utils import benchmark, convert_to_trt
+# Load the ImageNet-pretrained ResNet50 once at import time; `run` uses this module-level instance.
+print("Loading ResNet50 model.")
+model = tf.keras.applications.resnet50.ResNet50(weights="imagenet")
+def push_to_hub(hf_token: str, push_dir: str) -> str:
+    """Push the contents of `push_dir` to the Hugging Face Hub.
+    Args:
+        hf_token: Hugging Face access token. `None`, an empty string, or a
+            whitespace-only string all mean "no token provided".
+        push_dir: Local directory handed to `Repository` as `local_dir`.
+    Returns:
+        A status string: a notice that the push was skipped, a Markdown link
+        to the pushed commit, or the text of the error raised while pushing.
+        A string is always returned so callers can safely concatenate it.
+    """
+    # A blank text input arrives as an empty string rather than None, so
+    # treat every blank value as a missing token.
+    if not hf_token or not hf_token.strip():
+        return "No HF token provided. Model won't be pushed."
+    try:
+        repo = Repository(local_dir=push_dir, token=hf_token)
+        commit_url = repo.push_to_hub()
+    except Exception as e:
+        # Best-effort: report the failure as text instead of handing the
+        # exception object back to a caller that expects a string.
+        return str(e)
+    return f"Model successfully pushed: [{commit_url}]({commit_url})"
+def post_optimization(list_of_strs, tf_trt_throughput=None):
+    """Format the two throughput results as a Markdown report.
+    Args:
+        list_of_strs: Either a two-item sequence
+            `(tf_throughput, tf_trt_throughput)`, or only the TensorFlow
+            throughput string when `tf_trt_throughput` is passed separately.
+        tf_trt_throughput: Optional TensorRT throughput string. Supplying it
+            enables the two-positional-argument call form.
+    Returns:
+        Markdown text containing both results and ending with the
+        "(TensorRT) model push" heading.
+    """
+    if tf_trt_throughput is None:
+        # Single-argument form: unpack the pair.
+        tf_throughput, tf_trt_throughput = list_of_strs
+    else:
+        # Two-argument form: the first positional value is the TF result.
+        tf_throughput = list_of_strs
+    benchmark_str = f"""
+    ### TensorFlow
+    {tf_throughput}
+    ### TensorRT-optimized
+    {tf_trt_throughput}
+    ### (TensorRT) model push
+    """
+    return benchmark_str
+def run(hf_token: str):
+    """Serialize, optimize, benchmark and optionally push the ResNet50 model.
+    Args:
+        hf_token: Hugging Face token forwarded to `push_to_hub`. A blank
+            value is forwarded as `None`.
+    Returns:
+        Markdown text with both throughput results followed by the push status.
+    """
+    print("Serializing the ResNet50 as a SavedModel.")
+    saved_model_path = "resnet50_saved_model"
+    model.save(saved_model_path)
+    print("Converting to TensorRT.")
+    tensorrt_path = "trt_resnet50_keras"
+    convert_to_trt(saved_model_path, tensorrt_path)
+    tf_throughput = benchmark(model)
+    tf_trt_throughput = benchmark(tensorrt_path)
+    # Pass both results as one two-item sequence, the form
+    # `post_optimization` unpacks.
+    benchmark_str = post_optimization([tf_throughput, tf_trt_throughput])
+    # Normalise a blank token to None so the "no token provided" path is taken.
+    token = (hf_token or "").strip() or None
+    # str() guards the concatenation against a non-string status
+    # (for example an exception object).
+    push_status = str(push_to_hub(token, tensorrt_path))
+    return f"{benchmark_str}\n{push_status}"
+# Markdown passed to the Gradio interface as its description.
+DESCRIPTION = """
+This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
+This Space does the following things:
+* Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
+* Performs optimizations with TensorRT.
+* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
+* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from [hf.co/settings/tokens](https://hf.co/settings/tokens)) to `your_hf_token`.
+## Notes (important)
+* For this Space to work, having access to a GPU (at least T4) is a must.
+* This Space makes use of the [Docker x Space integration](https://huggingface.co/docs/hub/spaces-sdks-docker) to perform the TensorRT optimizations.
+* The default TensorFlow installation doesn't come loaded with a correctly compiled TensorRT. This is why it's recommended to use an [NVIDIA container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow) to perform your TensorRT-related stuff. This is why the Docker x Space integration was used in this Space.
+* To get the maximum performance, one must use the same hardware for inference as the one used for running the optimizations. For example, if you used a T4-based machine to perform the optimizations, ensure that you're using the same GPU while running inference with your optimized model.
+* One is encouraged to try out different forms of post-training quantization as shown in [this notebook](https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb) to squeeze out the maximum performance using NVIDIA hardware and TensorRT.
+"""
+# Single-input UI: the (optional) token goes in, the Markdown report from `run` comes out.
+demo = gr.Interface(
+    title="Optimize a ResNet50 model from Keras with TensorRT",
+    description=DESCRIPTION,
+    allow_flagging="never",
+    # The token is a secret, so mask it in the browser instead of echoing it.
+    inputs=[gr.Text(max_lines=1, label="your_hf_token", type="password")],
+    outputs=[gr.Markdown(label="output")],
+    fn=run,
+)
+# Inside a container the server has to listen on all interfaces to be reachable
+# from outside; 7860 is the port a Docker Space exposes by default.
+demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ gradio==3.14.0
2	+ huggingface_hub==0.11.1

utils.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import time
+from typing import Union
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.compiler.tensorrt import trt_convert as trt
+from tensorflow.python.saved_model import tag_constants
+# Benchmark settings: images per batch, untimed warm-up calls, timed calls,
+# and the fixed random batch that is fed to every call.
+BATCH_SIZE = 8
+N_WARMUP_RUN = 50
+N_RUN = 1000
+BATCH_INPUT = tf.random.normal(shape=(BATCH_SIZE, 224, 224, 3))
+def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
+    """Run the TF-TRT converter on a SavedModel and write the result to disk.
+    Args:
+        input_model_path: Directory of the SavedModel to convert.
+        trt_model_path: Directory in which the converted model is saved.
+    """
+    conversion_options = {
+        "input_saved_model_dir": input_model_path,
+        "precision_mode": trt.TrtPrecisionMode.FP32,
+        "max_workspace_size_bytes": 8000000000,
+    }
+    trt_converter = trt.TrtGraphConverterV2(**conversion_options)
+    trt_converter.convert()
+    trt_converter.save(output_saved_model_dir=trt_model_path)
+    print("Done Converting to TF-TRT FP32")
+def benchmark(model: Union[tf.keras.Model, str]) -> str:
+    """Measure the inference throughput of a model on `BATCH_INPUT`.
+    Runs `N_WARMUP_RUN` untimed calls, then times `N_RUN` calls individually.
+    Args:
+        model: Either a `tf.keras.Model` instance (timed through
+            `model.predict`) or a path to a SavedModel (timed through its
+            "serving_default" signature).
+    Returns:
+        a string of the form "Throughput: <N> images/s" for the given model.
+    References:
+        * https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb
+    """
+    if isinstance(model, tf.keras.Model):
+        predict_fn = model.predict
+    else:
+        saved_model_loaded = tf.saved_model.load(model, tags=[tag_constants.SERVING])
+        predict_fn = saved_model_loaded.signatures["serving_default"]
+    # Warm-up calls are executed but never timed.
+    for _ in range(N_WARMUP_RUN):
+        predict_fn(BATCH_INPUT)
+    # A plain list is appended to in O(1); np.append would copy the whole
+    # array on every iteration.
+    elapsed_time = []
+    for i in range(N_RUN):
+        # perf_counter is the clock meant for measuring short intervals; the
+        # wall clock used by time.time() can jump and has coarser resolution.
+        start_time = time.perf_counter()
+        predict_fn(BATCH_INPUT)
+        elapsed_time.append(time.perf_counter() - start_time)
+        if i % 50 == 0:
+            # Mean latency over (up to) the 50 most recent timed calls.
+            print("Step {}: {:4.1f}ms".format(i, np.mean(elapsed_time[-50:]) * 1000))
+    return_str = "Throughput: {:.0f} images/s".format(
+        N_RUN * BATCH_SIZE / np.sum(elapsed_time)
+    )
+    print(return_str)
+    return return_str