sayakpaul committed
Commit ef59ad0
Parent: 80c09f1

fix: Dockerfile.

Files changed (2)
  1. Dockerfile +3 -2
  2. app.py → main.py +34 -30
Dockerfile CHANGED
@@ -1,4 +1,5 @@
-FROM nvcr.io/nvidia/tensorflow:22.12-tf2-py3
+# nvcr.io/nvidia/tensorflow:22.12-tf2-py3 often leads to rate-limiting issues. Hence the personal clone.
+FROM spsayakpaul/nvidia-tf-trt:22.12-tf2-py3
 
 # Set the working directory to /code
 WORKDIR /code
@@ -26,4 +27,4 @@ WORKDIR $HOME/app
 COPY --chown=user . $HOME/app
 
 # Define entrypoint.
-CMD ["python", "app.py"]
+CMD ["python", "main.py"]
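
The base-image swap is the substance of this fix: pulling nvcr.io/nvidia/tensorflow directly from Spaces kept hitting rate limits, so the build now uses a personal Docker Hub mirror of the same NGC container. If you reproduce the setup, a quick sanity check that the container really ships a TensorRT-enabled TensorFlow build might look like the following (a minimal sketch, not part of this commit; it assumes the NGC image's bundled TensorRT Python bindings are importable):

```python
import tensorflow as tf

# NGC TensorFlow images bundle TensorRT and link TensorFlow against it;
# on a stock `pip install tensorflow` this import typically fails.
import tensorrt

# A GPU must also be visible, or TF-TRT has nothing to run on.
print("GPUs:", tf.config.list_physical_devices("GPU"))
print("TensorRT:", tensorrt.__version__)
```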
app.py → main.py RENAMED
@@ -1,4 +1,4 @@
-import os
+from typing import List
 
 import gradio as gr
 import tensorflow as tf
@@ -6,11 +6,29 @@ from huggingface_hub import Repository
 
 from utils import benchmark, convert_to_trt
 
+DESCRIPTION = """
+This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
+This Space does the following things:
+
+* Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
+* Performs optimizations with TensorRT.
+* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
+* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from [hf.co/settings/tokens](https://hf.co/settings/tokens)) to `your_hf_token`.
+
+## Notes (important)
+
+* For this Space to work, having access to a GPU (at least T4) is a must.
+* This Space makes use of the [Docker x Space integration](https://huggingface.co/docs/hub/spaces-sdks-docker) to perform the TensorRT optimizations.
+* The default TensorFlow installation doesn't come with a correctly compiled TensorRT, which is why it's recommended to use an [NVIDIA container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow) for TensorRT-related work, and why this Space relies on the Docker x Space integration.
+* To get the maximum performance, one must use the same hardware for inference as the one used for running the optimizations. For example, if you used a T4-based machine to perform the optimizations, ensure that you're using the same GPU while running inference with your optimized model.
+* One is encouraged to try out different forms of post-training quantization as shown in [this notebook](https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb) to squeeze out the maximum performance using NVIDIA hardware and TensorRT.
+"""
+
 print("Loading ResNet50 model.")
 model = tf.keras.applications.ResNet50(weights="imagenet")
 
 
-def push_to_hub(hf_token: str, push_dir: str):
+def push_to_hub(hf_token: str, push_dir: str) -> str:
     try:
         if hf_token is None:
             return "No HF token provided. Model won't be pushed."
@@ -22,7 +40,7 @@ def push_to_hub(hf_token: str, push_dir: str):
         return e
 
 
-def post_optimization(list_of_strs):
+def post_optimization(list_of_strs: List[str]) -> str:
     tf_throughput, tf_trt_throughput = list_of_strs
     benchamrk_str = f"""
     ### TensorFlow
@@ -38,7 +56,7 @@ def post_optimization(list_of_strs):
     return benchamrk_str
 
 
-def run(hf_token: str):
+def run(hf_token: str) -> str:
     print("Serializing the ResNet50 as a SavedModel.")
     saved_model_path = "resnet50_saved_model"
     model.save(saved_model_path)
@@ -57,32 +75,18 @@ def run(hf_token: str):
     return benchmark_str
 
 
-DESCRIPTION = """
-This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
-This Space does the following things:
-
-* Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
-* Performs optimizations with TensorRT.
-* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
-* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from [hf.co/settings/tokens](https://hf.co/settings/tokens)) to `your_hf_token`.
-
-## Notes (important)
-
-* For this Space to work, having access to a GPU (at least T4) is a must.
-* This Space makes use of the [Docker x Space integration](https://huggingface.co/docs/hub/spaces-sdks-docker) to perform the TensorRT optimizations.
-* The default TensorFlow installation doesn't come with a correctly compiled TensorRT, which is why it's recommended to use an [NVIDIA container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow) for TensorRT-related work, and why this Space relies on the Docker x Space integration.
-* To get the maximum performance, one must use the same hardware for inference as the one used for running the optimizations. For example, if you used a T4-based machine to perform the optimizations, ensure that you're using the same GPU while running inference with your optimized model.
-* One is encouraged to try out different forms of post-training quantization as shown in [this notebook](https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb) to squeeze out the maximum performance using NVIDIA hardware and TensorRT.
-"""
+def launch_gradio():
+    demo = gr.Interface(
+        title="Optimize a ResNet50 model from Keras with TensorRT",
+        description=DESCRIPTION,
+        allow_flagging="never",
+        inputs=[gr.Text(max_lines=1, label="your_hf_token")],
+        outputs=[gr.Markdown(label="output")],
+        fn=run,
+    )
 
+    demo.launch(server_name="0.0.0.0", server_port=7860)
 
-demo = gr.Interface(
-    title="Optimize a ResNet50 model from Keras with TensorRT",
-    description=DESCRIPTION,
-    allow_flagging="never",
-    inputs=[gr.Text(max_lines=1, label="your_hf_token")],
-    outputs=[gr.Markdown(label="output")],
-    fn=run,
-)
 
-demo.launch()
+if __name__ == "__main__":
+    launch_gradio()
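
Two notes on the main.py changes. First, the explicit `demo.launch(server_name="0.0.0.0", server_port=7860)` matters for the Docker integration: Docker Spaces route traffic to port 7860, and Gradio's default localhost-only bind would be unreachable from outside the container. Second, utils.py (which provides `benchmark` and `convert_to_trt`) is not part of this diff; for context, here is a minimal sketch of what those helpers could look like on top of the TF-TRT API. The bodies, FP16 precision mode, batch size, and run count are illustrative assumptions, not the Space's actual code:

```python
import time

import numpy as np
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt


def convert_to_trt(saved_model_path: str, output_path: str) -> None:
    # Rewrite supported subgraphs of the SavedModel as TensorRT engines
    # (FP16 is chosen here purely for illustration).
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=saved_model_path,
        precision_mode=trt.TrtPrecisionMode.FP16,
    )
    converter.convert()
    converter.save(output_saved_model_dir=output_path)


def benchmark(saved_model_path: str, batch_size: int = 8, runs: int = 50) -> str:
    # Time the serving signature on random ResNet50-shaped inputs.
    infer = tf.saved_model.load(saved_model_path).signatures["serving_default"]
    batch = tf.constant(np.random.rand(batch_size, 224, 224, 3).astype(np.float32))
    infer(batch)  # warm-up so one-time graph/engine building isn't timed
    start = time.time()
    for _ in range(runs):
        infer(batch)
    throughput = batch_size * runs / (time.time() - start)
    return f"Throughput: {throughput:.1f} images/sec"
```

As the Space's description notes, engines built this way are tuned to the GPU they were built on, so benchmark and serve on the same hardware that ran the conversion.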