sayakpaul committed
Commit ef59ad0
Parent: 80c09f1

fix: Dockerfile.

Files changed (2)
  1. Dockerfile +3 -2
  2. app.py → main.py +34 -30
Dockerfile CHANGED
@@ -1,4 +1,5 @@
-FROM nvcr.io/nvidia/tensorflow:22.12-tf2-py3
+# nvcr.io/nvidia/tensorflow:22.12-tf2-py3 often leads to rate-limiting issues. Hence the personal clone.
+FROM spsayakpaul/nvidia-tf-trt:22.12-tf2-py3
 
 # Set the working directory to /code
 WORKDIR /code
@@ -26,4 +27,4 @@ WORKDIR $HOME/app
 COPY --chown=user . $HOME/app
 
 # Define entrypoint.
-CMD ["python", "app.py"]
+CMD ["python", "main.py"]
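
The base-image swap is the substance of this fix: pulling nvcr.io/nvidia/tensorflow directly from Spaces kept hitting rate limits, so the build now uses a personal Docker Hub mirror of the same NGC container. If you reproduce the setup, a quick sanity check that the container really ships a TensorRT-enabled TensorFlow build might look like the following (a minimal sketch, not part of this commit; it assumes the NGC image's bundled TensorRT Python bindings are importable):

```python
import tensorflow as tf

# NGC TensorFlow images bundle TensorRT and link TensorFlow against it;
# on a stock `pip install tensorflow` this import typically fails.
import tensorrt

# A GPU must also be visible, or TF-TRT has nothing to run on.
print("GPUs:", tf.config.list_physical_devices("GPU"))
print("TensorRT:", tensorrt.__version__)
```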
app.py → main.py RENAMED
@@ -1,4 +1,4 @@
-import os
+from typing import List
 
 import gradio as gr
 import tensorflow as tf
@@ -6,11 +6,29 @@ from huggingface_hub import Repository
 
 from utils import benchmark, convert_to_trt
 
+DESCRIPTION = """
+This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
+This Space does the following things:
+
+* Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
+* Performs optimizations with TensorRT.
+* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
+* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from [hf.co/settings/tokens](https://hf.co/settings/tokens)) to `your_hf_token`.
+
+## Notes (important)
+
+* For this Space to work, having access to a GPU (at least T4) is a must.
+* This Space makes use of the [Docker x Space integration](https://huggingface.co/docs/hub/spaces-sdks-docker) to perform the TensorRT optimizations.
+* The default TensorFlow installation doesn't come with a correctly compiled TensorRT, which is why it's recommended to use an [NVIDIA container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow) for TensorRT-related work, and why this Space relies on the Docker x Space integration.
+* To get the maximum performance, one must use the same hardware for inference as the one used for running the optimizations. For example, if you used a T4-based machine to perform the optimizations, ensure that you're using the same GPU while running inference with your optimized model.
+* One is encouraged to try out different forms of post-training quantization as shown in [this notebook](https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb) to squeeze out the maximum performance using NVIDIA hardware and TensorRT.
+"""
+
 print("Loading ResNet50 model.")
 model = tf.keras.applications.ResNet50(weights="imagenet")
 
 
-def push_to_hub(hf_token: str, push_dir: str):
+def push_to_hub(hf_token: str, push_dir: str) -> str:
     try:
         if hf_token is None:
             return "No HF token provided. Model won't be pushed."
@@ -22,7 +40,7 @@ def push_to_hub(hf_token: str, push_dir: str):
         return e
 
 
-def post_optimization(list_of_strs):
+def post_optimization(list_of_strs: List[str]) -> str:
     tf_throughput, tf_trt_throughput = list_of_strs
     benchamrk_str = f"""
     ### TensorFlow
@@ -38,7 +56,7 @@ def post_optimization(list_of_strs):
     return benchamrk_str
 
 
-def run(hf_token: str):
+def run(hf_token: str) -> str:
     print("Serializing the ResNet50 as a SavedModel.")
     saved_model_path = "resnet50_saved_model"
     model.save(saved_model_path)
@@ -57,32 +75,18 @@ def run(hf_token: str):
     return benchmark_str
 
 
-DESCRIPTION = """
-This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
-This Space does the following things:
-
-* Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
-* Performs optimizations with TensorRT.
-* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
-* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from [hf.co/settings/tokens](https://hf.co/settings/tokens)) to `your_hf_token`.
-
-## Notes (important)
-
-* For this Space to work, having access to a GPU (at least T4) is a must.
-* This Space makes use of the [Docker x Space integration](https://huggingface.co/docs/hub/spaces-sdks-docker) to perform the TensorRT optimizations.
-* The default TensorFlow installation doesn't come with a correctly compiled TensorRT, which is why it's recommended to use an [NVIDIA container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow) for TensorRT-related work, and why this Space relies on the Docker x Space integration.
-* To get the maximum performance, one must use the same hardware for inference as the one used for running the optimizations. For example, if you used a T4-based machine to perform the optimizations, ensure that you're using the same GPU while running inference with your optimized model.
-* One is encouraged to try out different forms of post-training quantization as shown in [this notebook](https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb) to squeeze out the maximum performance using NVIDIA hardware and TensorRT.
-"""
+def launch_gradio():
+    demo = gr.Interface(
+        title="Optimize a ResNet50 model from Keras with TensorRT",
+        description=DESCRIPTION,
+        allow_flagging="never",
+        inputs=[gr.Text(max_lines=1, label="your_hf_token")],
+        outputs=[gr.Markdown(label="output")],
+        fn=run,
+    )
 
+    demo.launch(server_name="0.0.0.0", server_port=7860)
 
-demo = gr.Interface(
-    title="Optimize a ResNet50 model from Keras with TensorRT",
-    description=DESCRIPTION,
-    allow_flagging="never",
-    inputs=[gr.Text(max_lines=1, label="your_hf_token")],
-    outputs=[gr.Markdown(label="output")],
-    fn=run,
-)
 
-demo.launch()
+if __name__ == "__main__":
+    launch_gradio()
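
Two notes on the main.py changes. First, the explicit `demo.launch(server_name="0.0.0.0", server_port=7860)` matters for the Docker integration: Docker Spaces route traffic to port 7860, and Gradio's default localhost-only bind would be unreachable from outside the container. Second, utils.py (which provides `benchmark` and `convert_to_trt`) is not part of this diff; for context, here is a minimal sketch of what those helpers could look like on top of the TF-TRT API. The bodies, FP16 precision mode, batch size, and run count are illustrative assumptions, not the Space's actual code:

```python
import time

import numpy as np
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt


def convert_to_trt(saved_model_path: str, output_path: str) -> None:
    # Rewrite supported subgraphs of the SavedModel as TensorRT engines
    # (FP16 is chosen here purely for illustration).
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=saved_model_path,
        precision_mode=trt.TrtPrecisionMode.FP16,
    )
    converter.convert()
    converter.save(output_saved_model_dir=output_path)


def benchmark(saved_model_path: str, batch_size: int = 8, runs: int = 50) -> str:
    # Time the serving signature on random ResNet50-shaped inputs.
    infer = tf.saved_model.load(saved_model_path).signatures["serving_default"]
    batch = tf.constant(np.random.rand(batch_size, 224, 224, 3).astype(np.float32))
    infer(batch)  # warm-up so one-time graph/engine building isn't timed
    start = time.time()
    for _ in range(runs):
        infer(batch)
    throughput = batch_size * runs / (time.time() - start)
    return f"Throughput: {throughput:.1f} images/sec"
```

As the Space's description notes, engines built this way are tuned to the GPU they were built on, so benchmark and serve on the same hardware that ran the conversion.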