fix: Dockerfile.
Files changed:
- Dockerfile: +3 -2
- app.py → main.py: +34 -30
Dockerfile (CHANGED)

@@ -1,4 +1,5 @@
-FROM nvcr.io/nvidia/tensorflow:22.12-tf2-py3
+# nvcr.io/nvidia/tensorflow:22.12-tf2-py3 often leads to rate-limiting issues. Hence the personal clone.
+FROM spsayakpaul/nvidia-tf-trt:22.12-tf2-py3

 # Set the working directory to /code
 WORKDIR /code
@@ -26,4 +27,4 @@ WORKDIR $HOME/app
 COPY --chown=user . $HOME/app

 # Define entrypoint.
-CMD ["python", "app.py"]
+CMD ["python", "main.py"]
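The base-image swap is what gives the Space a TensorFlow build with a working TensorRT, so a quick sanity check inside the container can save a failed run. The snippet below is a minimal sketch and not part of this commit; it only confirms that a GPU is visible and that the TF-TRT converter can be imported (a missing or mismatched TensorRT usually only surfaces once a conversion is attempted).

# Illustrative sanity check (not part of this commit): run inside the container.
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

print("TensorFlow:", tf.__version__)
print("Visible GPUs:", tf.config.list_physical_devices("GPU"))
# TrtPrecisionMode lists the precision modes TF-TRT can target during conversion.
print("TF-TRT precision modes:", trt.TrtPrecisionMode.FP32, trt.TrtPrecisionMode.FP16, trt.TrtPrecisionMode.INT8)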
app.py → main.py (RENAMED)
@@ -1,4 +1,4 @@
-import
+from typing import List

 import gradio as gr
 import tensorflow as tf
@@ -6,11 +6,29 @@ from huggingface_hub import Repository

 from utils import benchmark, convert_to_trt

+DESCRIPTION = """
+This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
+This Space does the following things:
+
+* Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
+* Performs optimizations with TensorRT.
+* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
+* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from [hf.co/settings/tokens](hf.co/settings/tokens)) to `your_hf_token`.
+
+## Notes (important)
+
+* For this Space to work, having access to a GPU (at least T4) is a must.
+* This Space makes use of the [Docker x Space integration](https://huggingface.co/docs/hub/spaces-sdks-docker) to perform the TensorRT optimizations.
+* The default TensorFlow installation doesn't come loaded with a correctly compiled TensorRT. This is why it's recommended to use an [NVIDIA container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow) to perform your TensorRT-related stuff. This is why the Docker x Space integration was used in this Space.
+* To get the maximum peformance, one must use the same hardware for inference as the one used for running the optimizations. For example, if you used a T4-based machine to perform the optimizations, ensure that you're using the same GPU while running inference with your optimized model.
+* One is encouraged to try out different forms of post-training quantization as shown in [this notebook](https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb) to squeeze out the maximum performance using NVIDIA hardware and TensorRT.
+"""
+
 print("Loading ResNet50 model.")
 model = tf.keras.applications.ResNet50(weights="imagenet")


-def push_to_hub(hf_token: str, push_dir: str):
+def push_to_hub(hf_token: str, push_dir: str) -> str:
     try:
         if hf_token is None:
             return "No HF token provided. Model won't be pushed."
@@ -22,7 +40,7 @@ def push_to_hub(hf_token: str, push_dir: str):
         return e


-def post_optimization(list_of_strs):
+def post_optimization(list_of_strs: List[str]) -> str:
     tf_throughput, tf_trt_throughput = list_of_strs
     benchamrk_str = f"""
     ### TensorFlow
@@ -38,7 +56,7 @@ def post_optimization(list_of_strs):
     return benchamrk_str


-def run(hf_token: str):
+def run(hf_token: str) -> str:
     print("Serializing the ResNet50 as a SavedModel.")
     saved_model_path = "resnet50_saved_model"
     model.save(saved_model_path)
@@ -57,32 +75,18 @@ def run(hf_token: str):
     return benchmark_str


-DESCRIPTION = """
-This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
-This Space does the following things:
-
-* Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
-* Performs optimizations with TensorRT.
-* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
-* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from [hf.co/settings/tokens](hf.co/settings/tokens)) to `your_hf_token`.
-
-## Notes (important)
-
-* For this Space to work, having access to a GPU (at least T4) is a must.
-* This Space makes use of the [Docker x Space integration](https://huggingface.co/docs/hub/spaces-sdks-docker) to perform the TensorRT optimizations.
-* The default TensorFlow installation doesn't come loaded with a correctly compiled TensorRT. This is why it's recommended to use an [NVIDIA container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow) to perform your TensorRT-related stuff. This is why the Docker x Space integration was used in this Space.
-* To get the maximum peformance, one must use the same hardware for inference as the one used for running the optimizations. For example, if you used a T4-based machine to perform the optimizations, ensure that you're using the same GPU while running inference with your optimized model.
-* One is encouraged to try out different forms of post-training quantization as shown in [this notebook](https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb) to squeeze out the maximum performance using NVIDIA hardware and TensorRT.
-"""
+def launch_gradio():
+    demo = gr.Interface(
+        title="Optimize a ResNet50 model from Keras with TensorRT",
+        description=DESCRIPTION,
+        allow_flagging="never",
+        inputs=[gr.Text(max_lines=1, label="your_hf_token")],
+        outputs=[gr.Markdown(label="output")],
+        fn=run,
+    )

+    demo.launch(server_name="0.0.0.0", server_port=7860)

-demo = gr.Interface(
-    title="Optimize a ResNet50 model from Keras with TensorRT",
-    description=DESCRIPTION,
-    allow_flagging="never",
-    inputs=[gr.Text(max_lines=1, label="your_hf_token")],
-    outputs=[gr.Markdown(label="output")],
-    fn=run,
-)

-
+if __name__ == "__main__":
+    launch_gradio()
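main.py imports `benchmark` and `convert_to_trt` from a `utils` module that is not part of this diff. Below is a minimal sketch of what such helpers could look like, following the standard TF-TRT workflow from the NVIDIA notebook linked in the description; the signatures, argument names, and defaults are assumptions, not the Space's actual utils.py.

import time

import numpy as np
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt


def convert_to_trt(saved_model_dir: str, output_dir: str = "resnet50_trt_saved_model") -> str:
    """Convert a SavedModel with TF-TRT, targeting FP16 precision (assumed here)."""
    params = trt.TrtConversionParams(precision_mode=trt.TrtPrecisionMode.FP16)
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=saved_model_dir,
        conversion_params=params,
    )
    converter.convert()
    converter.save(output_dir)
    return output_dir


def benchmark(saved_model_dir: str, batch_size: int = 8, n_runs: int = 50) -> str:
    """Rough throughput estimate (images/sec) for a SavedModel's serving signature."""
    loaded = tf.saved_model.load(saved_model_dir)
    infer = loaded.signatures["serving_default"]
    images = tf.constant(np.random.rand(batch_size, 224, 224, 3).astype(np.float32))

    # Warm-up runs so one-time graph tracing / TRT engine building is not measured.
    for _ in range(10):
        infer(images)

    start = time.time()
    for _ in range(n_runs):
        infer(images)
    elapsed = time.time() - start
    return f"{(batch_size * n_runs) / elapsed:.1f} images/sec"

An INT8 variant of the same conversion additionally needs a calibration input function passed to `converter.convert()`, which is what the post-training quantization note in the description points to.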