sayakpaul committed
Commit 80c09f1
Parent: 9be97d2

add: initial files.

Files changed (5)
  1. Dockerfile +29 -0
  2. README.md +3 -3
  3. app.py +88 -0
  4. requirements.txt +2 -0
  5. utils.py +69 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
+ FROM nvcr.io/nvidia/tensorflow:22.12-tf2-py3
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy the requirements file into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the dependencies listed in requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+
+ # Switch to the "user" user
+ USER user
+
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ # Define the default command.
+ CMD ["python", "app.py"]
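
The NGC base image is used here because, as the app's description notes, stock TensorFlow installations generally don't ship with a correctly compiled TensorRT. A minimal sanity check you could run inside the container before converting anything, assuming the standard `tf.sysconfig` and `tf.config` APIs (this snippet is illustrative and not part of the commit):

```python
# Hypothetical sanity check, not part of this commit: confirm the container's
# TensorFlow build is CUDA-enabled and that a GPU is visible.
import tensorflow as tf

print(tf.__version__)
print(tf.sysconfig.get_build_info())           # build metadata (CUDA/cuDNN versions)
print(tf.config.list_physical_devices("GPU"))  # should list at least one GPU
```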
README.md CHANGED
@@ -1,7 +1,7 @@
  ---
- title: Tensorrt Tf
- emoji: 💻
- colorFrom: purple
+ title: TensorRT
+ emoji: 🐬
+ colorFrom: pink
  colorTo: blue
  sdk: docker
  pinned: false
app.py ADDED
@@ -0,0 +1,88 @@
+ import os
+
+ import gradio as gr
+ import tensorflow as tf
+ from huggingface_hub import Repository
+
+ from utils import benchmark, convert_to_trt
+
+ print("Loading ResNet50 model.")
+ model = tf.keras.applications.ResNet50(weights="imagenet")
+
+
+ def push_to_hub(hf_token: str, push_dir: str) -> str:
+     try:
+         if not hf_token:
+             return "No HF token provided. Model won't be pushed."
+         else:
+             repo = Repository(local_dir=push_dir, token=hf_token)
+             commit_url = repo.push_to_hub()
+             return f"Model successfully pushed: [{commit_url}]({commit_url})"
+     except Exception as e:
+         return str(e)
+
+
+ def post_optimization(tf_throughput: str, tf_trt_throughput: str) -> str:
+     benchmark_str = f"""
+ ### TensorFlow
+
+ {tf_throughput}
+
+ ### TensorRT-optimized
+
+ {tf_trt_throughput}
+
+ ### (TensorRT) model push
+ """
+     return benchmark_str
+
+
+ def run(hf_token: str) -> str:
+     print("Serializing the ResNet50 model as a SavedModel.")
+     saved_model_path = "resnet50_saved_model"
+     model.save(saved_model_path)
+
+     print("Converting to TensorRT.")
+     tensorrt_path = "trt_resnet50_keras"
+     convert_to_trt(saved_model_path, tensorrt_path)
+
+     tf_throughput = benchmark(model)
+     tf_trt_throughput = benchmark(tensorrt_path)
+
+     benchmark_str = post_optimization(tf_throughput, tf_trt_throughput)
+     benchmark_str += "\n"
+     benchmark_str += push_to_hub(hf_token, tensorrt_path)
+
+     return benchmark_str
+
+
+ DESCRIPTION = """
+ This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt), NVIDIA's framework for optimizing deep learning models for its hardware.
+ This Space does the following:
+
+ * Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
+ * Optimizes the model with TensorRT.
+ * Runs and displays benchmarks comparing the throughput of the native TensorFlow SavedModel and its TensorRT-optimized variant.
+ * Optionally pushes the optimized model to a repository on the Hugging Face Hub. For this to work, you must provide a write-access token (from [hf.co/settings/tokens](https://hf.co/settings/tokens)) in `your_hf_token`.
+
+ ## Notes (important)
+
+ * This Space requires access to a GPU (at least a T4) to work.
+ * This Space uses the [Docker x Spaces integration](https://huggingface.co/docs/hub/spaces-sdks-docker) to perform the TensorRT optimizations.
+ * The default TensorFlow installation doesn't ship with a correctly compiled TensorRT, which is why it's recommended to use an [NVIDIA container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow) for TensorRT-related work; this is the reason this Space uses the Docker integration.
+ * For maximum performance, use the same hardware for inference as was used for the optimizations. For example, if you used a T4-based machine to perform the optimizations, make sure you're using the same GPU when running inference with the optimized model.
+ * You're encouraged to try different forms of post-training quantization, as shown in [this notebook](https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb), to squeeze maximum performance out of NVIDIA hardware with TensorRT.
+ """
+
+
+ demo = gr.Interface(
+     title="Optimize a ResNet50 model from Keras with TensorRT",
+     description=DESCRIPTION,
+     allow_flagging="never",
+     inputs=[gr.Text(max_lines=1, label="your_hf_token")],
+     outputs=[gr.Markdown(label="output")],
+     fn=run,
+ )
+
+ demo.launch()
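
For quick iteration outside the Gradio UI, the same pipeline can be exercised directly. A minimal sketch, assuming the files from this commit are importable; the flow simply mirrors `run` above and introduces no new API:

```python
# Hypothetical smoke test, not part of this commit: runs the same
# convert-then-benchmark flow as app.py without launching Gradio.
import tensorflow as tf

from utils import benchmark, convert_to_trt

model = tf.keras.applications.ResNet50(weights="imagenet")
model.save("resnet50_saved_model")                            # serialize as SavedModel
convert_to_trt("resnet50_saved_model", "trt_resnet50_keras")  # TF-TRT FP32 conversion

print(benchmark(model))                 # native Keras model
print(benchmark("trt_resnet50_keras"))  # TensorRT-optimized SavedModel
```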
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ gradio==3.14.0
+ huggingface_hub==0.11.1
utils.py ADDED
@@ -0,0 +1,69 @@
+ import time
+ from typing import Union
+
+ import numpy as np
+ import tensorflow as tf
+ from tensorflow.python.compiler.tensorrt import trt_convert as trt
+ from tensorflow.python.saved_model import tag_constants
+
+ BATCH_SIZE = 8
+ BATCH_INPUT = tf.random.normal((BATCH_SIZE, 224, 224, 3))
+ N_WARMUP_RUN = 50
+ N_RUN = 1000
+
+
+ def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
+     """Utility to convert an input SavedModel to an optimized TensorRT graph and save it.
+
+     Args:
+         input_model_path: Path to the SavedModel to optimize.
+         trt_model_path: Path to save the converted TensorRT graph to.
+     """
+     converter = trt.TrtGraphConverterV2(
+         input_saved_model_dir=input_model_path,
+         precision_mode=trt.TrtPrecisionMode.FP32,
+         max_workspace_size_bytes=8000000000,
+     )
+     converter.convert()
+     converter.save(output_saved_model_dir=trt_model_path)
+     print("Done converting to TF-TRT FP32.")
+
+
+ def benchmark(model: Union[tf.keras.Model, str]) -> str:
+     """Benchmarks a TensorFlow model or its TensorRT-optimized variant.
+
+     Args:
+         model: Either an instance of `tf.keras.Model` or a path to a saved
+             TensorRT model.
+
+     Returns:
+         A string containing throughput information for the given model.
+
+     References:
+         * https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb
+     """
+     elapsed_time = np.array([])
+
+     if isinstance(model, tf.keras.Model):
+         predict_fn = model.predict
+     else:
+         saved_model_loaded = tf.saved_model.load(model, tags=[tag_constants.SERVING])
+         predict_fn = saved_model_loaded.signatures["serving_default"]
+
+     # Warm up so one-time graph tracing / engine building doesn't skew the timings.
+     for i in range(N_WARMUP_RUN):
+         _ = predict_fn(BATCH_INPUT)
+
+     for i in range(N_RUN):
+         start_time = time.time()
+         _ = predict_fn(BATCH_INPUT)
+         end_time = time.time()
+         elapsed_time = np.append(elapsed_time, end_time - start_time)
+         if i % 50 == 0:
+             print("Step {}: {:4.1f}ms".format(i, (elapsed_time[-50:].mean()) * 1000))
+
+     return_str = "Throughput: {:.0f} images/s".format(
+         N_RUN * BATCH_SIZE / elapsed_time.sum()
+     )
+     print(return_str)
+     return return_str
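
`convert_to_trt` pins the precision to FP32; the notebook referenced in the docstring also covers reduced-precision modes. A sketch of an FP16 variant, assuming the same `TrtGraphConverterV2` API (the function name is hypothetical and not part of this commit):

```python
# Hypothetical FP16 variant of convert_to_trt, not part of this commit.
# FP16 typically improves throughput on Tensor Core GPUs such as the T4.
from tensorflow.python.compiler.tensorrt import trt_convert as trt


def convert_to_trt_fp16(input_model_path: str, trt_model_path: str) -> None:
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=input_model_path,
        precision_mode=trt.TrtPrecisionMode.FP16,  # reduced precision
        max_workspace_size_bytes=8000000000,
    )
    converter.convert()
    converter.save(output_saved_model_dir=trt_model_path)
```

As the app's description notes, benchmark an optimized model on the same GPU that performed the conversion, since the generated engines are tuned to that hardware.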