fix: remove benchmarking and keep it simple.
main.py CHANGED
@@ -2,9 +2,9 @@ from typing import List
 
 import gradio as gr
 import tensorflow as tf
-from huggingface_hub import HfApi,
+from huggingface_hub import HfApi, create_repo
 
-from utils import
+from utils import convert_to_trt
 
 DESCRIPTION = """
 This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
@@ -12,8 +12,8 @@ This Space does the following things:
 
 * Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
 * Performs optimizations with TensorRT.
-*
-*
+* Displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
+* Pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from hf.co/settings/tokens) to `your_hf_token`.
 
 As a consequence, you might have to wait for a few minutes to note the results.
 
@@ -34,16 +34,16 @@ model = tf.keras.applications.ResNet50(weights="imagenet")
 def push_to_hub(hf_token: str, push_dir: str) -> str:
     try:
         if hf_token == "":
-            return "
+            return "No HF token provided. Model won't be pushed."
         else:
             hf_api = HfApi(token=hf_token)
             user = hf_api.whoami()["name"]
             repo_id = f"{user}/{push_dir}"
             _ = create_repo(repo_id=repo_id, token=hf_token)
             url = hf_api.upload_folder(folder_path=push_dir, repo_id=repo_id)
-            return f"
+            return f"Model successfully pushed: [{url}]({url})"
     except Exception as e:
-        return f"
+        return f"{e}"
 
 
 def post_optimization(list_of_strs: List[str]) -> str:
@@ -57,6 +57,18 @@ def post_optimization(list_of_strs: List[str]) -> str:
 
 {tf_trt_throughput}
 
+### Benchmarking information
+
+| Parameter | Value |
+|:---:|:---:|
+| OS | Ubuntu 20.04.5 |
+| Python | 3.8.10 |
+| CUDA | 11.8 |
+| TensorFlow | 2.10.1 |
+| TensorRT | 8.5.1 |
+| GPU | T4 |
+| Benchmarking Script | [Link](https://gist.github.com/sayakpaul/ba4a4c47fcc661b9d18ea3b53e51f82e) |
+
 ### (TensorRT) model push
 """
     return benchamrk_str
@@ -71,8 +83,8 @@ def run(hf_token: str) -> str:
     tensorrt_path = "trt_resnet50_keras"
     convert_to_trt(saved_model_path, tensorrt_path)
 
-    tf_throughput =
-    tf_trt_throughput =
+    tf_throughput = "Throughput: 89 images/s"
+    tf_trt_throughput = "Throughput: 497 images/s"
 
     benchmark_str = post_optimization([tf_throughput, tf_trt_throughput])
     benchmark_str += "\n"
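The serialization step named in the DESCRIPTION happens before the `convert_to_trt` call visible in `run()`, but outside the hunks shown above. A minimal sketch of that step; only `model` and `saved_model_path` appear in the diff, the directory name is an assumption:

import tensorflow as tf

# Visible at module scope in the hunk header above.
model = tf.keras.applications.ResNet50(weights="imagenet")

# Assumed serialization step (not shown in the hunks): Keras models save
# directly to the SavedModel format that TF-TRT consumes.
saved_model_path = "resnet50_keras"  # hypothetical directory name
model.save(saved_model_path)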
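A note on the Hub calls in `push_to_hub` above: `create_repo` raises if the repository already exists, and the broad `except` then returns the error text instead of a link. A common variant, an editorial suggestion rather than what the commit does, is the `exist_ok` flag:

from huggingface_hub import create_repo

# exist_ok=True is a standard create_repo parameter: reusing an existing repo
# of the same name no longer raises, so re-running the Space still pushes.
# repo_id and token values here are placeholders.
create_repo(repo_id="user/trt_resnet50_keras", token="hf_xxx", exist_ok=True)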
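The removed `tf_throughput` and `tf_trt_throughput` assignments in `run()` are truncated by the diff viewer. Given the `benchmark` utility deleted from utils.py below, a plausible reconstruction (hypothetical, not verbatim from the commit) is:

# Hypothetical pre-commit lines: the actual right-hand sides are cut off in
# the diff, but benchmark() accepted a Keras model or a SavedModel path.
tf_throughput = benchmark(model)              # live run on the native model
tf_trt_throughput = benchmark(tensorrt_path)  # live run on the TF-TRT model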
utils.py CHANGED
@@ -1,15 +1,4 @@
-import time
-from typing import Union
-
-import numpy as np
-import tensorflow as tf
 from tensorflow.python.compiler.tensorrt import trt_convert as trt
-from tensorflow.python.saved_model import tag_constants
-
-BATCH_SIZE = 8
-BATCH_INPUT = tf.random.normal((BATCH_SIZE, 224, 224, 3))
-N_WARMUP_RUN = 25
-N_RUN = 100
 
 
 def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
@@ -27,43 +16,3 @@ def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
     converter.convert()
     converter.save(output_saved_model_dir=trt_model_path)
     print("Done Converting to TF-TRT FP32")
-
-
-def benchmark(model: Union[tf.keras.Model, str]) -> str:
-    """Benchmarking utility for a TensorFlow model and its optimized
-    TRT version.
-
-    Args:
-        model: Either a TensorFlow model of instance `tf.keras.Model` or a path to
-            the Saved TensorRT model.
-
-    Returns:
-        a string containing throughput information for the given model.
-
-    References:
-        * https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb
-    """
-    elapsed_time = []
-
-    if isinstance(model, tf.keras.Model):
-        predict_fn = model.predict
-    else:
-        saved_model_loaded = tf.saved_model.load(model, tags=[tag_constants.SERVING])
-        predict_fn = saved_model_loaded.signatures["serving_default"]
-
-    for i in range(N_WARMUP_RUN):
-        _ = predict_fn(BATCH_INPUT)
-
-    for i in range(N_RUN):
-        start_time = time.time()
-        _ = predict_fn(BATCH_INPUT)
-        end_time = time.time()
-        elapsed_time = np.append(elapsed_time, end_time - start_time)
-        if i % 50 == 0:
-            print("Step {}: {:4.1f}ms".format(i, (elapsed_time[-50:].mean()) * 1000))
-
-    return_str = "Throughput: {:.0f} images/s".format(
-        N_RUN * BATCH_SIZE / elapsed_time.sum()
-    )
-    print(return_str)
-    return return_str
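Only the tail of `convert_to_trt` survives in the hunks. A standard TF-TRT FP32 conversion consistent with the visible `converter.convert()` and `converter.save(...)` calls and the "Done Converting to TF-TRT FP32" message would look roughly like this; the constructor arguments are assumptions, not the Space's confirmed body:

from tensorflow.python.compiler.tensorrt import trt_convert as trt

def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
    # Sketch under assumptions: build a TF-TRT converter over the SavedModel
    # and request FP32 engines, matching the printed message below.
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=input_model_path,
        precision_mode=trt.TrtPrecisionMode.FP32,
    )
    converter.convert()
    converter.save(output_saved_model_dir=trt_model_path)
    print("Done Converting to TF-TRT FP32")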
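As a sanity check on the figures now hardcoded in main.py, the removed `benchmark` computed throughput as `N_RUN * BATCH_SIZE / elapsed_time.sum()`. With the deleted constants (100 timed runs at batch size 8), the reported numbers imply:

# Arithmetic implied by the removed benchmark() and the hardcoded figures.
total_images = 100 * 8     # N_RUN * BATCH_SIZE: 800 images per measurement
native_seconds = 800 / 89  # ~9.0 s total for the plain SavedModel
trt_seconds = 800 / 497    # ~1.6 s total for the TF-TRT model
speedup = 497 / 89         # ~5.6x on the T4 listed in the benchmarking table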