sayakpaul committed
Commit bf49c9f
1 Parent(s): c37b8d0

fix: remove benchmarking and keep it simple.

Files changed (2):
  1. main.py +21 -9
  2. utils.py +0 -51
main.py CHANGED
@@ -2,9 +2,9 @@ from typing import List
 
 import gradio as gr
 import tensorflow as tf
-from huggingface_hub import HfApi, Repository, create_repo
+from huggingface_hub import HfApi, create_repo
 
-from utils import benchmark, convert_to_trt
+from utils import convert_to_trt
 
 DESCRIPTION = """
 This Space shows how to easily optimize a [ResNet50 model from Keras](https://keras.io/api/applications/) with [TensorRT](https://developer.nvidia.com/tensorrt). TensorRT is a framework to optimize deep learning models specifically for NVIDIA hardware.
@@ -12,8 +12,8 @@ This Space does the following things:
 
 * Loads a ResNet50 model from `tf.keras.applications` and serializes it as a SavedModel.
 * Performs optimizations with TensorRT.
-* Runs and displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
-* Optionally, pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from hf.co/settings/tokens) to `your_hf_token`.
+* Displays the benchmarks to compare the throughputs of the native TensorFlow SavedModel and its TensorRT-optimized variant.
+* Pushes the optimized model to a repository on the Hugging Face Hub. For this to work, one must provide a write-access token (from hf.co/settings/tokens) to `your_hf_token`.
 
 As a consequence, you might have to wait for a few minutes to note the results.
 
@@ -34,16 +34,16 @@ model = tf.keras.applications.ResNet50(weights="imagenet")
 def push_to_hub(hf_token: str, push_dir: str) -> str:
     try:
         if hf_token == "":
-            return "### No HF token provided. Model won't be pushed."
+            return "No HF token provided. Model won't be pushed."
         else:
             hf_api = HfApi(token=hf_token)
             user = hf_api.whoami()["name"]
             repo_id = f"{user}/{push_dir}"
             _ = create_repo(repo_id=repo_id, token=hf_token)
             url = hf_api.upload_folder(folder_path=push_dir, repo_id=repo_id)
-            return f"### Model successfully pushed: [{url}]({url})"
+            return f"Model successfully pushed: [{url}]({url})"
     except Exception as e:
-        return f"### {e}"
+        return f"{e}"
 
 
 def post_optimization(list_of_strs: List[str]) -> str:
@@ -57,6 +57,18 @@ def post_optimization(list_of_strs: List[str]) -> str:
 
 {tf_trt_throughput}
 
+### Benchmarking information
+
+| Parameter | Value |
+|:---:|:---:|
+| OS | Ubuntu 20.04.5 |
+| Python | 3.8.10 |
+| CUDA | 11.8 |
+| TensorFlow | 2.10.1 |
+| TensorRT | 8.5.1 |
+| GPU | T4 |
+| Benchmarking Script | [Link](https://gist.github.com/sayakpaul/ba4a4c47fcc661b9d18ea3b53e51f82e) |
+
 ### (TensorRT) model push
 """
     return benchamrk_str
@@ -71,8 +83,8 @@ def run(hf_token: str) -> str:
     tensorrt_path = "trt_resnet50_keras"
     convert_to_trt(saved_model_path, tensorrt_path)
 
-    tf_throughput = benchmark(model)
-    tf_trt_throughput = benchmark(tensorrt_path)
+    tf_throughput = "Throughput: 89 images/s"
+    tf_trt_throughput = "Throughput: 497 images/s"
 
     benchmark_str = post_optimization([tf_throughput, tf_trt_throughput])
     benchmark_str += "\n"
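
Note that the `run()` hunk picks up at `tensorrt_path`, so the step that exports the Keras model to `saved_model_path` falls outside the hunks shown. A minimal sketch of that step, assuming a placeholder directory name (the actual path is not visible in this diff):

```python
import tensorflow as tf

# Load the pretrained ResNet50 and serialize it as a SavedModel, which
# convert_to_trt() then consumes. "resnet50_keras" is a placeholder path,
# not one confirmed by the diff.
model = tf.keras.applications.ResNet50(weights="imagenet")
saved_model_path = "resnet50_keras"
model.save(saved_model_path)  # TF 2.x writes the SavedModel format by default
```

The hardcoded `"Throughput: 89 images/s"` and `"Throughput: 497 images/s"` strings that replace the live `benchmark()` calls were presumably measured offline in the environment described by the new benchmarking table, using the script linked from it.
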
utils.py CHANGED
@@ -1,15 +1,4 @@
-import time
-from typing import Union
-
-import numpy as np
-import tensorflow as tf
 from tensorflow.python.compiler.tensorrt import trt_convert as trt
-from tensorflow.python.saved_model import tag_constants
-
-BATCH_SIZE = 8
-BATCH_INPUT = tf.random.normal((BATCH_SIZE, 224, 224, 3))
-N_WARMUP_RUN = 25
-N_RUN = 100
 
 
 def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
@@ -27,43 +16,3 @@ def convert_to_trt(input_model_path: str, trt_model_path: str) -> None:
     converter.convert()
     converter.save(output_saved_model_dir=trt_model_path)
     print("Done Converting to TF-TRT FP32")
-
-
-def benchmark(model: Union[tf.keras.Model, str]) -> str:
-    """Benchmarking utility for a TensorFlow model and its optimized
-    TRT version.
-
-    Args:
-        model: Either a TensorFlow model of instance `tf.keras.Model` or a path to
-            the Saved TensorRT model.
-
-    Returns:
-        a string containing throughput information for the given model.
-
-    References:
-        * https://github.com/tensorflow/tensorrt/blob/master/tftrt/benchmarking-python/image_classification/NGC-TFv2-TF-TRT-inference-from-Keras-saved-model.ipynb
-    """
-    elapsed_time = []
-
-    if isinstance(model, tf.keras.Model):
-        predict_fn = model.predict
-    else:
-        saved_model_loaded = tf.saved_model.load(model, tags=[tag_constants.SERVING])
-        predict_fn = saved_model_loaded.signatures["serving_default"]
-
-    for i in range(N_WARMUP_RUN):
-        _ = predict_fn(BATCH_INPUT)
-
-    for i in range(N_RUN):
-        start_time = time.time()
-        _ = predict_fn(BATCH_INPUT)
-        end_time = time.time()
-        elapsed_time = np.append(elapsed_time, end_time - start_time)
-        if i % 50 == 0:
-            print("Step {}: {:4.1f}ms".format(i, (elapsed_time[-50:].mean()) * 1000))
-
-    return_str = "Throughput: {:.0f} images/s".format(
-        N_RUN * BATCH_SIZE / elapsed_time.sum()
-    )
-    print(return_str)
-    return return_str