danielhn committed on
Commit
1158f50
1 Parent(s): ca29c17

Updated to latest page version

Files changed (2):
  1. app.py +96 -16
  2. graphs.py +185 -10
app.py CHANGED
@@ -6,13 +6,13 @@ import graphs
 from streamlit_helpers import add_filter, slider_filter, Collapsable
 
 st.set_page_config(
-    page_title="ML Agility tracker",
+    page_title="MLAgility tracker",
     page_icon="⚡",
     layout="wide",
 )
 
 # dashboard title
-st.title("ML Agility tracker ⚡")
+st.title("MLAgility tracker ⚡")
 
 
 def add_faq() -> None:
@@ -21,10 +21,92 @@ def add_faq() -> None:
     """
     faq = Collapsable()
     faq.add_section(
-        "Why is this so empty?",
+        "How is MLAgility different from MLPerf?",
         (
-            "Because the FAQ of huggingface website still needs to be written. "
-            "We don't use the same FAQ as in our internal dashboard."
+            "Deep learning pioneers have been judging their progress with the Machine Learning "
+            "Performance (MLPerf) inference benchmark, but have found that the corpus of models "
+            "is small enough that it allows vendors to primarily compete by hand-optimizing "
+            "kernels. MLAgility offers a complementary approach to MLPerf by examining the "
+            "capability of vendors to provide turnkey solutions to a larger corpus of "
+            "off-the-shelf models. By providing a workflow that is representative of the "
+            "mass adoption customer on a variety of ML accelerators and effectively disallowing "
+            "hand-crafted kernels, MLAgility bridges the gap between MLPerf and the mass adoption "
+            "of hardware acceleration."
+        ),
+    )
+    faq.add_section(
+        "Why now for MLAgility?",
+        (
+            "Deep learning algorithms and their associated DL hardware accelerators are "
+            "transitioning from early adoption into mass adoption. Production DL is now "
+            "becoming available to the masses, with a desire to customize models to tackle "
+            "their specific problems, and then take the path of least resistance into "
+            "production. A market for turnkey solutions, starting with a model as input and "
+            "provisioning a cost- and latency-effective acceleration solution, often in the cloud, "
+            "as output, has emerged."
+        ),
+    )
+    faq.add_section(
+        "Which tool was used to generate those results?",
+        (
+            "All MLAgility results have been generated using the <b>benchit</b> tool v1.0.0, which is part "
+            "of the MLAgility GitHub repository. You can learn more about it "
+            '<a href="https://github.com/groq/mlagility">here</a>.'
+        ),
+    )
+    faq.add_section(
+        "What is the experimental setup for each of the devices?",
+        [
+            "<b>x86</b>: Intel(R) Xeon(R) X40 CPU @ 2.00GHz on Google Cloud (custom: n2, 80 vCPU, 64.00 GiB) and OnnxRuntime version 1.14.0.",
+            "<b>nvidia</b>: NVIDIA A100 40GB on Google Cloud (a2-highgpu-1g) and TensorRT version 22.12-py3.",
+            "<b>groq</b>: GroqChip 1 on a self-hosted GroqNode server, GroqFlow version 3.0.2 TestPyPI package, and GroqWare™ Suite version 0.9.2.",
+            (
+                "You can find more details about the methodology "
+                '<a href="https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md">here</a>.'
+            ),
+        ],
+    )
+    faq.add_section(
+        "What are the current key limitations of those results?",
+        [
+            (
+                "Groq's latency is computed using GroqModel.estimate_latency(), which takes"
+                " into account deterministic compute time and estimates an ideal runtime with"
+                " ideal I/O time. It does not take into account runtime performance."
+            ),
+            "Results currently only represent batch 1 performance on a limited number of models, "
+            "devices, vendors, and runtimes. You can learn more about future directions by reading "
+            'the "What are the future directions of MLAgility?" FAQ section.',
+        ],
+    )
+    faq.add_section(
+        "What are the future directions of MLAgility?",
+        [
+            "Include additional classes of models (e.g. LLMs, GNNs, DLRMs).",
+            "Perform experiments that include sweeps over batch and input sizes.",
+            "Increase the number of devices from existing vendors (e.g. T4, A10, and H100).",
+            "Include devices from additional vendors (e.g. ARM and AMD).",
+            "Increase the number of runtimes supported (e.g. ORT and PyTorch for CUDA, PyTorch for x86).",
+        ],
+    )
+    faq.add_section(
+        "Who runs MLAgility?",
+        (
+            "MLAgility is currently maintained by the following individuals (in alphabetical order): "
+            "Daniel Holanda Noronha, Jeremy Fowers, Kalin Ovtcharov, and Ramakrishnan Sivakumar. We are actively seeking collaborators from across the industry."
+        ),
+    )
+    faq.add_section(
+        "License and Liability",
+        (
+            'THE MLAGILITY BENCHMARK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR '
+            "IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, "
+            "FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE "
+            "AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER "
+            "LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, "
+            "OUT OF OR IN CONNECTION WITH THE BENCHMARK OR THE USE OR OTHER DEALINGS IN THE "
+            "BENCHMARK. Read more about it "
+            '<a href="https://github.com/groq/mlagility/blob/main/LICENSE">here</a>.'
         ),
     )
 
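Note: the FAQ code above depends entirely on the Collapsable helper imported from streamlit_helpers, which this commit does not touch. As a reading aid only, here is a minimal stand-in consistent with how add_faq() calls it; the internals and the deploy() method name are assumptions, not the real helper:

import streamlit as st


class Collapsable:
    # Hypothetical sketch of streamlit_helpers.Collapsable -- NOT the real
    # implementation. add_section() takes a header plus either a string or a
    # list of strings, matching the call sites in add_faq() above.
    def __init__(self):
        self.sections = []  # (header, body) pairs in insertion order

    def add_section(self, header, body):
        self.sections.append((header, body))

    def deploy(self):
        # Bodies embed raw HTML (<b>, <a href=...>), hence unsafe_allow_html.
        for header, body in self.sections:
            with st.expander(header):
                for item in body if isinstance(body, list) else [body]:
                    st.markdown(item, unsafe_allow_html=True)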
 
@@ -72,6 +154,13 @@ with st.sidebar:
 
 st.markdown("## Summary Results")
 
+graphs.device_funnel(report)
+
+st.markdown("""#### Benchmark results""")
+baseline = st.selectbox("Baseline", ("x86", "nvidia", "groq"))
+graphs.speedup_text_summary(report, baseline)
+graphs.speedup_bar_chart(report, baseline)
+
 cols = st.columns(2)
 with cols[0]:
     st.markdown("""#### Workload origin""")
@@ -81,18 +170,9 @@ with cols[1]:
     st.markdown("""#### Parameter Size Distribution""")
     graphs.parameter_histogram(report, show_assembled=False)
 
-
-    st.markdown("""#### Benchmark results""")
-    baseline = st.selectbox("Baseline", ("x86", "nvidia", "groq"))
-    graphs.speedup_text_summary(report, baseline)
-    graphs.speedup_bar_chart(report, baseline)
-
 # FAQ Block
-cols = st.columns(2)
-with cols[0]:
-
-    st.markdown("""## About this workload analysis (FAQ)""")
-    add_faq()
+st.markdown("""## About this workload analysis (FAQ)""")
+add_faq()
 
 # Detailed data view (table)
 st.markdown("## Detailed Data View")
graphs.py CHANGED
@@ -18,9 +18,9 @@ colors = {
     "ocean_green": "#3ba272",
 }
 device_colors = {
-    "x86": colors["blue"],
-    "nvidia": colors["green"],
-    "groq": colors["orange"],
+    "x86": "#0071c5",
+    "nvidia": "#76b900",
+    "groq": "#F55036",
 }
 
 
@@ -35,6 +35,19 @@ class StageCount:
         self.assembles = int(np.sum(df["assembles"]))
 
 
+class DeviceStageCount:
+    def __init__(self, df: pd.DataFrame) -> None:
+        self.all_models = len(df)
+        self.base_onnx = int(np.sum(df["onnx_exported"]))
+        self.optimized_onnx = int(np.sum(df["onnx_optimized"]))
+        self.fp16_onnx = int(np.sum(df["onnx_converted"]))
+        self.x86 = df.loc[df.x86_latency != "-", "x86_latency"].count()
+        self.nvidia = df.loc[df.nvidia_latency != "-", "nvidia_latency"].count()
+        self.groq = df.loc[
+            df.groq_estimated_latency != "-", "groq_estimated_latency"
+        ].count()
+
+
 def stages_count_summary(current_df: pd.DataFrame, prev_df: pd.DataFrame) -> None:
     """
     Show count of how many models compile, assemble, etc
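Note: DeviceStageCount feeds the new device_funnel() chart added at the bottom of this file. The counting convention matters: the latency columns use the string "-" as a "no result" sentinel, so filtering with != "-" before .count() gives the number of models that actually acquired performance on each device. A quick sanity check on a toy frame (invented values, real column names):

import numpy as np
import pandas as pd

toy = pd.DataFrame(
    {
        "onnx_exported": [1, 1, 0],
        "onnx_optimized": [1, 1, 0],
        "onnx_converted": [1, 0, 0],
        "x86_latency": [1.2, "-", "-"],
        "nvidia_latency": [0.4, 0.9, "-"],
        "groq_estimated_latency": ["-", "-", "-"],
    }
)
print(int(np.sum(toy["onnx_exported"])))                             # base_onnx -> 2
print(toy.loc[toy.nvidia_latency != "-", "nvidia_latency"].count())  # nvidia -> 2
print(toy.loc[toy.x86_latency != "-", "x86_latency"].count())        # x86 -> 1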
@@ -476,14 +489,14 @@ def speedup_bar_chart(df: pd.DataFrame, baseline) -> None:
     )
 
 
-def kpi_to_markdown(compute_ratio, device, is_baseline=False, color="blue"):
+def kpi_to_markdown(compute_ratio, device, is_baseline=False, color="#FFFFFF"):
 
     title = f"""<br><br>
     <p style="font-family:sans-serif; font-size: 20px;text-align: center;">Median {device} Acceleration ({len(compute_ratio)} models):</p>"""
     if is_baseline:
         return (
             title
-            + f"""<p style="font-family:sans-serif; color:{colors[color]}; font-size: 26px;text-align: center;"> {1}x (Baseline)</p>"""
+            + f"""<p style="font-family:sans-serif; color:{color}; font-size: 26px;text-align: center;"> {1}x (Baseline)</p>"""
         )
 
     if len(compute_ratio) > 0:
@@ -497,8 +510,8 @@ def kpi_to_markdown(compute_ratio, device, is_baseline=False, color="blue"):
 
     return (
         title
-        + f"""<p style="font-family:sans-serif; color:{colors[color]}; font-size: 26px;text-align: center;"> {kpi_median}x</p>
-        <p style="font-family:sans-serif; color:{colors[color]}; font-size: 20px;text-align: center;"> min {kpi_min}x; max {kpi_max}x</p>
+        + f"""<p style="font-family:sans-serif; color:{color}; font-size: 26px;text-align: center;"> {kpi_median}x</p>
+        <p style="font-family:sans-serif; color:{color}; font-size: 20px;text-align: center;"> min {kpi_min}x; max {kpi_max}x</p>
         """
     )
 
@@ -523,19 +536,19 @@ def speedup_text_summary(df: pd.DataFrame, baseline) -> None:
     x86_text = kpi_to_markdown(
         x86_compute_ratio,
         device="Intel(R) Xeon(R) X40 CPU @ 2.00GHz",
-        color="blue",
+        color=device_colors["x86"],
         is_baseline=baseline == "x86",
     )
     groq_text = kpi_to_markdown(
         groq_compute_ratio,
         device="GroqChip 1",
-        color="orange",
+        color=device_colors["groq"],
         is_baseline=baseline == "groq",
     )
     nvidia_text = kpi_to_markdown(
         nvidia_compute_ratio,
         device="NVIDIA A100-PCIE-40GB",
-        color="green",
+        color=device_colors["nvidia"],
         is_baseline=baseline == "nvidia",
     )
 
@@ -613,3 +626,165 @@ def results_table(df: pd.DataFrame):
     df = df[[model_name in x for x in df["Model Name"]]]
 
     st.dataframe(df, height=min((len(df) + 1) * 35, 35 * 21))
+
+
+def device_funnel(df: pd.DataFrame) -> None:
+    """
+    Show how many models make it through each stage, from ONNX export to per-device performance
+    """
+    summ = DeviceStageCount(df)
+
+    stages = [
+        "All models",
+        "Export to ONNX",
+        "Optimize ONNX file",
+        "Convert to FP16",
+        "Acquire Performance",
+    ]
+    cols = st.columns(len(stages))
+
+    for idx, stage in enumerate(stages):
+        with cols[idx]:
+            st.markdown(stage)
+
+    # Show Sankey graph with percentages
+    sk_val = {
+        "All models": f"{summ.all_models} models - 100%",
+        "Convert to ONNX": f"{summ.base_onnx} models - "
+        + str(int(100 * summ.base_onnx / summ.all_models))
+        + "%",
+        "Optimize ONNX file": f"{summ.optimized_onnx} models - "
+        + str(int(100 * summ.optimized_onnx / summ.all_models))
+        + "%",
+        "Converts to FP16": f"{summ.fp16_onnx} models - "
+        + str(int(100 * summ.fp16_onnx / summ.all_models))
+        + "%",
+        "Acquires Nvidia Perf": f"{summ.nvidia} models - "
+        + str(int(100 * summ.nvidia / summ.all_models))
+        + "% (Nvidia)",
+        "Acquires Groq Perf": f"{summ.groq} models - "
+        + str(int(100 * summ.groq / summ.all_models))
+        + "% (Groq)",
+        "Acquires x86 Perf": f"{summ.x86} models - "
+        + str(int(100 * summ.x86 / summ.all_models))
+        + "% (x86)",
+    }
+    option = {
+        "series": {
+            "type": "sankey",
+            "animationDuration": 1,
+            "top": "0%",
+            "bottom": "20%",
+            "left": "0%",
+            "right": "19%",
+            "darkMode": "true",
+            "nodeWidth": 2,
+            "textStyle": {"fontSize": 16},
+            "nodeAlign": "left",
+            "lineStyle": {"curveness": 0},
+            "layoutIterations": 0,
+            "nodeGap": 12,
+            "layout": "none",
+            "emphasis": {"focus": "adjacency"},
+            "data": [
+                {
+                    "name": "All models",
+                    "value": sk_val["All models"],
+                    "itemStyle": {"color": "white", "borderColor": "white"},
+                },
+                {
+                    "name": "Convert to ONNX",
+                    "value": sk_val["Convert to ONNX"],
+                    "itemStyle": {"color": "white", "borderColor": "white"},
+                },
+                {
+                    "name": "Optimize ONNX file",
+                    "value": sk_val["Optimize ONNX file"],
+                    "itemStyle": {"color": "white", "borderColor": "white"},
+                },
+                {
+                    "name": "Converts to FP16",
+                    "value": sk_val["Converts to FP16"],
+                    "itemStyle": {"color": "white", "borderColor": "white"},
+                },
+                {
+                    "name": "Acquires Nvidia Perf",
+                    "value": sk_val["Acquires Nvidia Perf"],
+                    "itemStyle": {
+                        "color": device_colors["nvidia"],
+                        "borderColor": device_colors["nvidia"],
+                    },
+                },
+                {
+                    "name": "Acquires Groq Perf",
+                    "value": sk_val["Acquires Groq Perf"],
+                    "itemStyle": {
+                        "color": device_colors["groq"],
+                        "borderColor": device_colors["groq"],
+                    },
+                },
+                {
+                    "name": "Acquires x86 Perf",
+                    "value": sk_val["Acquires x86 Perf"],
+                    "itemStyle": {
+                        "color": device_colors["x86"],
+                        "borderColor": device_colors["x86"],
+                    },
+                },
+            ],
+            "label": {
+                "position": "insideTopLeft",
+                "borderWidth": 0,
+                "fontSize": 16,
+                "color": "white",
+                "textBorderWidth": 0,
+                "formatter": "{c}",
+            },
+            "links": [
+                {
+                    "source": "All models",
+                    "target": "Convert to ONNX",
+                    "value": summ.all_models,
+                },
+                {
+                    "source": "Convert to ONNX",
+                    "target": "Optimize ONNX file",
+                    "value": summ.optimized_onnx,
+                },
+                {
+                    "source": "Optimize ONNX file",
+                    "target": "Converts to FP16",
+                    "value": summ.fp16_onnx,
+                },
+                {
+                    "source": "Converts to FP16",
+                    "target": "Acquires Nvidia Perf",
+                    "value": int(
+                        summ.nvidia
+                        * summ.fp16_onnx
+                        / (summ.x86 + summ.nvidia + summ.groq)
+                    ),
+                },
+                {
+                    "source": "Converts to FP16",
+                    "target": "Acquires Groq Perf",
+                    "value": int(
+                        summ.groq
+                        * summ.fp16_onnx
+                        / (summ.x86 + summ.nvidia + summ.groq)
+                    ),
+                },
+                {
+                    "source": "Converts to FP16",
+                    "target": "Acquires x86 Perf",
+                    "value": int(
+                        summ.x86 * summ.fp16_onnx / (summ.x86 + summ.nvidia + summ.groq)
+                    ),
+                },
+            ],
+        }
+    }
+    st_echarts(
+        options=option,
+        height="70px",
+    )