harmdevries committed on
Commit
32aafee
·
1 Parent(s): 52a11ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -12
app.py CHANGED
@@ -34,7 +34,7 @@ TFLOPS = GPU_EFFICIENCY*TFLOPS
34
 
35
  # in ms
36
  def calc_exec_time(comp_flop, mem_bytes, include_overhead=True):
37
- exec_time = comp_flop/TFLOPS + mem_bytes/GB_S
38
  exec_time *= 1000
39
  if include_overhead:
40
  exec_time = max(exec_time, THREAD_OVERHEAD)
@@ -169,24 +169,21 @@ st.latex("A \in \mathbb{R}^{MxK}, B \in R^{KxN}, C \in \mathbb{R}^{MxN}")
169
  st.markdown('''
170
  To execute this operation on the GPU, we need to
171
  1. Read A, B from memory
172
- 2. Perform math operations
173
  3. Write C to memory
174
  ''')
175
 
176
-
177
- st.latex('''
178
- For float16 operations (2 bytes), we can estimate the memory access time of A as follows:
179
- T_mem(A) = 2*M*K / BW_mem
180
- where BW_mem is the memory bandwidth of the GPU (e.g. 1935 GB/s for A100)
181
- ''')
182
-
183
  st.markdown("For float16 operations (2 bytes), we can estimate the memory access time of A as follows:")
184
  st.latex("T_{mem}(A) = 2*M*K / BW_{mem}")
185
- st.markdown("where BW_mem is the memory bandwidth of the GPU (e.g. 1935 GB/s for A100)")
186
-
187
-
188
 
 
 
 
189
 
 
 
190
 
191
  breakdown = st.checkbox("Show breakdown per operation")
192
  if breakdown:
 
34
 
35
  # in ms
36
  def calc_exec_time(comp_flop, mem_bytes, include_overhead=True):
37
+ exec_time = max(comp_flop/TFLOPS, mem_bytes/GB_S)
38
  exec_time *= 1000
39
  if include_overhead:
40
  exec_time = max(exec_time, THREAD_OVERHEAD)
 
169
  st.markdown('''
170
  To execute this operation on the GPU, we need to
171
  1. Read A, B from memory
172
+ 2. Perform matrix multiplication
173
  3. Write C to memory
174
  ''')
175
 
 
 
 
 
 
 
 
176
  st.markdown("For float16 operations (2 bytes), we can estimate the memory access time of A as follows:")
177
  st.latex("T_{mem}(A) = 2*M*K / BW_{mem}")
178
+ st.markdown("where BW_mem is the memory bandwidth of the GPU (e.g. 1935 GB/s for an A100 GPU)")
179
+ st.markdown("The total time on memory access is T_mem = T_mem(A) + T_mem(B) + T_mem(C)")
 
180
 
181
+ st.markdown("We can estimate the compute time for the math operations as follows:")
182
+ st.latex("T_{math}(A \cdot B) = 2*M*K*N / BW_{math}")
183
+ st.markdown("where BW_math is the number of floating point operations per second (e.g. 312 TFLOPS for an A100 GPU)")
184
 
185
+ st.markdown("If we assume we can *perfectly* overlap memory access with math operations, then the estimated execution time for the operation is:")
186
+ st.latex("max(T_{math}, T_{mem})")
187
 
188
  breakdown = st.checkbox("Show breakdown per operation")
189
  if breakdown: