Spaces:
Runtime error
Runtime error
Vokturz
committed on
Commit
·
d37299b
1
Parent(s):
0cc3d3a
added Apple vendor
Browse files- data/gpu_specs.csv +19 -0
- src/app.py +38 -17
data/gpu_specs.csv
CHANGED
@@ -932,3 +932,22 @@ Data Center GPU Max 1100,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"48 GB, HBM
|
|
932 |
Data Center GPU Max 1350,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"96 GB, HBM2e, 8192 bit",750 MHz,1200 MHz,14336 / 896 / 0,96.0,Intel,2023
|
933 |
Data Center GPU Max 1550,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"128 GB, HBM2e, 8192 bit",900 MHz,1600 MHz,16384 / 1024 / 0,128.0,Intel,2023
|
934 |
Data Center GPU Max Subsystem,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"128 GB, HBM2e, 8192 bit",900 MHz,1565 MHz,16384 / 1024 / 0,128.0,Intel,2023
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
932 |
Data Center GPU Max 1350,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"96 GB, HBM2e, 8192 bit",750 MHz,1200 MHz,14336 / 896 / 0,96.0,Intel,2023
|
933 |
Data Center GPU Max 1550,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"128 GB, HBM2e, 8192 bit",900 MHz,1600 MHz,16384 / 1024 / 0,128.0,Intel,2023
|
934 |
Data Center GPU Max Subsystem,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"128 GB, HBM2e, 8192 bit",900 MHz,1565 MHz,16384 / 1024 / 0,128.0,Intel,2023
|
935 |
+
M1 8 GB,M1,"Nov 10th, 2020",None,"8 GB, LPDDR4X, 128 bit",None,None,None,8.0,Apple,2020
|
936 |
+
M1 16 GB,M1,"Nov 10th, 2020",None,"16 GB, LPDDR4X, 128 bit",None,None,None,16.0,Apple,2020
|
937 |
+
M1 Pro 16 GB,M1 Pro,"Oct 18th, 2021",None,"16 GB, LPDDR5, 256 bit",None,None,None,16.0,Apple,2021
|
938 |
+
M1 Pro 32 GB,M1 Pro,"Oct 18th, 2021",None,"32 GB, LPDDR5, 256 bit",None,None,None,32.0,Apple,2021
|
939 |
+
M1 Max 32 GB,M1 Max,"Oct 18th, 2021",None,"32 GB, LPDDR5, 512 bit",None,None,None,32.0,Apple,2021
|
940 |
+
M1 Max 64 GB,M1 Max,"Oct 18th, 2021",None,"64 GB, LPDDR5, 512 bit",None,None,None,64.0,Apple,2021
|
941 |
+
M1 Ultra 64 GB,M1 Ultra,"Mar 18th, 2022",None,"64 GB, LPDDR5, 1024 bit",None,None,None,64.0,Apple,2022
|
942 |
+
M1 Ultra 128 GB,M1 Ultra,"Mar 18th, 2022",None,"128 GB, LPDDR5, 1024 bit",None,None,None,128.0,Apple,2022
|
943 |
+
M2 8 GB,M2,"Jun 24th, 2022",None,"8 GB, LPDDR5, 128 bit",None,None,None,8.0,Apple,2022
|
944 |
+
M2 16 GB,M2,"Jun 24th, 2022",None,"16 GB, LPDDR5, 128 bit",None,None,None,16.0,Apple,2022
|
945 |
+
M2 24 GB,M2,"Jun 24th, 2022",None,"24 GB, LPDDR5, 128 bit",None,None,None,24.0,Apple,2022
|
946 |
+
M2 Pro 32 GB,M2 Pro,"Jan 17th, 2023",None,"32 GB, LPDDR5, 256 bit",None,None,None,32.0,Apple,2023
|
947 |
+
M2 Pro 64 GB,M2 Pro,"Jan 17th, 2023",None,"64 GB, LPDDR5, 256 bit",None,None,None,64.0,Apple,2023
|
948 |
+
M2 Max 32 GB,M2 Max,"Jan 17th, 2023",None,"32 GB, LPDDR5, 512 bit",None,None,None,32.0,Apple,2023
|
949 |
+
M2 Max 64 GB,M2 Max,"Jan 17th, 2023",None,"64 GB, LPDDR5, 512 bit",None,None,None,64.0,Apple,2023
|
950 |
+
M2 Max 96 GB,M2 Max,"Jan 17th, 2023",None,"96 GB, LPDDR5, 512 bit",None,None,None,96.0,Apple,2023
|
951 |
+
M2 Ultra 64 GB,M2 Ultra,"Jun 13th, 2023",None,"64 GB, LPDDR5, 1024 bit",None,None,None,64.0,Apple,2023
|
952 |
+
M2 Ultra 128 GB,M2 Ultra,"Jun 13th, 2023",None,"128 GB, LPDDR5, 1024 bit",None,None,None,128.0,Apple,2023
|
953 |
+
M2 Ultra 192 GB,M2 Ultra,"Jun 13th, 2023",None,"192 GB, LPDDR5, 1024 bit",None,None,None,192.0,Apple,2023
|
src/app.py
CHANGED
@@ -27,22 +27,32 @@ def get_mistralai_table():
|
|
27 |
model = get_model("mistralai/Mistral-7B-v0.1", library="transformers", access_token="")
|
28 |
return calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
|
29 |
|
30 |
-
def show_gpu_info(info, trainable_params=0):
|
31 |
for var in ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']:
|
32 |
_info = info.loc[var]
|
33 |
-
if
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
else:
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
46 |
func(msg, icon=icon)
|
47 |
|
48 |
|
@@ -65,7 +75,6 @@ with col.expander("Information", expanded=True):
|
|
65 |
st.latex(r"""\text{Memory}_\text{Inference} \approx \text{Model Size} \times 1.2""")
|
66 |
st.markdown("""- For LoRa Fine-tuning, I'm asuming a **16-bit** dtype of trainable parameters. The formula (in terms of GB) is""")
|
67 |
st.latex(r"\text{Memory}_\text{LoRa} \approx \text{Model Size} + \left(\text{ \# trainable Params}_\text{Billions}\times\frac{16}{8} \times 4\right) \times 1.2")
|
68 |
-
st.markdown("- You can understand `int4` as models in `GPTQ-4bit`, `AWQ-4bit` or `Q4_0 GGUF/GGML` formats")
|
69 |
|
70 |
access_token = st.sidebar.text_input("Access token")
|
71 |
model_name = st.sidebar.text_input("Model name", value="mistralai/Mistral-7B-v0.1")
|
@@ -89,7 +98,7 @@ if model_name not in st.session_state:
|
|
89 |
st.session_state['actual_model'] = model_name
|
90 |
|
91 |
|
92 |
-
gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel"])
|
93 |
# year = st.sidebar.selectbox("Filter by Release Year", list(range(2014, 2024))[::-1], index=None)
|
94 |
gpu_info = gpu_specs[gpu_specs['Vendor'] == gpu_vendor].sort_values('Product Name')
|
95 |
# if year:
|
@@ -122,6 +131,10 @@ _memory_table.columns = ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']
|
|
122 |
_memory_table = _memory_table.stack().reset_index()
|
123 |
_memory_table.columns = ['dtype', 'Variable', 'Number of GPUs']
|
124 |
col1, col2 = st.columns([1,1.3])
|
|
|
|
|
|
|
|
|
125 |
with col1:
|
126 |
st.write(f"#### [{model_name}](https://huggingface.co/{model_name}) ({custom_ceil(memory_table.iloc[3,0],1):.1f}B)")
|
127 |
|
@@ -129,15 +142,23 @@ with col1:
|
|
129 |
tabs = st.tabs(dtypes)
|
130 |
for dtype, tab in zip(dtypes, tabs):
|
131 |
with tab:
|
|
|
|
|
|
|
132 |
info = _memory_table[_memory_table['dtype'] == dtype].set_index('Variable')
|
133 |
-
show_gpu_info(info, lora_pct)
|
134 |
st.write(memory_table.iloc[[0, 1, 2, 4]])
|
135 |
with col2:
|
|
|
|
|
|
|
|
|
136 |
num_colors= 4
|
137 |
colors = [px.colors.sequential.RdBu[int(i*(len(px.colors.sequential.RdBu)-1)/(num_colors-1))] for i in range(num_colors)]
|
138 |
fig = px.bar(_memory_table, x='Variable', y='Number of GPUs', color='dtype', barmode='group', color_discrete_sequence=colors)
|
139 |
-
fig.update_layout(title=dict(text=f"Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25))
|
140 |
, xaxis_tickfont_size=14, yaxis_tickfont_size=16, yaxis_dtick='1')
|
141 |
st.plotly_chart(fig, use_container_width=True)
|
142 |
|
143 |
|
|
|
|
27 |
model = get_model("mistralai/Mistral-7B-v0.1", library="transformers", access_token="")
|
28 |
return calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
|
29 |
|
30 |
+
def show_gpu_info(info, trainable_params=0, vendor=""):
|
31 |
for var in ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']:
|
32 |
_info = info.loc[var]
|
33 |
+
if vendor != "Apple":
|
34 |
+
if _info['Number of GPUs'] >= 3:
|
35 |
+
func = st.error
|
36 |
+
icon = "⛔"
|
37 |
+
elif _info['Number of GPUs'] == 2:
|
38 |
+
func = st.warning
|
39 |
+
icon = "⚠️"
|
40 |
+
else:
|
41 |
+
func = st.success
|
42 |
+
icon = "✅"
|
43 |
+
|
44 |
+
msg = f"You require **{_info['Number of GPUs']}** GPUs for **{var}**"
|
45 |
+
if var == 'LoRa Fine-tuning':
|
46 |
+
msg += f" ({trainable_params}%)"
|
47 |
else:
|
48 |
+
if _info['Number of GPUs']==1:
|
49 |
+
msg = f"You can run **{var}**"
|
50 |
+
func = st.success
|
51 |
+
icon = "✅"
|
52 |
+
else:
|
53 |
+
msg = f"You cannot run **{var}**"
|
54 |
+
func = st.error
|
55 |
+
icon = "⛔"
|
56 |
func(msg, icon=icon)
|
57 |
|
58 |
|
|
|
75 |
st.latex(r"""\text{Memory}_\text{Inference} \approx \text{Model Size} \times 1.2""")
|
76 |
st.markdown("""- For LoRa Fine-tuning, I'm asuming a **16-bit** dtype of trainable parameters. The formula (in terms of GB) is""")
|
77 |
st.latex(r"\text{Memory}_\text{LoRa} \approx \text{Model Size} + \left(\text{ \# trainable Params}_\text{Billions}\times\frac{16}{8} \times 4\right) \times 1.2")
|
|
|
78 |
|
79 |
access_token = st.sidebar.text_input("Access token")
|
80 |
model_name = st.sidebar.text_input("Model name", value="mistralai/Mistral-7B-v0.1")
|
|
|
98 |
st.session_state['actual_model'] = model_name
|
99 |
|
100 |
|
101 |
+
gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel", "Apple"])
|
102 |
# year = st.sidebar.selectbox("Filter by Release Year", list(range(2014, 2024))[::-1], index=None)
|
103 |
gpu_info = gpu_specs[gpu_specs['Vendor'] == gpu_vendor].sort_values('Product Name')
|
104 |
# if year:
|
|
|
131 |
_memory_table = _memory_table.stack().reset_index()
|
132 |
_memory_table.columns = ['dtype', 'Variable', 'Number of GPUs']
|
133 |
col1, col2 = st.columns([1,1.3])
|
134 |
+
|
135 |
+
if gpu_vendor == "Apple":
|
136 |
+
col.warning("""For M1/M2 Apple chips, PyTorch uses [Metal Performance Shaders (MPS)](https://huggingface.co/docs/accelerate/usage_guides/mps) as backend.\\
|
137 |
+
Remember that Apple M1/M2 chips share memory between CPU and GPU.""", icon="⚠️")
|
138 |
with col1:
|
139 |
st.write(f"#### [{model_name}](https://huggingface.co/{model_name}) ({custom_ceil(memory_table.iloc[3,0],1):.1f}B)")
|
140 |
|
|
|
142 |
tabs = st.tabs(dtypes)
|
143 |
for dtype, tab in zip(dtypes, tabs):
|
144 |
with tab:
|
145 |
+
if dtype in ["int4", "int8"]:
|
146 |
+
_dtype = dtype.replace("int", "")
|
147 |
+
st.markdown(f"`int{_dtype}` refers to models in `GPTQ-{_dtype}bit`, `AWQ-{_dtype}bit` or `Q{_dtype}_0 GGUF/GGML`")
|
148 |
info = _memory_table[_memory_table['dtype'] == dtype].set_index('Variable')
|
149 |
+
show_gpu_info(info, lora_pct, gpu_vendor)
|
150 |
st.write(memory_table.iloc[[0, 1, 2, 4]])
|
151 |
with col2:
|
152 |
+
extra = ""
|
153 |
+
if gpu_vendor == "Apple":
|
154 |
+
st.warning("This graph is irrelevant for M1/M2 chips as they can't run in parallel.", icon="⚠️")
|
155 |
+
extra = "⚠️"
|
156 |
num_colors= 4
|
157 |
colors = [px.colors.sequential.RdBu[int(i*(len(px.colors.sequential.RdBu)-1)/(num_colors-1))] for i in range(num_colors)]
|
158 |
fig = px.bar(_memory_table, x='Variable', y='Number of GPUs', color='dtype', barmode='group', color_discrete_sequence=colors)
|
159 |
+
fig.update_layout(title=dict(text=f"{extra} Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25))
|
160 |
, xaxis_tickfont_size=14, yaxis_tickfont_size=16, yaxis_dtick='1')
|
161 |
st.plotly_chart(fig, use_container_width=True)
|
162 |
|
163 |
|
164 |
+
|