Vokturz committed on
Commit
d37299b
1 Parent(s): 0cc3d3a

added Apple vendor

Files changed (2)
  1. data/gpu_specs.csv +19 -0
  2. src/app.py +38 -17
data/gpu_specs.csv CHANGED
@@ -932,3 +932,22 @@ Data Center GPU Max 1100,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"48 GB, HBM
 Data Center GPU Max 1350,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"96 GB, HBM2e, 8192 bit",750 MHz,1200 MHz,14336 / 896 / 0,96.0,Intel,2023
 Data Center GPU Max 1550,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"128 GB, HBM2e, 8192 bit",900 MHz,1600 MHz,16384 / 1024 / 0,128.0,Intel,2023
 Data Center GPU Max Subsystem,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"128 GB, HBM2e, 8192 bit",900 MHz,1565 MHz,16384 / 1024 / 0,128.0,Intel,2023
+M1 8 GB,M1,"Nov 10th, 2020",None,"8 GB, LPDDR4X, 128 bit",None,None,None,8.0,Apple,2020
+M1 16 GB,M1,"Nov 10th, 2020",None,"16 GB, LPDDR4X, 128 bit",None,None,None,16.0,Apple,2020
+M1 Pro 16 GB,M1 Pro,"Oct 18th, 2021",None,"16 GB, LPDDR5, 256 bit",None,None,None,16.0,Apple,2021
+M1 Pro 32 GB,M1 Pro,"Oct 18th, 2021",None,"32 GB, LPDDR5, 256 bit",None,None,None,32.0,Apple,2021
+M1 Max 32 GB,M1 Max,"Oct 18th, 2021",None,"32 GB, LPDDR5, 512 bit",None,None,None,32.0,Apple,2021
+M1 Max 64 GB,M1 Max,"Oct 18th, 2021",None,"64 GB, LPDDR5, 512 bit",None,None,None,64.0,Apple,2021
+M1 Ultra 64 GB,M1 Ultra,"Mar 18th, 2022",None,"64 GB, LPDDR5, 1024 bit",None,None,None,64.0,Apple,2022
+M1 Ultra 128 GB,M1 Ultra,"Mar 18th, 2022",None,"128 GB, LPDDR5, 1024 bit",None,None,None,128.0,Apple,2022
+M2 8 GB,M2,"Jun 24th, 2022",None,"8 GB, LPDDR5, 128 bit",None,None,None,8.0,Apple,2022
+M2 16 GB,M2,"Jun 24th, 2022",None,"16 GB, LPDDR5, 128 bit",None,None,None,16.0,Apple,2022
+M2 24 GB,M2,"Jun 24th, 2022",None,"24 GB, LPDDR5, 128 bit",None,None,None,24.0,Apple,2022
+M2 Pro 32 GB,M2 Pro,"Jan 17th, 2023",None,"32 GB, LPDDR5, 256 bit",None,None,None,32.0,Apple,2023
+M2 Pro 64 GB,M2 Pro,"Jan 17th, 2023",None,"64 GB, LPDDR5, 256 bit",None,None,None,64.0,Apple,2023
+M2 Max 32 GB,M2 Max,"Jan 17th, 2023",None,"32 GB, LPDDR5, 512 bit",None,None,None,32.0,Apple,2023
+M2 Max 64 GB,M2 Max,"Jan 17th, 2023",None,"64 GB, LPDDR5, 512 bit",None,None,None,64.0,Apple,2023
+M2 Max 96 GB,M2 Max,"Jan 17th, 2023",None,"96 GB, LPDDR5, 512 bit",None,None,None,96.0,Apple,2023
+M2 Ultra 64 GB,M2 Ultra,"Jun 13th, 2023",None,"64 GB, LPDDR5, 1024 bit",None,None,None,64.0,Apple,2023
+M2 Ultra 128 GB,M2 Ultra,"Jun 13th, 2023",None,"128 GB, LPDDR5, 1024 bit",None,None,None,128.0,Apple,2023
+M2 Ultra 192 GB,M2 Ultra,"Jun 13th, 2023",None,"192 GB, LPDDR5, 1024 bit",None,None,None,192.0,Apple,2023
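A quick way to sanity-check the new rows: load the sheet and filter on the `Vendor` column, which `src/app.py` already relies on below. Only `Vendor` and `Product Name` are confirmed column names in this diff (the header row isn't shown), so treat the rest of the schema as implied. A minimal sketch:

```python
# Sketch: list the new Apple entries from the updated spec sheet.
# 'Vendor' and 'Product Name' are the two column names app.py uses;
# nothing else about the header is assumed here.
import pandas as pd

gpu_specs = pd.read_csv("data/gpu_specs.csv")
apple = gpu_specs[gpu_specs["Vendor"] == "Apple"].sort_values("Product Name")
print(apple["Product Name"].tolist())  # "M1 8 GB" ... "M2 Ultra 192 GB"
```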
src/app.py CHANGED
@@ -27,22 +27,32 @@ def get_mistralai_table():
     model = get_model("mistralai/Mistral-7B-v0.1", library="transformers", access_token="")
     return calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
 
-def show_gpu_info(info, trainable_params=0):
+def show_gpu_info(info, trainable_params=0, vendor=""):
     for var in ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']:
         _info = info.loc[var]
-        if _info['Number of GPUs'] >= 3:
-            func = st.error
-            icon = "⛔"
-        elif _info['Number of GPUs'] == 2:
-            func = st.warning
-            icon = "⚠️"
+        if vendor != "Apple":
+            if _info['Number of GPUs'] >= 3:
+                func = st.error
+                icon = "⛔"
+            elif _info['Number of GPUs'] == 2:
+                func = st.warning
+                icon = "⚠️"
+            else:
+                func = st.success
+                icon = "✅"
+
+            msg = f"You require **{_info['Number of GPUs']}** GPUs for **{var}**"
+            if var == 'LoRa Fine-tuning':
+                msg += f" ({trainable_params}%)"
         else:
-            func = st.success
-            icon = ""
-
-        msg = f"You require **{_info['Number of GPUs']}** GPUs for **{var}**"
-        if var == 'LoRa Fine-tuning':
-            msg += f" ({trainable_params}%)"
+            if _info['Number of GPUs'] == 1:
+                msg = f"You can run **{var}**"
+                func = st.success
+                icon = "✅"
+            else:
+                msg = f"You cannot run **{var}**"
+                func = st.error
+                icon = "⛔"
         func(msg, icon=icon)
 
 
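The rewritten `show_gpu_info` keeps the GPU-count traffic light for NVIDIA/AMD/Intel but collapses Apple to a binary fits/doesn't-fit message, since an M-series machine is a single device with one shared memory pool. A minimal reproduction of that branch outside Streamlit, on toy data, with the messages returned instead of rendered:

```python
# Toy reproduction of the new branch logic. The DataFrame mirrors the
# shape show_gpu_info receives: indexed by Variable, with a
# 'Number of GPUs' column.
import pandas as pd

def gpu_messages(info, trainable_params=0, vendor=""):
    msgs = []
    for var in ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']:
        n = info.loc[var, 'Number of GPUs']
        if vendor != "Apple":
            msg = f"You require {n} GPUs for {var}"
            if var == 'LoRa Fine-tuning':
                msg += f" ({trainable_params}%)"
        else:
            # Apple chips can't be stacked: either the model fits or it doesn't.
            msg = f"You can run {var}" if n == 1 else f"You cannot run {var}"
        msgs.append(msg)
    return msgs

info = pd.DataFrame({'Number of GPUs': [1, 4, 2]},
                    index=['Inference', 'Full Training Adam', 'LoRa Fine-tuning'])
print(gpu_messages(info, trainable_params=10, vendor="Apple"))
# ['You can run Inference', 'You cannot run Full Training Adam',
#  'You cannot run LoRa Fine-tuning']
```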
@@ -65,7 +75,6 @@ with col.expander("Information", expanded=True):
     st.latex(r"""\text{Memory}_\text{Inference} \approx \text{Model Size} \times 1.2""")
     st.markdown("""- For LoRa Fine-tuning, I'm assuming a **16-bit** dtype of trainable parameters. The formula (in terms of GB) is""")
     st.latex(r"\text{Memory}_\text{LoRa} \approx \text{Model Size} + \left(\text{ \# trainable Params}_\text{Billions}\times\frac{16}{8} \times 4\right) \times 1.2")
-    st.markdown("- You can understand `int4` as models in `GPTQ-4bit`, `AWQ-4bit` or `Q4_0 GGUF/GGML` formats")
 
 access_token = st.sidebar.text_input("Access token")
 model_name = st.sidebar.text_input("Model name", value="mistralai/Mistral-7B-v0.1")
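The two `st.latex` formulas kept in this hunk drive the app's estimates. A quick numeric check, assuming a hypothetical 7B model stored in 16-bit (about 2 GB per billion parameters) and an illustrative 10% trainable share — both values made up for the example:

```python
# Worked example of the inference and LoRa formulas shown above.
params_b = 7                       # model parameters, in billions (hypothetical)
model_size_gb = params_b * 2       # 16-bit weights: ~2 GB per billion params
mem_inference = model_size_gb * 1.2                        # 14 * 1.2 = 16.8 GB
trainable_b = params_b * 0.10                              # 10% trainable = 0.7 B
mem_lora = model_size_gb + (trainable_b * 16 / 8 * 4) * 1.2  # 14 + 6.72 = 20.72 GB
print(f"inference: {mem_inference:.1f} GB, LoRa: {mem_lora:.1f} GB")
```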
@@ -89,7 +98,7 @@ if model_name not in st.session_state:
     st.session_state['actual_model'] = model_name
 
 
-gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel"])
+gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel", "Apple"])
 # year = st.sidebar.selectbox("Filter by Release Year", list(range(2014, 2024))[::-1], index=None)
 gpu_info = gpu_specs[gpu_specs['Vendor'] == gpu_vendor].sort_values('Product Name')
 # if year:
@@ -122,6 +131,10 @@ _memory_table.columns = ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']
 _memory_table = _memory_table.stack().reset_index()
 _memory_table.columns = ['dtype', 'Variable', 'Number of GPUs']
 col1, col2 = st.columns([1,1.3])
+
+if gpu_vendor == "Apple":
+    col.warning("""For M1/M2 Apple chips, PyTorch uses [Metal Performance Shaders (MPS)](https://huggingface.co/docs/accelerate/usage_guides/mps) as backend.\\
+                Remember that Apple M1/M2 chips share memory between CPU and GPU.""", icon="⚠️")
 with col1:
     st.write(f"#### [{model_name}](https://huggingface.co/{model_name}) ({custom_ceil(memory_table.iloc[3,0],1):.1f}B)")
 
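For context on the new warning: the standard way to confirm that the MPS backend the message links to is actually usable is plain PyTorch — generic API, not code from this repository:

```python
# Generic PyTorch check for the MPS backend mentioned in the warning.
import torch

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)  # 'mps' on Apple Silicon with a recent PyTorch build
```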
@@ -129,15 +142,23 @@ with col1:
     tabs = st.tabs(dtypes)
     for dtype, tab in zip(dtypes, tabs):
         with tab:
+            if dtype in ["int4", "int8"]:
+                _dtype = dtype.replace("int", "")
+                st.markdown(f"`int{_dtype}` refers to models in `GPTQ-{_dtype}bit`, `AWQ-{_dtype}bit` or `Q{_dtype}_0 GGUF/GGML`")
             info = _memory_table[_memory_table['dtype'] == dtype].set_index('Variable')
-            show_gpu_info(info, lora_pct)
+            show_gpu_info(info, lora_pct, gpu_vendor)
             st.write(memory_table.iloc[[0, 1, 2, 4]])
 with col2:
+    extra = ""
+    if gpu_vendor == "Apple":
+        st.warning("This graph is irrelevant for M1/M2 chips as they can't run in parallel.", icon="⚠️")
+        extra = "⚠️"
     num_colors = 4
     colors = [px.colors.sequential.RdBu[int(i*(len(px.colors.sequential.RdBu)-1)/(num_colors-1))] for i in range(num_colors)]
     fig = px.bar(_memory_table, x='Variable', y='Number of GPUs', color='dtype', barmode='group', color_discrete_sequence=colors)
-    fig.update_layout(title=dict(text=f"Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25))
+    fig.update_layout(title=dict(text=f"{extra} Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25))
                       , xaxis_tickfont_size=14, yaxis_tickfont_size=16, yaxis_dtick='1')
     st.plotly_chart(fig, use_container_width=True)
 
 
+
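One caveat on the chart itself: nothing in this diff shows how 'Number of GPUs' is computed — it comes out of `calculate_memory`, which isn't touched here — so the ceil-division below is only a plausible reconstruction, not the app's actual code:

```python
# Hypothetical reconstruction of the quantity the bar chart plots:
# required memory divided by per-GPU memory, rounded up. This is an
# assumption about calculate_memory, which this commit does not show.
import math

def gpus_required(memory_needed_gb: float, gpu_memory_gb: float) -> int:
    return math.ceil(memory_needed_gb / gpu_memory_gb)

print(gpus_required(20.7, 24.0))  # 1 — e.g. the LoRa estimate vs a 24 GB card
```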