can-it-run-llm

Sleeping

App Files Files Community

Vokturz committed on Oct 3, 2023

Commit

3fe032d

1 Parent(s): 1917818

first version

Browse files

Files changed (4) hide show

README.md +3 -3
data/gpu_specs.csv +0 -0
requirements.txt +5 -0
src/utils.py +103 -0

README.md CHANGED Viewed

@@ -1,11 +1,11 @@
 ---
-title: Can It Run Llm
-emoji: ⚡
 colorFrom: red
 colorTo: purple
 sdk: streamlit
 sdk_version: 1.26.0
-app_file: app.py
 pinned: false
 license: gpl-3.0
 ---

 ---
+title: Can It Run? LLM GPU check
+emoji: 🚀
 colorFrom: red
 colorTo: purple
 sdk: streamlit
 sdk_version: 1.26.0
+app_file: src/app.py
 pinned: false
 license: gpl-3.0
 ---

data/gpu_specs.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+accelerate @ git+https://github.com/huggingface/accelerate
+transformers @ git+https://github.com/huggingface/transformers
+huggingface_hub
+pandas
+plotly

src/utils.py ADDED Viewed

	@@ -0,0 +1,103 @@

+# using https://huggingface.co/spaces/hf-accelerate/model-memory-usage/blob/main/src/model_utils.py
+import torch
+from accelerate.commands.estimate import check_has_model, create_empty_model
+from urllib.parse import urlparse
+from accelerate.utils import calculate_maximum_sizes
+from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
+import streamlit as st
+# Divisor applied to the model's computed byte size for each selectable dtype label
+# (see `calculate_memory`); presumably relative to a float32 baseline -- TODO confirm.
+DTYPE_MODIFIER = {"float32": 1, "float16/bfloat16": 2, "int8": 4, "int4": 8}
+def translate_llama2(text):
+    "Translates llama-2 to its hf counterpart"
+    if not text.endswith("-hf"):
+        return text + "-hf"
+    return text
+def get_model(model_name: str, library: str, access_token: str):
+    "Finds and grabs model from the Hub, and initializes on `meta`"
+    if "meta-llama" in model_name:
+        model_name = translate_llama2(model_name)
+    if library == "auto":
+        library = None
+    model_name = extract_from_url(model_name)
+    try:
+        model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
+    except GatedRepoError:
+        st.error(
+            f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access. You can find your access token here : https://huggingface.co/settings/tokens. "
+        )
+        st.stop()
+    except RepositoryNotFoundError:
+        st.error(f"Model `{model_name}` was not found on the Hub, please try another model name.")
+        st.stop()
+    except ValueError:
+        st.error(
+            f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)"
+        )
+        st.stop()
+    except (RuntimeError, OSError) as e:
+        library = check_has_model(e)
+        if library != "unknown":
+            st.error(
+                f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo."
+            )
+            st.stop()
+        st.error(
+            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
+        )
+        st.stop()
+    except ImportError:
+        # hacky way to check if it works with `trust_remote_code=False`
+        model = create_empty_model(
+            model_name, library_name=library, trust_remote_code=False, access_token=access_token
+        )
+    except Exception as e:
+        st.error(
+            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
+        )
+        st.stop()
+    return model
+def extract_from_url(name: str):
+    "Checks if `name` is a URL, and if so converts it to a model name"
+    is_url = False
+    try:
+        result = urlparse(name)
+        is_url = all([result.scheme, result.netloc])
+    except Exception:
+        is_url = False
+    # Pass through if not a URL
+    if not is_url:
+        return name
+    else:
+        path = result.path
+        return path[1:]
+def calculate_memory(model: torch.nn.Module, options: list):
+    "Calculates the memory usage for a model init on `meta` device"
+    total_size, largest_layer = calculate_maximum_sizes(model)
+    num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    data = []
+    for dtype in options:
+        dtype_total_size = total_size
+        dtype_largest_layer = largest_layer[0]
+        modifier = DTYPE_MODIFIER[dtype]
+        dtype_total_size /= modifier
+        dtype_largest_layer /= modifier
+        dtype_training_size = dtype_total_size * 4 / (1024**3)
+        dtype_inference = dtype_total_size * 1.2  / (1024**3)
+        dtype_total_size = dtype_total_size  / (1024**3)
+        data.append(
+            {
+                "dtype": dtype,
+                "Total Size (GB)": dtype_total_size,
+                "Inference (GB)" : dtype_inference,
+                "Training using Adam (GB)": dtype_training_size,
+                "Parameters (Billion)" : num_parameters / 1e9
+            }
+        )
+    return data