Vokturz committed
Commit 3fe032d
1 parent: 1917818

first version

Files changed (4)
  1. README.md +3 -3
  2. data/gpu_specs.csv +0 -0
  3. requirements.txt +5 -0
  4. src/utils.py +103 -0
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
-title: Can It Run Llm
-emoji:
+title: Can It Run? LLM GPU check
+emoji: 🚀
 colorFrom: red
 colorTo: purple
 sdk: streamlit
 sdk_version: 1.26.0
-app_file: app.py
+app_file: src/app.py
 pinned: false
 license: gpl-3.0
 ---
data/gpu_specs.csv ADDED
The diff for this file is too large to render. See raw diff
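Since the CSV is too large to render here, its schema isn't visible in this commit; given the Space's purpose it presumably maps GPU models to their specs. A minimal sketch of how the pinned `pandas` dependency could consume it, assuming hypothetical column names ("Name", "Memory (GB)") that are not taken from this diff:

```python
import pandas as pd

# Hypothetical sketch -- the column names "Name" and "Memory (GB)" are
# assumptions; the real header of data/gpu_specs.csv is not shown in this diff.
gpu_specs = pd.read_csv("data/gpu_specs.csv")

# e.g. list GPUs whose memory fits an estimated 15.6 GB inference footprint
fits = gpu_specs[gpu_specs["Memory (GB)"] >= 15.6]
print(fits["Name"].tolist())
```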
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
+accelerate @ git+https://github.com/huggingface/accelerate
+transformers @ git+https://github.com/huggingface/transformers
+huggingface_hub
+pandas
+plotly
src/utils.py ADDED
@@ -0,0 +1,103 @@
+# using https://huggingface.co/spaces/hf-accelerate/model-memory-usage/blob/main/src/model_utils.py
+
+import torch
+from accelerate.commands.estimate import check_has_model, create_empty_model
+from urllib.parse import urlparse
+from accelerate.utils import calculate_maximum_sizes
+from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
+import streamlit as st
+
+DTYPE_MODIFIER = {"float32": 1, "float16/bfloat16": 2, "int8": 4, "int4": 8}
+
+def translate_llama2(text):
+    "Translates llama-2 to its hf counterpart"
+    if not text.endswith("-hf"):
+        return text + "-hf"
+    return text
+
+def get_model(model_name: str, library: str, access_token: str):
+    "Finds and grabs model from the Hub, and initializes on `meta`"
+    if "meta-llama" in model_name:
+        model_name = translate_llama2(model_name)
+    if library == "auto":
+        library = None
+    model_name = extract_from_url(model_name)
+    try:
+        model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
+    except GatedRepoError:
+        st.error(
+            f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access. You can find your access token here: https://huggingface.co/settings/tokens."
+        )
+        st.stop()
+    except RepositoryNotFoundError:
+        st.error(f"Model `{model_name}` was not found on the Hub, please try another model name.")
+        st.stop()
+    except ValueError:
+        st.error(
+            f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)."
+        )
+        st.stop()
+    except (RuntimeError, OSError) as e:
+        library = check_has_model(e)
+        if library != "unknown":
+            st.error(
+                f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo."
+            )
+            st.stop()
+        st.error(
+            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
+        )
+        st.stop()
+    except ImportError:
+        # hacky way to check if it works with `trust_remote_code=False`
+        model = create_empty_model(
+            model_name, library_name=library, trust_remote_code=False, access_token=access_token
+        )
+    except Exception as e:
+        st.error(
+            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
+        )
+        st.stop()
+    return model
+
+def extract_from_url(name: str):
+    "Checks if `name` is a URL, and if so converts it to a model name"
+    is_url = False
+    try:
+        result = urlparse(name)
+        is_url = all([result.scheme, result.netloc])
+    except Exception:
+        is_url = False
+    # Pass through if not a URL
+    if not is_url:
+        return name
+    else:
+        path = result.path
+        return path[1:]
+
+def calculate_memory(model: torch.nn.Module, options: list):
+    "Calculates the memory usage for a model init on `meta` device"
+    total_size, largest_layer = calculate_maximum_sizes(model)
+    num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    data = []
+    for dtype in options:
+        dtype_total_size = total_size
+        dtype_largest_layer = largest_layer[0]
+
+        modifier = DTYPE_MODIFIER[dtype]
+        dtype_total_size /= modifier
+        dtype_largest_layer /= modifier
+
+        dtype_training_size = dtype_total_size * 4 / (1024**3)
+        dtype_inference = dtype_total_size * 1.2 / (1024**3)
+        dtype_total_size = dtype_total_size / (1024**3)
+        data.append(
+            {
+                "dtype": dtype,
+                "Total Size (GB)": dtype_total_size,
+                "Inference (GB)": dtype_inference,
+                "Training using Adam (GB)": dtype_training_size,
+                "Parameters (Billion)": num_parameters / 1e9
+            }
+        )
+    return data
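For context on how these helpers compose: `get_model` builds the model on the `meta` device via `create_empty_model`, so no weights are ever downloaded, and `calculate_memory` divides the float32 byte count by the `DTYPE_MODIFIER` divisor, then applies 1.2x for inference and 4x for training with Adam. Note that dividing by `1024**3` means the figures are strictly GiB even though the labels say GB. A minimal usage sketch, not part of this commit (the import path, model name, and token placeholder are assumptions):

```python
# Hypothetical usage sketch -- not part of this commit. Assumes it runs
# inside the Space so `utils` is importable from src/, and that the caller
# has a valid Hub token for gated repos.
from utils import get_model, calculate_memory

model = get_model("meta-llama/Llama-2-7b", library="transformers", access_token="hf_...")
rows = calculate_memory(model, options=["float32", "float16/bfloat16", "int8", "int4"])

# Sanity check of the formulas for a ~7B-parameter model in float16
# (modifier 2): total ~= 7e9 params * 4 bytes / 2 / 1024**3 ~= 13 GiB,
# inference ~= 1.2 * 13 ~= 15.6 GiB, Adam training ~= 4 * 13 ~= 52 GiB.
for row in rows:
    print(row["dtype"], round(row["Inference (GB)"], 1))
```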