Upload folder using huggingface_hub

- README.md +2 -8
- __init__.py +0 -0
- __pycache__/hub_model_stats_utils.cpython-311.pyc +0 -0
- __pycache__/hub_utils.cpython-310.pyc +0 -0
- __pycache__/hub_utils.cpython-311.pyc +0 -0
- __pycache__/model_utils.cpython-310.pyc +0 -0
- __pycache__/model_utils.cpython-311.pyc +0 -0
- app.py +86 -0
- config_json_utils.py +62 -0
- hub_model_stats_utils.py +28 -0
- hub_utils.py +44 -0
- model_utils.py +101 -0
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-
-colorFrom: green
-colorTo: blue
+title: Model_Memory_and_Popularity
+app_file: app.py
 sdk: gradio
 sdk_version: 4.16.0
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

__init__.py
ADDED
File without changes

__pycache__/hub_model_stats_utils.cpython-311.pyc
ADDED
Binary file (1.53 kB)

__pycache__/hub_utils.cpython-310.pyc
ADDED
Binary file (2.5 kB)

__pycache__/hub_utils.cpython-311.pyc
ADDED
Binary file (3.54 kB)

__pycache__/model_utils.cpython-310.pyc
ADDED
Binary file (3.07 kB)

__pycache__/model_utils.cpython-311.pyc
ADDED
Binary file (4.83 kB)
app.py
ADDED
@@ -0,0 +1,86 @@
import gradio as gr
import pandas as pd
from hub_utils import check_for_discussion, report_results
from model_utils import calculate_memory, get_model
from huggingface_hub.utils import HfHubHTTPError
from hub_model_stats_utils import get_model_type_downloads

# We need to store them as globals because gradio doesn't have a way for us to pass them into the button
MODEL = None
TASK_INP = None

def get_mem_results(model_name: str, library: str, options: list, access_token: str):
    global MODEL
    MODEL = get_model(model_name, library, access_token)
    try:
        has_discussion = check_for_discussion(model_name)
    except HfHubHTTPError:
        has_discussion = True
    title = f"## Memory usage for '{model_name}'"
    data = calculate_memory(MODEL, options)
    return [title, gr.update(visible=True, value=pd.DataFrame(data)), gr.update(visible=not has_discussion)]


with gr.Blocks() as demo:
    gr.Markdown("""<h1>Positron Model Universe Explorer</h1>""")
    with gr.Tab("Model Size"):
        gr.Markdown(
            """<h1>Model Memory Calculator</h1>

            This tool will help you calculate how much vRAM is needed to train and perform big model inference
            on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
            is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam)."""
        )

        out_text = gr.Markdown()
        mem_out = gr.DataFrame(
            headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
            interactive=False,
            visible=True,
        )
        with gr.Row():
            model_name_inp = gr.Textbox(label="Model Name or URL", value="TheBloke/Nous-Hermes-13B-GPTQ")
        with gr.Row():
            library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
            options = gr.CheckboxGroup(
                ["float32", "float16/bfloat16", "int8", "int4"],
                value="float32",
                label="Model Precision",
            )
            access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
        with gr.Row():
            mem_btn = gr.Button("Calculate Memory Usage")
            post_to_hub = gr.Button(
                value="Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False
            )

        mem_btn.click(
            get_mem_results,
            inputs=[model_name_inp, library, options, access_token],
            outputs=[out_text, mem_out, post_to_hub],
        )

    with gr.Tab("Model Type"):
        gr.Markdown(
            """<h1>Models by Model Task</h1>"""
        )

        with gr.Row():
            task_inp = gr.Dropdown(
                choices=["text-generation", "question-answering", "text-classification", "unconditional-image-generation"],
                value="text-generation", interactive=True, filterable=True, label="Model Task", show_label=True
            )
            stat_inp = gr.Radio(choices=["downloads", "likes"], value="downloads", label="Sorting By", show_label=True, interactive=True)

        with gr.Row():
            stats_btn = gr.Button("Pull Stats from HF API")

        # Define the output table before wiring the click handler so `stats_out` exists when referenced
        stats_out = gr.Dataframe(row_count=10)

        stats_btn.click(
            get_model_type_downloads,
            inputs=[task_inp, stat_inp],
            outputs=[stats_out],
        )
        # gr.Dataframe(get_model_type_likes(), row_count=10)

demo.launch(share=True)
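As a rough illustration of the estimate quoted in the tab's description (load size per dtype, with Adam training at roughly 4x), here is a back-of-the-envelope sketch; the 7-billion-parameter count is an assumed example, not a measured model:

# Sketch of the "4x for Adam" rule the Space reports (assumed 7B-parameter model).
params = 7_000_000_000
bytes_per_param = {"float32": 4, "float16/bfloat16": 2, "int8": 1, "int4": 0.5}

for dtype, nbytes in bytes_per_param.items():
    total_gb = params * nbytes / 1024**3
    # Training with Adam is estimated at ~4x the model size
    # (1x weights + 1x gradients + 2x optimizer state).
    print(f"{dtype}: load ~ {total_gb:.1f} GB, train (Adam) ~ {4 * total_gb:.1f} GB")
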
config_json_utils.py
ADDED
@@ -0,0 +1,62 @@
from pathlib import Path
import json
from collections import Counter
import operator

import numpy as np
import matplotlib.pyplot as plt
from string import ascii_lowercase as alc
from string import ascii_uppercase as auc

# assign directory
directory = 'HFFiles'
arch_list = []
invalid_counter = 0

# iterate over files in directory
files = Path(directory).glob('*.json')
for file in files:
    f = open(file)
    try:
        data = json.load(f)
    except json.JSONDecodeError:
        # print(str(file) + " does not look like valid JSON.")
        invalid_counter += 1
        continue
    try:
        for i in data['architectures']:
            # print(i)
            arch_list.append(i)
    except KeyError:
        # print(str(file) + " does not contain architectures key.")
        invalid_counter += 1

    f.close()

my_dict = Counter(arch_list)
# print(my_dict)
print("\n")
print(str(invalid_counter) + " JSON files are invalid.")

# Drop single-character keys left over from configs where 'architectures' is a plain string
for i in alc:
    del my_dict[i]
for i in auc:
    del my_dict[i]

causal_dict = {k: v for (k, v) in my_dict.items() if "Causal" in k}

sorted_causal_dict = dict(sorted(causal_dict.items(), key=operator.itemgetter(1), reverse=True))
sorted_causal_dict = {key: val for key, val in sorted_causal_dict.items() if val != 1}

print(sorted_causal_dict)

labels, values = zip(*sorted_causal_dict.items())

indexes = np.arange(len(labels))
width = 1

plt.figure(figsize=(16, 9))
plt.bar(indexes, values, width)
plt.xticks(indexes + width * 0.5, labels)
plt.xticks(rotation=90)
plt.show()
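For context on what this script tallies: a transformers config.json carries an "architectures" list, and the Counter simply counts occurrences of each class name across the downloaded files. A minimal sketch with an assumed config snippet:

import json
from collections import Counter

# Assumed example of the relevant slice of a Hub config.json
sample = json.loads('{"architectures": ["LlamaForCausalLM"], "model_type": "llama"}')

counts = Counter()
counts.update(sample["architectures"])
print(counts)  # Counter({'LlamaForCausalLM': 1})
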
hub_model_stats_utils.py
ADDED
@@ -0,0 +1,28 @@
import pandas as pd
from huggingface_hub import HfApi, ModelFilter

def call_hub_api(model_task: str):
    "Lists Hub models tagged with the given pipeline task"
    api = HfApi()
    models = api.list_models(
        filter=ModelFilter(
            task=model_task
        )
    )
    return list(models)


def get_model_type_downloads(model_task: str, sort_stat: str = "downloads"):
    "Returns the top 50 models for `model_task`, sorted by `sort_stat` ('downloads' or 'likes')"
    models = call_hub_api(model_task)

    dict_list = []
    for cur_model in models:
        this_dict = {"id": cur_model.id, "downloads": cur_model.downloads, "likes": cur_model.likes, "author": cur_model.author, "created": cur_model.created_at, "modified": cur_model.last_modified}
        dict_list.append(this_dict)

    df = pd.DataFrame(dict_list)
    return df.sort_values(sort_stat, ascending=False).head(50)

# df_likes_desc = df.sort_values("likes", ascending=False)
# df_likes_desc.head(10)

# df_likes_desc['id'].str.split('/', n=1, expand=True)
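A quick usage sketch, assuming the `sort_stat` parameter above and a `huggingface_hub` release that still ships `ModelFilter` (newer releases accept `task=` directly on `list_models`):

from hub_model_stats_utils import get_model_type_downloads

# Pull the top models for a task and inspect the most-downloaded / most-liked ones.
df = get_model_type_downloads("text-generation")                      # sorted by downloads
top_by_likes = get_model_type_downloads("text-generation", "likes")   # sorted by likes

print(df[["id", "downloads", "likes"]].head(10))
print(top_by_likes[["id", "likes"]].head(10))
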
hub_utils.py
ADDED
@@ -0,0 +1,44 @@
# Utilities related to searching and posting on the Hub
import os
import webbrowser

import pandas as pd
from huggingface_hub import HfApi
from model_utils import calculate_memory, extract_from_url, get_model


def check_for_discussion(model_name: str):
    "Checks if an automated discussion has been opened on the model by `model-sizer-bot`"
    api = HfApi(token=os.environ.get("HUGGINGFACE_API_LOGIN", None))
    model_name = extract_from_url(model_name)
    discussions = list(api.get_repo_discussions(model_name))
    return any(
        discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot"
        for discussion in discussions
    )


def report_results(model_name, library, access_token):
    "Reports the results of a memory calculation to the model's discussion page, and opens a new tab to it afterwards"
    model = get_model(model_name, library, access_token)
    data = calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
    df = pd.DataFrame(data).to_markdown(index=False)

    post = f"""# Model Memory Requirements\n

You will need about {data[1]['Total Size']} VRAM to load this model for inference, and {data[3]['Training using Adam']} VRAM to train it using Adam.

These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.

The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.

When training with `Adam`, you can expect roughly 4x the reported results to be used. (1x for the model, 1x for the gradients, and 2x for the optimizer).

## Results:

{df}
"""
    api = HfApi(token=os.environ.get("HUGGINGFACE_API_LOGIN", None))
    discussion = api.create_discussion(model_name, "[AUTOMATED] Model Memory Requirements", description=post)
    webbrowser.open_new_tab(discussion.url)
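A minimal usage sketch of these two helpers, assuming a write-scoped token is exported as HUGGINGFACE_API_LOGIN (the environment variable this module reads) and that the example repo name is just a placeholder:

import os
from hub_utils import check_for_discussion, report_results

# Both helpers build an HfApi with the token from this environment variable.
token = os.environ.get("HUGGINGFACE_API_LOGIN")  # placeholder; set before running

model = "bert-base-uncased"  # assumed example repo
if not check_for_discussion(model):
    # Posts a real "[AUTOMATED] Model Memory Requirements" discussion on the repo
    # and opens a browser tab pointing at it.
    report_results(model, library="transformers", access_token=token)
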
model_utils.py
ADDED
@@ -0,0 +1,101 @@
# Utilities related to loading in and working with models/specific models
from urllib.parse import urlparse

import gradio as gr
import torch
from accelerate.commands.estimate import check_has_model, create_empty_model
from accelerate.utils import calculate_maximum_sizes, convert_bytes
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError


DTYPE_MODIFIER = {"float32": 1, "float16/bfloat16": 2, "int8": 4, "int4": 8}


def extract_from_url(name: str):
    "Checks if `name` is a URL, and if so converts it to a model name"
    is_url = False
    try:
        result = urlparse(name)
        is_url = all([result.scheme, result.netloc])
    except Exception:
        is_url = False
    # Pass through if not a URL
    if not is_url:
        return name
    else:
        path = result.path
        return path[1:]


def translate_llama2(text):
    "Translates llama-2 to its hf counterpart"
    if not text.endswith("-hf"):
        return text + "-hf"
    return text


def get_model(model_name: str, library: str, access_token: str):
    "Finds and grabs model from the Hub, and initializes on `meta`"
    if "meta-llama" in model_name:
        model_name = translate_llama2(model_name)
    if library == "auto":
        library = None
    model_name = extract_from_url(model_name)
    try:
        model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
    except GatedRepoError:
        raise gr.Error(
            f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access. You can find your access token here : https://huggingface.co/settings/tokens. "
        )
    except RepositoryNotFoundError:
        raise gr.Error(f"Model `{model_name}` was not found on the Hub, please try another model name.")
    except ValueError:
        raise gr.Error(
            f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)"
        )
    except (RuntimeError, OSError) as e:
        library = check_has_model(e)
        if library != "unknown":
            raise gr.Error(
                f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo."
            )
        raise gr.Error(
            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
        )
    except ImportError:
        # hacky way to check if it works with `trust_remote_code=False`
        model = create_empty_model(
            model_name, library_name=library, trust_remote_code=False, access_token=access_token
        )
    except Exception as e:
        raise gr.Error(
            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
        )
    return model


def calculate_memory(model: torch.nn.Module, options: list):
    "Calculates the memory usage for a model init on `meta` device"
    total_size, largest_layer = calculate_maximum_sizes(model)

    data = []
    for dtype in options:
        dtype_total_size = total_size
        dtype_largest_layer = largest_layer[0]

        modifier = DTYPE_MODIFIER[dtype]
        dtype_total_size /= modifier
        dtype_largest_layer /= modifier

        dtype_training_size = convert_bytes(dtype_total_size * 4)
        dtype_total_size = convert_bytes(dtype_total_size)
        dtype_largest_layer = convert_bytes(dtype_largest_layer)
        data.append(
            {
                "dtype": dtype,
                "Largest Layer or Residual Group": dtype_largest_layer,
                "Total Size": dtype_total_size,
                "Training using Adam": dtype_training_size,
            }
        )
    return data
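To see how the per-dtype rows are derived, here is a small sketch that runs calculate_memory on a toy module; the layer sizes are assumed purely for illustration, and it assumes torch >= 2.0 for the meta-device context manager, plus the same accelerate helpers imported above:

import torch
from accelerate.utils import calculate_maximum_sizes, convert_bytes

from model_utils import DTYPE_MODIFIER, calculate_memory

# Toy model built on the meta device (no real memory allocated); sizes are assumed.
with torch.device("meta"):
    toy = torch.nn.Sequential(
        torch.nn.Linear(4096, 4096),
        torch.nn.Linear(4096, 1024),
    )

# float32 byte counts, as computed by accelerate.
total_size, largest_layer = calculate_maximum_sizes(toy)
print(convert_bytes(total_size), largest_layer)

# The table divides the float32 total by the dtype modifier and reports ~4x that for Adam training.
for row in calculate_memory(toy, list(DTYPE_MODIFIER)):
    print(row)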