patrickvonplaten committed 20283f2 (1 parent: 62d55e9) with message "up"

app.py CHANGED
@@ -5,14 +5,32 @@ import streamlit as st
 import altair as alt
 import numpy as np
 import datetime
+from huggingface_hub import Repository
+
 from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
+from transformers.models.auto.modeling_auto import (
+    MODEL_FOR_CTC_MAPPING_NAMES,
+    MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
+    MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
+    MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
+    MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES,
+    MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
+    MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
+    MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
+    MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,
+    MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
+)

-
+audio_models = MODEL_FOR_CTC_MAPPING_NAMES.keys() + MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES.keys() + MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES.keys()
+
+vision_models = MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES.keys() + MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES.keys() + MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES.keys() + MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES.keys() + MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES.keys() + MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES.keys() + MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES.keys()

 today = datetime.date.today()
 year, week, _ = today.isocalendar()

-DATASET_REPO_URL =
+DATASET_REPO_URL = (
+    "https://huggingface.co/datasets/patrickvonplaten/model-archs-downloads-space-data"
+)
 DATA_FILENAME = f"data_{week}_{year}.csv"
 DATA_FILE = os.path.join("data", DATA_FILENAME)

@@ -23,24 +41,45 @@ def retrieve_model_stats():
     total_downloads = 0

     for model_name in list(CONFIG_MAPPING_NAMES.keys()):
-
+        if model_name in audio_models:
+            modality = "audio"
+        elif model_name in vision_models:
+            modality = "vision"
+        else:
+            modality = "text"
+
+        model_stats = {
+            "num_downloads": 0,
+            "%_of_all_downloads": 0,
+            "num_models": 0,
+            "download_per_model": 0,
+            "modality": modality,
+        }
         models = hf_api.list_models(filter=model_name)

         model_stats["num_models"] = len(models)
-        model_stats["num_downloads"] = sum(
+        model_stats["num_downloads"] = sum(
+            [m.downloads for m in models if hasattr(m, "downloads")]
+        )
         if len(models) > 0:
-            model_stats["download_per_model"] = round(
+            model_stats["download_per_model"] = round(
+                model_stats["num_downloads"] / len(models), 2
+            )
         total_downloads += model_stats["num_downloads"]

         # save in overall dict
         all_stats[model_name] = model_stats

     for model_name in list(CONFIG_MAPPING_NAMES.keys()):
-        all_stats[model_name]["%_of_all_downloads"] =
+        all_stats[model_name]["%_of_all_downloads"] = (
+            round(all_stats[model_name]["num_downloads"] / total_downloads, 5) * 100
+        )  # noqa: E501
         downloads = all_stats[model_name]["num_downloads"]
         all_stats[model_name]["num_downloads"] = f"{downloads:,}"

-    sorted_results = dict(
+    sorted_results = dict(
+        reversed(sorted(all_stats.items(), key=lambda d: d[1]["%_of_all_downloads"]))
+    )
     dataframe = pd.DataFrame.from_dict(sorted_results, orient="index")

     # give header to model names
@@ -64,29 +103,43 @@ if not os.path.isfile(DATA_FILE):
 with open(DATA_FILE, "r") as f:
     dataframe = pd.read_csv(DATA_FILE)

-int_downloads = np.array(
+int_downloads = np.array(
+    [int(x.replace(",", "")) for x in dataframe["num_downloads"].values]
+)

 st.title(f"Transformers stats for year {year} and week {week}")
 # print top 20 downloads
-source = pd.DataFrame(
-
-
-
-
-
-
+source = pd.DataFrame(
+    {
+        "Number of total downloads": int_downloads[:20],
+        "Model architecture name": dataframe["model_names"].values[:20],
+    }
+)
+bar_chart = (
+    alt.Chart(source)
+    .mark_bar()
+    .encode(
+        y="Number of total downloads",
+        x=alt.X("Model architecture name", sort=None),
+    )
 )
 st.title("Top 20 downloads last 30 days")
 st.altair_chart(bar_chart, use_container_width=True)

 # print bottom 20 downloads
-source = pd.DataFrame(
-
-
-
-
-
-
+source = pd.DataFrame(
+    {
+        "Number of total downloads": int_downloads[-20:],
+        "Model architecture name": dataframe["model_names"].values[-20:],
+    }
+)
+bar_chart = (
+    alt.Chart(source)
+    .mark_bar()
+    .encode(
+        y="Number of total downloads",
+        x=alt.X("Model architecture name", sort=None),
+    )
 )
 st.title("Bottom 20 downloads last 30 days")
 st.altair_chart(bar_chart, use_container_width=True)
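A note on the new module-level grouping: in Python 3, dict.keys() returns a dict_keys view, and two views cannot be joined with "+", so the audio_models and vision_models assignments added in this commit would raise a TypeError as soon as app.py is imported. Below is a minimal sketch of the same grouping that runs, assuming the same mapping names imported in the diff; the use of itertools.chain and the set() materialization are an editorial choice for illustration, not part of the committed code.

from itertools import chain

from transformers.models.auto.modeling_auto import (
    MODEL_FOR_CTC_MAPPING_NAMES,
    MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
    MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
    MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
    MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES,
    MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
    MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
    MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
    MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,
    MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
)

# Iterating a *_MAPPING_NAMES dict yields its keys (the model_type strings).
# dict_keys views cannot be concatenated with "+", so chain the mappings and
# materialize sets instead.
audio_models = set(
    chain(
        MODEL_FOR_CTC_MAPPING_NAMES,
        MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
        MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
    )
)

vision_models = set(
    chain(
        MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
        MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES,
        MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
        MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
        MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
        MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,
        MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
    )
)

Using sets also keeps the "if model_name in audio_models" / "elif model_name in vision_models" checks inside retrieve_model_stats() as constant-time membership tests.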