patrickvonplaten committed on
Commit
20283f2
1 Parent(s): 62d55e9
Files changed (1) hide show
  1. app.py +75 -22
app.py CHANGED
@@ -5,14 +5,32 @@ import streamlit as st
5
  import altair as alt
6
  import numpy as np
7
  import datetime
 
 
8
  from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- from huggingface_hub import Repository
 
 
11
 
12
  today = datetime.date.today()
13
  year, week, _ = today.isocalendar()
14
 
15
- DATASET_REPO_URL = "https://huggingface.co/datasets/patrickvonplaten/model-archs-downloads-space-data"
 
 
16
  DATA_FILENAME = f"data_{week}_{year}.csv"
17
  DATA_FILE = os.path.join("data", DATA_FILENAME)
18
 
@@ -23,24 +41,45 @@ def retrieve_model_stats():
23
  total_downloads = 0
24
 
25
  for model_name in list(CONFIG_MAPPING_NAMES.keys()):
26
- model_stats = {"num_downloads": 0, "%_of_all_downloads": 0, "num_models": 0, "download_per_model": 0}
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  models = hf_api.list_models(filter=model_name)
28
 
29
  model_stats["num_models"] = len(models)
30
- model_stats["num_downloads"] = sum([m.downloads for m in models if hasattr(m, "downloads")])
 
 
31
  if len(models) > 0:
32
- model_stats["download_per_model"] = round(model_stats["num_downloads"] / len(models), 2)
 
 
33
  total_downloads += model_stats["num_downloads"]
34
 
35
  # save in overall dict
36
  all_stats[model_name] = model_stats
37
 
38
  for model_name in list(CONFIG_MAPPING_NAMES.keys()):
39
- all_stats[model_name]["%_of_all_downloads"] = round(all_stats[model_name]["num_downloads"] / total_downloads, 5) * 100 # noqa: E501
 
 
40
  downloads = all_stats[model_name]["num_downloads"]
41
  all_stats[model_name]["num_downloads"] = f"{downloads:,}"
42
 
43
- sorted_results = dict(reversed(sorted(all_stats.items(), key=lambda d: d[1]["%_of_all_downloads"])))
 
 
44
  dataframe = pd.DataFrame.from_dict(sorted_results, orient="index")
45
 
46
  # give header to model names
@@ -64,29 +103,43 @@ if not os.path.isfile(DATA_FILE):
64
  with open(DATA_FILE, "r") as f:
65
  dataframe = pd.read_csv(DATA_FILE)
66
 
67
- int_downloads = np.array([int(x.replace(",", "")) for x in dataframe["num_downloads"].values])
 
 
68
 
69
  st.title(f"Transformers stats for year {year} and week {week}")
70
  # print top 20 downloads
71
- source = pd.DataFrame({
72
- 'Number of total downloads': int_downloads[:20],
73
- 'Model architecture name': dataframe["model_names"].values[:20],
74
- })
75
- bar_chart = alt.Chart(source).mark_bar().encode(
76
- y="Number of total downloads",
77
- x=alt.X("Model architecture name", sort=None),
 
 
 
 
 
 
78
  )
79
  st.title("Top 20 downloads last 30 days")
80
  st.altair_chart(bar_chart, use_container_width=True)
81
 
82
  # print bottom 20 downloads
83
- source = pd.DataFrame({
84
- 'Number of total downloads': int_downloads[-20:],
85
- 'Model architecture name': dataframe["model_names"].values[-20:],
86
- })
87
- bar_chart = alt.Chart(source).mark_bar().encode(
88
- y="Number of total downloads",
89
- x=alt.X("Model architecture name", sort=None),
 
 
 
 
 
 
90
  )
91
  st.title("Bottom 20 downloads last 30 days")
92
  st.altair_chart(bar_chart, use_container_width=True)
 
5
  import altair as alt
6
  import numpy as np
7
  import datetime
8
+ from huggingface_hub import Repository
9
+
10
  from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
11
+ from transformers.models.auto.modeling_auto import (
12
+ MODEL_FOR_CTC_MAPPING_NAMES,
13
+ MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
14
+ MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
15
+ MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
16
+ MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES,
17
+ MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
18
+ MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
19
+ MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
20
+ MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,
21
+ MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
22
+ )
23
 
24
# Model-architecture names grouped by modality, derived from transformers'
# auto-model task mappings.  These are used for `model_name in audio_models`
# membership checks when tagging each architecture's modality.
#
# BUG FIX: in Python 3, `dict.keys()` returns a `dict_keys` view, and views
# do not support `+` — the original `A.keys() + B.keys()` raised a TypeError
# at import time.  Materialize real lists and concatenate those instead.
audio_models = (
    list(MODEL_FOR_CTC_MAPPING_NAMES)
    + list(MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES)
    + list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES)
)

vision_models = (
    list(MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES)
    + list(MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES)
    + list(MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES)
    + list(MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES)
    + list(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES)
    + list(MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES)
    + list(MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES)
)
27
 
28
# Snapshot bookkeeping: download stats are persisted as one CSV per ISO week,
# inside the dataset repo below, so the file name encodes week and year.
DATASET_REPO_URL = (
    "https://huggingface.co/datasets/patrickvonplaten/model-archs-downloads-space-data"
)

today = datetime.date.today()
year, week, _ = today.isocalendar()

# e.g. "data_37_2023.csv" for ISO week 37 of 2023
DATA_FILENAME = "data_{}_{}.csv".format(week, year)
DATA_FILE = os.path.join("data", DATA_FILENAME)
36
 
 
41
  total_downloads = 0
42
 
43
  for model_name in list(CONFIG_MAPPING_NAMES.keys()):
44
+ if model_name in audio_models:
45
+ modality = "audio"
46
+ elif model_name in vision_models:
47
+ modality = "vision"
48
+ else:
49
+ modality = "text"
50
+
51
+ model_stats = {
52
+ "num_downloads": 0,
53
+ "%_of_all_downloads": 0,
54
+ "num_models": 0,
55
+ "download_per_model": 0,
56
+ "modality": modality,
57
+ }
58
  models = hf_api.list_models(filter=model_name)
59
 
60
  model_stats["num_models"] = len(models)
61
+ model_stats["num_downloads"] = sum(
62
+ [m.downloads for m in models if hasattr(m, "downloads")]
63
+ )
64
  if len(models) > 0:
65
+ model_stats["download_per_model"] = round(
66
+ model_stats["num_downloads"] / len(models), 2
67
+ )
68
  total_downloads += model_stats["num_downloads"]
69
 
70
  # save in overall dict
71
  all_stats[model_name] = model_stats
72
 
73
  for model_name in list(CONFIG_MAPPING_NAMES.keys()):
74
+ all_stats[model_name]["%_of_all_downloads"] = (
75
+ round(all_stats[model_name]["num_downloads"] / total_downloads, 5) * 100
76
+ ) # noqa: E501
77
  downloads = all_stats[model_name]["num_downloads"]
78
  all_stats[model_name]["num_downloads"] = f"{downloads:,}"
79
 
80
+ sorted_results = dict(
81
+ reversed(sorted(all_stats.items(), key=lambda d: d[1]["%_of_all_downloads"]))
82
+ )
83
  dataframe = pd.DataFrame.from_dict(sorted_results, orient="index")
84
 
85
  # give header to model names
 
103
  with open(DATA_FILE, "r") as f:
104
  dataframe = pd.read_csv(DATA_FILE)
105
 
106
def _downloads_bar_chart(downloads, model_names):
    """Build an Altair bar chart of download counts per model architecture.

    `downloads` and `model_names` are parallel sequences; the x-axis keeps
    the given order (``sort=None``) so top/bottom slices stay ranked.
    """
    source = pd.DataFrame(
        {
            "Number of total downloads": downloads,
            "Model architecture name": model_names,
        }
    )
    return (
        alt.Chart(source)
        .mark_bar()
        .encode(
            y="Number of total downloads",
            x=alt.X("Model architecture name", sort=None),
        )
    )


# Parse the "1,234"-style formatted counts back into plain integers so they
# can be plotted numerically.
int_downloads = np.array(
    [int(x.replace(",", "")) for x in dataframe["num_downloads"].values]
)

st.title(f"Transformers stats for year {year} and week {week}")

# Top 20 downloads last 30 days (dataframe is sorted by download share,
# descending — presumably; verify against retrieve_model_stats' sorting).
st.title("Top 20 downloads last 30 days")
st.altair_chart(
    _downloads_bar_chart(int_downloads[:20], dataframe["model_names"].values[:20]),
    use_container_width=True,
)

# Bottom 20 downloads last 30 days.
st.title("Bottom 20 downloads last 30 days")
st.altair_chart(
    _downloads_bar_chart(int_downloads[-20:], dataframe["model_names"].values[-20:]),
    use_container_width=True,
)