from huggingface_hub import HfApi
import pandas as pd
import os
import streamlit as st
import altair as alt
import numpy as np
import datetime
from huggingface_hub import Repository
from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
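# The task-specific auto-model mappings imported below are only used to bucket each
# architecture into a coarse modality (audio / vision / text).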
from transformers.models.auto.modeling_auto import (
    MODEL_FOR_CTC_MAPPING_NAMES,
    MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
    MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES,
    MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES,
    MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
    MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
    MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES,
    MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
    MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
    MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,
    MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
    MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES,
    MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES,
    MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
    MODEL_FOR_BACKBONE_MAPPING_NAMES,
    MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES,
)

# Architectures counted as "audio"; anything not listed here or in vision_models is treated as text.
audio_models = list(MODEL_FOR_CTC_MAPPING_NAMES.keys()) + list(MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES.keys()) + \
    list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES.keys()) + list(MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES.keys()) + \
    list(MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES.keys())

# Architectures counted as "vision".
vision_models = list(MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES.keys()) + list(MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES.keys()) + \
    list(MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES.keys()) + list(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES.keys()) + \
    list(MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES.keys()) + list(MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES.keys()) + \
    list(MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES.keys()) + list(MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES.keys()) + \
    list(MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES.keys()) + list(MODEL_FOR_BACKBONE_MAPPING_NAMES.keys()) + \
    list(MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES.keys())

today = datetime.date.today()
year, week, _ = today.isocalendar()
DATASET_REPO_URL = (
"https://huggingface.co/datasets/huggingface/transformers-stats-space-data"
)
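# One stats CSV per ISO calendar week: the filename encodes the week, so a fresh file
# is generated on the first visit of each new week.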
DATA_FILENAME = f"data_{week}_{year}.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
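# Token with write access to the dataset repo, needed to push the freshly generated CSV back.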
HF_TOKEN = os.environ.get("HF_TOKEN")
print("is none?", HF_TOKEN is None)


def retrieve_model_stats():
    """Collect download statistics for every model architecture in Transformers.

    Returns the stats as a CSV string whose first header column is ``model_names``.
    """
    hf_api = HfApi()
    all_stats = {}
    total_downloads = 0

    for model_name in list(CONFIG_MAPPING_NAMES.keys()):
        # Bucket each architecture into a coarse modality.
        if model_name in audio_models:
            modality = "audio"
        elif model_name in vision_models:
            modality = "vision"
        else:
            modality = "text"

        model_stats = {
            "num_downloads": 0,
            "%_of_all_downloads": 0,
            "num_models": 0,
            "download_per_model": 0,
            "modality": modality,
        }

        # All Hub repos tagged with this architecture.
        models = list(hf_api.list_models(filter=model_name))
        model_stats["num_models"] = len(models)
        model_stats["num_downloads"] = sum(
            m.downloads for m in models if hasattr(m, "downloads")
        )
        if len(models) > 0:
            model_stats["download_per_model"] = int(
                model_stats["num_downloads"] / len(models)
            )
        else:
            model_stats["download_per_model"] = model_stats["num_downloads"]
        total_downloads += model_stats["num_downloads"]

        # save in overall dict
        all_stats[model_name] = model_stats

    # Second pass: compute each architecture's share of all downloads and format
    # the raw counts with thousands separators for display.
    for model_name in list(CONFIG_MAPPING_NAMES.keys()):
        all_stats[model_name]["%_of_all_downloads"] = (
            round(all_stats[model_name]["num_downloads"] / total_downloads, 5) * 100
        )
        downloads = all_stats[model_name]["num_downloads"]
        all_stats[model_name]["num_downloads"] = f"{downloads:,}"

    # Sort architectures by download share, most downloaded first.
    sorted_results = dict(
        reversed(sorted(all_stats.items(), key=lambda d: d[1]["%_of_all_downloads"]))
    )
    dataframe = pd.DataFrame.from_dict(sorted_results, orient="index")

    # The index column is unnamed, so prepend "model_names" to complete the CSV header.
    result = "model_names" + dataframe.to_csv()
    return result
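

# Clone the dataset repo into ./data; this checkout is where the weekly CSV is read from
# and written to before being pushed back to the Hub.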
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)

# If no stats file exists for the current week yet, this visitor triggers the (slow) regeneration.
if not os.path.isfile(DATA_FILE):
    st.title(
        "You are the first this week!!! Please wait until the new data is generated and written"
    )
    result = retrieve_model_stats()

    # Re-check in case another session wrote the file while the stats were being collected.
    if not os.path.isfile(DATA_FILE):
        with open(DATA_FILE, "w") as f:
            f.write(result)

    commit_url = repo.push_to_hub()
    print(commit_url)

# Load this week's stats and convert the comma-formatted download counts back to integers.
dataframe = pd.read_csv(DATA_FILE)
int_downloads = np.array(
    [int(x.replace(",", "")) for x in dataframe["num_downloads"].values]
)
st.title(f"Stats for year {year} and week {week}")
# print top 20 downloads
source = pd.DataFrame(
    {
        "Number of total downloads": int_downloads[:20],
        "Model architecture name": dataframe["model_names"].values[:20],
    }
)
bar_chart = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        y="Number of total downloads",
        x=alt.X("Model architecture name", sort=None),
    )
)
st.title("Top 20 downloads last 30 days")
st.altair_chart(bar_chart, use_container_width=True)
# print bottom 20 downloads
source = pd.DataFrame(
    {
        "Number of total downloads": int_downloads[-20:],
        "Model architecture name": dataframe["model_names"].values[-20:],
    }
)
bar_chart = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        y="Number of total downloads",
        x=alt.X("Model architecture name", sort=None),
    )
)
st.title("Bottom 20 downloads last 30 days")
st.altair_chart(bar_chart, use_container_width=True)
# print vision
df_vision = dataframe[dataframe["modality"] == "vision"]
vision_int_downloads = np.array(
    [int(x.replace(",", "")) for x in df_vision["num_downloads"].values]
)
source = pd.DataFrame(
    {
        "Number of total downloads": vision_int_downloads,
        "Model architecture name": df_vision["model_names"].values,
    }
)
bar_chart = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        y="Number of total downloads",
        x=alt.X("Model architecture name", sort=None),
    )
)
st.title("Vision downloads last 30 days")
st.altair_chart(bar_chart, use_container_width=True)
# print audio
df_audio = dataframe[dataframe["modality"] == "audio"]
audio_int_downloads = np.array(
    [int(x.replace(",", "")) for x in df_audio["num_downloads"].values]
)
source = pd.DataFrame(
    {
        "Number of total downloads": audio_int_downloads,
        "Model architecture name": df_audio["model_names"].values,
    }
)
bar_chart = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        y="Number of total downloads",
        x=alt.X("Model architecture name", sort=None),
    )
)
st.title("Audio downloads last 30 days")
st.altair_chart(bar_chart, use_container_width=True)
# print all stats
st.title("All stats last 30 days")
st.table(dataframe)
st.title("Vision stats last 30 days")
st.table(dataframe[dataframe["modality"] == "vision"].drop("modality", axis=1))
st.title("Audio stats last 30 days")
st.table(dataframe[dataframe["modality"] == "audio"].drop("modality", axis=1))