davanstrien HF staff committed on
Commit
182c1d0
1 Parent(s): 9fa15d3
Files changed (3) hide show
  1. app.py +72 -0
  2. requirements.in +5 -0
  3. requirements.txt +186 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import list_models
2
+ from cachetools import cached, TTLCache
3
+ from toolz import groupby, valmap
4
+ import gradio as gr
5
+ from tqdm.auto import tqdm
6
+ import pandas as pd
7
+
8
+
9
@cached(TTLCache(maxsize=10, ttl=60 * 60 * 3))
def get_all_models():
    """Collect everything yielded by ``list_models(cardData=True)``.

    The call is wrapped in a ``TTLCache`` with ``ttl=60 * 60 * 3`` (three
    hours with cachetools' default seconds-based timer), so repeated calls
    inside that window reuse the list built by the first call.

    Returns:
        A list of the yielded model entries with ``None`` values dropped.
    """
    kept = []
    # tqdm reports progress while the listing is being consumed.
    for entry in tqdm(iter(list_models(cardData=True))):
        if entry is not None:
            kept.append(entry)
    return kept
13
+
14
+
15
def has_base_model_info(model):
    """Tell whether ``model`` declares a single, string-valued base model.

    Args:
        model: Object expected to expose a ``cardData`` attribute that is
            either falsy or offers a ``.get`` method.

    Returns:
        ``True`` only when ``model.cardData.get("base_model")`` is a
        non-empty ``str``. Missing attributes, empty card data, non-string
        values (for example a list) and empty strings all give ``False``.
    """
    try:
        metadata = model.cardData
        declared = metadata.get("base_model") if metadata else None
        return bool(declared) and isinstance(declared, str)
    except AttributeError:
        # Either ``model`` has no ``cardData`` or it is not mapping-like.
        return False
24
+
25
+
26
# Split the model listing on whether each model declares a string
# ``base_model`` in its card data (the predicate is ``has_base_model_info``).
all_models = get_all_models()
grouped_by_has_base_model_info = groupby(has_base_model_info, all_models)
print(valmap(len, grouped_by_has_base_model_info))

# ``groupby`` only creates keys that at least one item mapped to, so fall back
# to an empty list instead of calling ``len(None)`` when a group is absent.
models_with_base_model_info = grouped_by_has_base_model_info.get(True, [])
models_without_base_model_info = grouped_by_has_base_model_info.get(False, [])

# Guard the ratio against an empty listing to avoid ZeroDivisionError.
percent_with_base_model_info = (
    round(len(models_with_base_model_info) / len(all_models) * 100, 2)
    if all_models
    else 0.0
)

# Markdown text shown at the top of the UI.
summary = f"""{len(models_with_base_model_info)} models have base model info.
{len(models_without_base_model_info)} models don't have base model info.
Currently {percent_with_base_model_info}% of models have base model info."""

base_models = [
    model.cardData.get("base_model") for model in models_with_base_model_info
]
# One row per distinct base model together with how many models cite it.
df = pd.DataFrame(
    pd.DataFrame({"base_model": base_models}).value_counts()
).reset_index()

# Lookup table used by the UI callback: base model name -> models citing it.
grouped_by_base_model = groupby(
    lambda x: x.cardData.get("base_model"), models_with_base_model_info
)

# Dropdown choices, in the row order produced by ``value_counts`` above.
all_base_models = df["base_model"].to_list()
46
+
47
+
48
def return_models_for_base_model(base_model):
    """Render a Markdown report of the models that cite ``base_model``.

    Args:
        base_model: Key looked up in the module-level
            ``grouped_by_base_model`` mapping. May be ``None`` or empty,
            e.g. when nothing is selected in the dropdown.

    Returns:
        An empty string when ``base_model`` is ``None`` or empty. Otherwise
        a Markdown string with a heading, the number of children, and one
        linked line per child model ordered by download count (highest
        first). An unknown ``base_model`` yields a report with 0 children.
    """
    if not base_model:
        # Nothing selected: show nothing rather than raising in the callback.
        return ""
    # ``.get`` returns None for unknown keys; treat that as "no children".
    models = grouped_by_base_model.get(base_model) or []
    # A missing (None) download count sorts as 0 so ints are never compared
    # against None.
    models = sorted(models, key=lambda x: x.downloads or 0, reverse=True)
    parts = [
        f"## {base_model} children\n\n",
        f"{base_model} has {len(models)} children\n\n",
    ]
    parts.extend(
        f"[{model.modelId}](https://huggingface.co/{model.modelId})"
        f" | number of downloads {model.downloads}" + "\n\n"
        for model in models
    )
    return "".join(parts)
61
+
62
+
63
# Build the page: a summary section followed by a base-model picker whose
# selection drives a Markdown panel listing the derived models.
with gr.Blocks() as demo:
    gr.Markdown("### Models with base model info")
    gr.Markdown(summary)
    gr.Markdown("### Find all models trained from a base model")
    base_model = gr.Dropdown(choices=all_base_models, label="Base Model")
    results = gr.Markdown()
    # Re-render the report every time the dropdown value changes.
    base_model.change(
        fn=return_models_for_base_model,
        inputs=base_model,
        outputs=results,
    )

# NOTE: a table view of the counts, gr.DataFrame(df), is currently disabled.
demo.launch()
requirements.in ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ cachetools
2
+ gradio
3
+ huggingface_hub
4
+ toolz
5
+ tqdm
requirements.txt ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.11
3
+ # by the following command:
4
+ #
5
+ # pip-compile requirements.in
6
+ #
7
+ aiofiles==23.2.1
8
+ # via gradio
9
+ altair==5.1.1
10
+ # via gradio
11
+ annotated-types==0.5.0
12
+ # via pydantic
13
+ anyio==3.7.1
14
+ # via
15
+ # fastapi
16
+ # httpcore
17
+ # starlette
18
+ attrs==23.1.0
19
+ # via
20
+ # jsonschema
21
+ # referencing
22
+ cachetools==5.3.1
23
+ # via -r requirements.in
24
+ certifi==2023.7.22
25
+ # via
26
+ # httpcore
27
+ # httpx
28
+ # requests
29
+ charset-normalizer==3.2.0
30
+ # via requests
31
+ click==8.1.7
32
+ # via uvicorn
33
+ contourpy==1.1.0
34
+ # via matplotlib
35
+ cycler==0.11.0
36
+ # via matplotlib
37
+ fastapi==0.103.1
38
+ # via gradio
39
+ ffmpy==0.3.1
40
+ # via gradio
41
+ filelock==3.12.3
42
+ # via huggingface-hub
43
+ fonttools==4.42.1
44
+ # via matplotlib
45
+ fsspec==2023.9.0
46
+ # via
47
+ # gradio-client
48
+ # huggingface-hub
49
+ gradio==3.42.0
50
+ # via -r requirements.in
51
+ gradio-client==0.5.0
52
+ # via gradio
53
+ h11==0.14.0
54
+ # via
55
+ # httpcore
56
+ # uvicorn
57
+ httpcore==0.17.3
58
+ # via httpx
59
+ httpx==0.24.1
60
+ # via
61
+ # gradio
62
+ # gradio-client
63
+ huggingface-hub==0.16.4
64
+ # via
65
+ # -r requirements.in
66
+ # gradio
67
+ # gradio-client
68
+ idna==3.4
69
+ # via
70
+ # anyio
71
+ # httpx
72
+ # requests
73
+ importlib-resources==6.0.1
74
+ # via gradio
75
+ jinja2==3.1.2
76
+ # via
77
+ # altair
78
+ # gradio
79
+ jsonschema==4.19.0
80
+ # via altair
81
+ jsonschema-specifications==2023.7.1
82
+ # via jsonschema
83
+ kiwisolver==1.4.5
84
+ # via matplotlib
85
+ markupsafe==2.1.3
86
+ # via
87
+ # gradio
88
+ # jinja2
89
+ matplotlib==3.7.2
90
+ # via gradio
91
+ numpy==1.25.2
92
+ # via
93
+ # altair
94
+ # contourpy
95
+ # gradio
96
+ # matplotlib
97
+ # pandas
98
+ orjson==3.9.5
99
+ # via gradio
100
+ packaging==23.1
101
+ # via
102
+ # altair
103
+ # gradio
104
+ # gradio-client
105
+ # huggingface-hub
106
+ # matplotlib
107
+ pandas==2.1.0
108
+ # via
109
+ # altair
110
+ # gradio
111
+ pillow==10.0.0
112
+ # via
113
+ # gradio
114
+ # matplotlib
115
+ pydantic==2.3.0
116
+ # via
117
+ # fastapi
118
+ # gradio
119
+ pydantic-core==2.6.3
120
+ # via pydantic
121
+ pydub==0.25.1
122
+ # via gradio
123
+ pyparsing==3.0.9
124
+ # via matplotlib
125
+ python-dateutil==2.8.2
126
+ # via
127
+ # matplotlib
128
+ # pandas
129
+ python-multipart==0.0.6
130
+ # via gradio
131
+ pytz==2023.3
132
+ # via pandas
133
+ pyyaml==6.0.1
134
+ # via
135
+ # gradio
136
+ # huggingface-hub
137
+ referencing==0.30.2
138
+ # via
139
+ # jsonschema
140
+ # jsonschema-specifications
141
+ requests==2.31.0
142
+ # via
143
+ # gradio
144
+ # gradio-client
145
+ # huggingface-hub
146
+ rpds-py==0.10.2
147
+ # via
148
+ # jsonschema
149
+ # referencing
150
+ semantic-version==2.10.0
151
+ # via gradio
152
+ six==1.16.0
153
+ # via python-dateutil
154
+ sniffio==1.3.0
155
+ # via
156
+ # anyio
157
+ # httpcore
158
+ # httpx
159
+ starlette==0.27.0
160
+ # via fastapi
161
+ toolz==0.12.0
162
+ # via
163
+ # -r requirements.in
164
+ # altair
165
+ tqdm==4.66.1
166
+ # via
167
+ # -r requirements.in
168
+ # huggingface-hub
169
+ typing-extensions==4.7.1
170
+ # via
171
+ # fastapi
172
+ # gradio
173
+ # gradio-client
174
+ # huggingface-hub
175
+ # pydantic
176
+ # pydantic-core
177
+ tzdata==2023.3
178
+ # via pandas
179
+ urllib3==2.0.4
180
+ # via requests
181
+ uvicorn==0.23.2
182
+ # via gradio
183
+ websockets==11.0.3
184
+ # via
185
+ # gradio
186
+ # gradio-client