ybelkada committed on
Commit
af674e3
·
verified ·
1 Parent(s): 28ca6f7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A simple script that loops over all public models and gets their library_name
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
+ from collections import Counter
7
+
8
+ from huggingface_hub import HfApi
9
+ from datasets import load_dataset
10
+
11
# Hub client used for every Hub query in this script.
api = HfApi()
# Handle on the model listing; it is iterated later to count models.
list_models = api.list_models()
13
+
14
def fetch_dataset_and_init():
    """Load the model-card dataset and tally its ``library_name`` values.

    Returns:
        A ``(df, df_log)`` pair of DataFrames. ``df`` has one row per
        distinct ``library_name`` value with an ``index`` column (the value)
        and a ``count`` column, ordered from most to least frequent.
        ``df_log`` is a copy of ``df`` whose ``count`` column holds the
        natural logarithm of the counts.
    """
    cards = load_dataset("librarian-bots/model_cards_with_metadata", split="train")
    tally = Counter(cards["library_name"])

    # Most frequent first; naming the column here replaces a later rename.
    df = (
        pd.Series(tally)
        .sort_values(ascending=False)
        .to_frame("count")
        .reset_index()
    )

    # `assign` returns a new frame, so `df` keeps its raw counts.
    df_log = df.assign(count=np.log(df["count"]))

    return df, df_log
29
+
30
# Computed once at import time; both frames are read by the plotting callback.
df, df_log = fetch_dataset_and_init()
31
+
32
def get_current_nb_models():
    """Return how many Hub models have no row in the model-card dataset.

    Returns:
        The difference between the number of models listed by the Hub API
        and the number of rows tallied into ``df``, formatted as a string
        for display in a text box.
    """
    # `list_models` hands back a lazy iterable, so it has no `len`. Request a
    # fresh listing on every call: the module-level `list_models` object is
    # exhausted after one pass and would count 0 on every later click.
    total_models = sum(1 for _ in api.list_models())

    # Every dataset row contributed exactly one count, so the counts add up
    # to the number of model cards. `len(df)` would only be the number of
    # distinct library names.
    models_with_cards = int(df["count"].sum())

    return str(total_models - models_with_cards)
37
+
38
# Height and width handed to every gr.BarPlot built by the plotting callback.
plot_height, plot_width = 512, 1512
# Starts as the number of rows in `df`; the name is rebound to the slider
# widget when the UI is built further down.
top_k = len(df)
41
+
42
def bar_plot_fn(display, top_k):
    """Build the bar plot for the selected display mode.

    Args:
        display: ``"simple"`` to plot the raw counts from ``df``, ``"log"``
            to plot the log-scaled counts from ``df_log``.
        top_k: Number of leading rows of the chosen frame to plot.

    Returns:
        A ``gr.BarPlot`` for a recognised mode, ``None`` for any other value.
    """
    if display == "simple":
        frame = df
    elif display == "log":
        frame = df_log
    else:
        # Unrecognised mode: nothing to draw.
        return None

    return gr.BarPlot(
        frame[:top_k],
        x="index",
        y="count",
        tooltip=["index", "count"],
        height=plot_height,
        width=plot_width,
    )
61
+
62
+
63
# NOTE(review): the nesting below is reconstructed from a listing that lost
# its indentation — confirm the layout against the running app.
with gr.Blocks() as bar_plot:
    with gr.Column():
        # Controls: plot mode and how many libraries to show.
        with gr.Column():
            display = gr.Dropdown(
                label="Type of Bar Plot",
                choices=["simple", "log"],
                value="simple",
            )
            top_k = gr.Slider(
                label="Select top-K most used library_name",
                minimum=1,
                maximum=len(df),
                value=len(df),
                step=1,
            )
        # Output area; filled in by the callbacks registered below.
        with gr.Column():
            plot = gr.BarPlot()

    with gr.Row():
        fetch_button = gr.Button(
            value="Fetch number of models without model cards (takes up to 1min to fetch everything)"
        )
        text_box = gr.Textbox(
            value="",
            label="Number of models without model cards",
        )

    # Redraw whenever either control changes, and once on page load so the
    # plot is not empty at start-up.
    top_k.change(fn=bar_plot_fn, inputs=[display, top_k], outputs=plot)
    display.change(fn=bar_plot_fn, inputs=[display, top_k], outputs=plot)
    fetch_button.click(fn=get_current_nb_models, outputs=[text_box])
    bar_plot.load(fn=bar_plot_fn, inputs=[display, top_k], outputs=plot)

bar_plot.launch()