ybelkada committed on
Commit
1a3c9e6
1 Parent(s): bf0b4e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -101
app.py CHANGED
@@ -1,115 +1,89 @@
1
- # A simple script that loops over all public models and gets their library_name
 
 
 
2
  import gradio as gr
3
  import pandas as pd
4
- import numpy as np
5
-
6
- from collections import Counter
7
-
8
- from huggingface_hub import HfApi
9
- from datasets import load_dataset
10
 
11
  api = HfApi()
12
- list_models = api.list_models()
13
-
14
def fetch_dataset_and_init():
    """Build the per-library model counts, in raw and log-scaled form.

    Loads the train split of ``librarian-bots/model_cards_with_metadata`` and
    counts how often each ``library_name`` value occurs.

    Returns:
        A ``(df, df_log)`` pair of DataFrames with the columns
        ``library_name`` and ``count``, ordered by descending count.
        ``df_log`` is a copy of ``df`` whose counts went through ``np.log``.
    """
    cards = load_dataset("librarian-bots/model_cards_with_metadata", split="train")
    occurrences = pd.Series(Counter(cards["library_name"]))

    # Most frequent library first; the library names start out as the index
    # and are moved into a regular column.
    df = occurrences.sort_values(ascending=False).to_frame()
    df.columns = ["count"]
    df = df.reset_index().rename(columns={"index": "library_name"})

    # None entries are swapped for an explicit, displayable label.
    df.replace(to_replace=[None], value="No library_name", inplace=True)

    df_log = df.copy()
    df_log["count"] = np.log(df_log["count"])
    return df, df_log
32
-
33
- df, df_log = fetch_dataset_and_init()
34
-
35
def get_current_nb_models():
    """Return how many Hub models are not accounted for in ``df``.

    Counts every model currently listed by ``api.list_models()`` and subtracts
    the sum of the ``count`` column of the module-level ``df``.

    Returns:
        The difference, formatted as a string for display in a text box.
    """
    # `list_models` hands back a generator, so it has to be counted by
    # iterating. A generator can only be consumed once: request a fresh
    # listing on every call, otherwise every call after the first counts 0.
    total_models = sum(1 for _ in api.list_models())
    diff_models = total_models - df["count"].sum()
    return str(diff_models)
40
-
41
# Pixel dimensions shared by every bar plot built by `bar_plot_fn`.
plot_height = 512
plot_width = 1512
# Initial values; the UI section further down rebinds these two names to
# Gradio components.
select_box = ["all"]
top_k = len(df)


def bar_plot_fn(display, top_k, select_box=None):
    """Build the library-usage bar plot for the current UI selection.

    Args:
        display: ``"simple"`` to plot raw counts (``df``) or ``"log"`` to plot
            log-scaled counts (``df_log``).
        top_k: Number of leading rows to keep when no specific library is
            selected.
        select_box: Library names to show. ``None`` or exactly ``["all"]``
            means "no filter"; in that case ``top_k`` applies instead.
            Defaults to ``None`` so the function can also be called with only
            ``display`` and ``top_k``.

    Returns:
        A ``gr.BarPlot`` for the selected rows, or ``None`` when ``display``
        is neither ``"simple"`` nor ``"log"``.
    """
    if display == "simple":
        source_df = df
    elif display == "log":
        source_df = df_log
    else:
        # Unknown display mode: nothing to draw.
        return None

    # Any selection other than exactly ["all"] filters by library name and
    # makes `top_k` a no-op.
    if select_box is not None and select_box != ["all"]:
        current_df = source_df[source_df["library_name"].isin(select_box)]
    else:
        current_df = source_df[:top_k]

    return gr.BarPlot(
        current_df,
        x="library_name",
        y="count",
        tooltip=["library_name", "count"],
        height=plot_height,
        width=plot_width,
    )
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
# UI definition: plot controls, the plot itself, a button that fetches the
# number of models without model cards, and a library multi-select.
with gr.Blocks() as bar_plot:
    with gr.Column():
        with gr.Column():
            display = gr.Dropdown(
                choices=[
                    "simple",
                    "log",
                ],
                value="simple",
                label="Type of Bar Plot",
            )
            top_k = gr.Slider(
                label="Select top-K most used library_name (This leads to a no-op if you selected something else than 'all' in the columns below)",
                value=len(df),
                minimum=1,
                maximum=len(df),
                step=1,
            )
        with gr.Column():
            plot = gr.BarPlot()

        with gr.Row():
            fetch_button = gr.Button(value="Fetch current number of models without model cards (takes up to 1min to fetch everything)")
            text_box = gr.Textbox(value="", label="Number of models without model cards")

        with gr.Column():
            select_box = gr.Dropdown(
                ["all"] + df["library_name"].tolist(), value=["all"], multiselect=True, label="Libraries to inspect", info="Select specific libraries to inspect"
            )

        # Every control feeds the same three inputs into `bar_plot_fn`.
        plot_inputs = [display, top_k, select_box]
        top_k.change(bar_plot_fn, inputs=plot_inputs, outputs=plot)
        display.change(bar_plot_fn, inputs=plot_inputs, outputs=plot)
        select_box.change(bar_plot_fn, inputs=plot_inputs, outputs=plot)

        fetch_button.click(get_current_nb_models, outputs=[text_box])
        # The initial render needs the library selection as well:
        # `bar_plot_fn` takes three arguments.
        bar_plot.load(fn=bar_plot_fn, inputs=plot_inputs, outputs=plot)

bar_plot.launch()
 
1
+ from datasets import load_dataset
2
+ from huggingface_hub import ModelCard
3
+ from huggingface_hub import HfApi
4
+
5
  import gradio as gr
6
  import pandas as pd
 
 
 
 
 
 
7
 
8
  api = HfApi()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
# Hub dataset this app reads its model cards from.
repo_id = "librarian-bots/model_cards_with_metadata"

# NOTE(review): `get_data()` loads and filters the dataset again on its own,
# so this module-level copy looks unused by the rest of the script. Confirm
# before removing it.
dataset = load_dataset(repo_id, split="train")
dataset = dataset.filter(lambda row: row["library_name"] == "transformers")

# Map "MM/DD/YYYY" -> commit id of the dataset repo. Several commits can share
# a day; keep the first one seen for each date instead of letting later
# entries overwrite it. Assumes the Hub lists commits newest-first, so the
# first one seen is that day's most recent snapshot (TODO confirm).
list_commits = api.list_repo_commits(repo_id, repo_type="dataset")
commits_date_dict = {}
for commit in list_commits:
    commits_date_dict.setdefault(commit.created_at.strftime("%m/%d/%Y"), commit.commit_id)

# Date shown in the title of the initial plot.
current_date = "latest"
18
+
19
def get_data(commit_date: str = "latest") -> pd.DataFrame:
    """Count transformers models and those lacking ``library_name`` in their card.

    Args:
        commit_date: ``"latest"`` to read the current revision of the dataset,
            or a key of ``commits_date_dict`` to read the dataset as of the
            commit recorded for that date.

    Returns:
        A two-row DataFrame with the columns ``name`` and ``count``: the
        number of dataset rows whose ``library_name`` is ``transformers`` and,
        among those, the number whose card declares no ``library_name``.

    Raises:
        KeyError: If ``commit_date`` is neither ``"latest"`` nor a key of
            ``commits_date_dict``.
    """
    ds_kwargs = {}
    if commit_date != "latest":
        # Pin the dataset to the commit recorded for the requested date.
        ds_kwargs["revision"] = commits_date_dict[commit_date]

    dataset = load_dataset(repo_id, split="train", **ds_kwargs)
    dataset = dataset.filter(lambda row: row["library_name"] == "transformers")

    def library_name_missing_from_card(card):
        """Return True when the card parses and its metadata has no ``library_name``."""
        try:
            return ModelCard(card).data.library_name is None
        except Exception:
            # Best effort: a card that cannot be parsed or inspected is not
            # counted as missing the tag.
            return False

    ds = dataset.map(
        lambda row: {"missing_library_name": library_name_missing_from_card(row["card"])},
        num_proc=4,
    )

    return pd.DataFrame(
        {
            "name": ["Total Number of transformers Model", "Total number of models with missing 'library_name: transformers' in model card."],
            "count": [len(ds), sum(ds["missing_library_name"])],
        }
    )
50
+
51
def fetch_fn(commit_date="latest"):
    """Rebuild the bar plot for the dataset snapshot of ``commit_date``.

    Args:
        commit_date: ``"latest"`` or a date understood by ``get_data``.

    Returns:
        A ``gr.BarPlot`` built from ``get_data(commit_date)``. Its title names
        the selected date, matching the title format of the initial plot.
    """
    return gr.BarPlot(
        get_data(commit_date=commit_date),
        x="name",
        y="count",
        title=f"Count of Model cards with the correct library_name tag at the date {commit_date}",
        height=256,
        width=1024,
        tooltip=["name", "count"],
        vertical=False,
    )
63
+
64
# Figures for the initial plot, computed once at start-up.
data = get_data()

with gr.Blocks() as bar_plot:
    with gr.Column():
        with gr.Row():
            plot = gr.BarPlot(
                data,
                x="name",
                y="count",
                title=f"Count of Model cards with the correct library_name tag at the date {current_date}",
                height=256,
                width=1024,
                tooltip=["name", "count"],
                vertical=False,
            )
        with gr.Column():
            display = gr.Dropdown(
                # "latest" is the initial value, so it has to be offered as a
                # choice too; otherwise it cannot be selected again once a
                # date has been picked.
                choices=["latest", *commits_date_dict],
                value="latest",
                label="Dataset snapshot date",
            )

        # Picking another date recomputes the counts and redraws the plot.
        display.change(fetch_fn, inputs=display, outputs=plot)

bar_plot.launch()