Tristan Thrush committed on
Commit
f0d92dc
1 Parent(s): 3e8882d

huge speedup in data getter

Files changed (1)
  1. app.py +13 -26
app.py CHANGED
@@ -1,5 +1,4 @@
 import pandas as pd
-from tqdm.auto import tqdm
 import streamlit as st
 from huggingface_hub import HfApi, hf_hub_download
 from huggingface_hub.repocard import metadata_load
@@ -10,23 +9,10 @@ from os.path import exists
 import threading
 
 
-def get_model_ids(author=None):
+def get_model_infos():
     api = HfApi()
-    if author is None:
-        models = api.list_models(filter="model-index")
-    else:
-        models = api.list_models(filter="model-index", author="autoevaluate")
-    model_ids = [x.modelId for x in models]
-    return model_ids
-
-
-def get_metadata(model_id):
-    try:
-        readme_path = hf_hub_download(model_id, filename="README.md")
-        return metadata_load(readme_path)
-    except Exception:
-        # 404 README.md not found or problem loading it
-        return None
+    model_infos = api.list_models(filter="model-index", cardData=True)
+    return model_infos
 
 
 def parse_metric_value(value):
@@ -83,27 +69,28 @@ def parse_metrics_rows(meta, only_verified=False):
             continue
         yield row
 
-@st.cache(ttl=3600)
+@st.cache(ttl=10)
 def get_data_wrapper():
 
-    def get_data():
+    def get_data(dataframe=None, verified_dataframe=None):
         data = []
         verified_data = []
-        model_ids = get_model_ids()
-        model_ids_from_autoeval = set(get_model_ids(author="autoevaluate"))
-        for model_id in tqdm(model_ids):
-            meta = get_metadata(model_id)
+        print("getting model infos")
+        model_infos = get_model_infos()
+        print("got model infos")
+        for model_info in model_infos:
+            meta = model_info.cardData
             if meta is None:
                 continue
             for row in parse_metrics_rows(meta):
                 if row is None:
                     continue
-                row["model_id"] = model_id
+                row["model_id"] = model_info.id
                 data.append(row)
             for row in parse_metrics_rows(meta, only_verified=True):
                 if row is None:
                     continue
-                row["model_id"] = model_id
+                row["model_id"] = model_info.id
                 verified_data.append(row)
         dataframe = pd.DataFrame.from_records(data)
         dataframe.to_pickle("cache.pkl")
@@ -136,7 +123,7 @@ only_verified_results = st.sidebar.checkbox(
     "Filter for Verified Results",
 )
 
-selectable_datasets = sorted(list(set(dataframe.dataset.tolist() + verified_dataframe.dataset.tolist())), key=lambda name: name.lower())
+selectable_datasets = sorted(list(set(dataframe.dataset.tolist())), key=lambda name: name.lower())
 
 if only_verified_results:
     dataframe = verified_dataframe
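
Why this is faster: the old code listed model ids and then downloaded each model's README.md with hf_hub_download() just to read its metadata, one HTTP request per model. The new code asks the Hub to return the card metadata inline, so a single list_models(filter="model-index", cardData=True) call replaces all of those per-model downloads. A minimal sketch of the new pattern follows; it assumes a huggingface_hub version that, like the one used in this commit, accepts cardData=True and exposes .cardData and .id on the returned ModelInfo objects. The __main__ demo loop is illustrative and not part of app.py.

from huggingface_hub import HfApi


def get_model_infos():
    # One call returns every model tagged with "model-index",
    # with its parsed card metadata already attached.
    api = HfApi()
    return api.list_models(filter="model-index", cardData=True)


if __name__ == "__main__":
    for model_info in get_model_infos():
        meta = model_info.cardData  # README front matter as a dict, or None
        if meta is None:
            continue
        print(model_info.id, sorted(meta.keys()))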