albertvillanova HF staff committed on
Commit
d0f55c6
·
verified ·
1 Parent(s): da4a3b1

Load results asynchronously

Browse files
Files changed (2) hide show
  1. src/hub.py +16 -0
  2. src/results.py +11 -11
src/hub.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx
2
+ from huggingface_hub import hf_hub_url
3
+
4
+
5
# Module-level async HTTP client, created with httpx defaults and used by load_file for its GET requests.
+ client = httpx.AsyncClient()
6
+
7
+
8
async def load_file(path):
    """Download a JSON file from the Hub and return its parsed content.

    Args:
        path: Repository file path in the form accepted by ``to_url``
            (four ``/``-separated parts: prefix, organisation, dataset
            name and file name).

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    url = to_url(path)
    # httpx does not follow redirects unless asked to; the Hub may answer
    # a resolve URL with a redirect, so opt in for this request.
    r = await client.get(url, follow_redirects=True)
    # Fail loudly on HTTP errors instead of parsing an error body as if it
    # were a results file.
    r.raise_for_status()
    return r.json()
12
+
13
+
14
def to_url(path):
    """Translate a repository file path into a Hub download URL.

    The path is split on its first three ``/`` separators. The leading
    part is discarded (presumably the ``datasets`` prefix -- confirm
    against the caller), the next two form the repo id, and the remainder
    is the file name inside the dataset repository.

    Raises:
        ValueError: If the path has fewer than four ``/``-separated parts.
    """
    parts = path.split("/", maxsplit=3)
    _prefix, owner, dataset, file_path = parts
    repo_id = owner + "/" + dataset
    return hf_hub_url(repo_id=repo_id, filename=file_path, repo_type="dataset")
src/results.py CHANGED
@@ -1,4 +1,4 @@
1
- import json
2
 
3
  import gradio as gr
4
  import numpy as np
@@ -6,6 +6,7 @@ import pandas as pd
6
  from huggingface_hub import HfFileSystem
7
 
8
  from src.constants import RESULTS_DATASET_ID, TASKS
 
9
 
10
 
11
  def fetch_result_paths():
@@ -28,25 +29,24 @@ def update_load_results_component():
28
  return (gr.Button("Load", interactive=True), ) * 2
29
 
30
 
31
- def load_results_dataframe(model_id, result_paths_per_model=None):
32
  if not model_id or not result_paths_per_model:
33
  return
34
  result_paths = result_paths_per_model[model_id]
35
- fs = HfFileSystem()
36
  data = {"results": {}, "configs": {}}
37
- for path in result_paths:
38
- with fs.open(path, "r") as f:
39
- d = json.load(f)
40
- data["results"].update(d["results"])
41
- data["configs"].update(d["configs"])
42
- model_name = d.get("model_name", "Model")
43
  df = pd.json_normalize([data])
44
  # df.columns = df.columns.str.split(".") # .split return a list instead of a tuple
45
  return df.set_index(pd.Index([model_name])).reset_index()
46
 
47
 
48
- def load_results_dataframes(*model_ids, result_paths_per_model=None):
49
- return [load_results_dataframe(model_id, result_paths_per_model=result_paths_per_model) for model_id in model_ids]
 
50
 
51
 
52
  def display_results(task, *dfs):
 
1
+ import asyncio
2
 
3
  import gradio as gr
4
  import numpy as np
 
6
  from huggingface_hub import HfFileSystem
7
 
8
  from src.constants import RESULTS_DATASET_ID, TASKS
9
+ from src.hub import load_file
10
 
11
 
12
  def fetch_result_paths():
 
29
  return (gr.Button("Load", interactive=True), ) * 2
30
 
31
 
32
async def load_results_dataframe(model_id, result_paths_per_model=None):
    """Fetch all result files of one model and merge them into a one-row DataFrame.

    Args:
        model_id: Key used to look up the model's result file paths.
        result_paths_per_model: Mapping from model id to the paths of that
            model's result files.

    Returns:
        ``None`` when ``model_id`` or ``result_paths_per_model`` is empty,
        or when the model has no result files. Otherwise a single-row
        DataFrame built with ``pd.json_normalize`` from the merged
        ``results`` and ``configs`` dictionaries, with the model name in
        the ``index`` column.

    Raises:
        KeyError: If ``model_id`` is not a key of ``result_paths_per_model``.
    """
    if not model_id or not result_paths_per_model:
        return None
    result_paths = result_paths_per_model[model_id]
    # Download every file concurrently; gather keeps the input order.
    results = await asyncio.gather(*[load_file(path) for path in result_paths])
    if not results:
        # Without any result file there is no model name and no data to
        # show; mirror the guard above instead of failing on an unbound name.
        return None
    data = {"results": {}, "configs": {}}
    for result in results:
        data["results"].update(result["results"])
        data["configs"].update(result["configs"])
    # As before, the name reported by the last file wins.
    model_name = results[-1].get("model_name", "Model")
    df = pd.json_normalize([data])
    # df.columns = df.columns.str.split(".")  # .split returns a list instead of a tuple
    return df.set_index(pd.Index([model_name])).reset_index()
45
 
46
 
47
async def load_results_dataframes(*model_ids, result_paths_per_model=None):
    """Load the results DataFrame of every given model concurrently.

    Args:
        *model_ids: Model ids, each passed to ``load_results_dataframe``.
        result_paths_per_model: Mapping forwarded unchanged to
            ``load_results_dataframe``.

    Returns:
        A list with one entry per model id, in the same order, holding
        whatever ``load_results_dataframe`` returned for it.
    """
    pending = (
        load_results_dataframe(model_id, result_paths_per_model)
        for model_id in model_ids
    )
    return await asyncio.gather(*pending)
50
 
51
 
52
  def display_results(task, *dfs):