Muennighoff committed on
Commit
46022eb
1 Parent(s): 4025917

Support sharded safetensors size (#63)

Browse files

- Add tqdm for external models (13b0b9b4d03857bc4a801f0f13f303a8fdc4e8e6)
- Add "model.safetensors.index.json" support (57d567f4a5fe6734a43b23fe5d40f1e6fe099f5c)

Files changed (2) hide show
  1. app.py +15 -1
  2. requirements.txt +2 -1
app.py CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
6
  from huggingface_hub import get_hf_file_metadata, HfApi, hf_hub_download, hf_hub_url
7
  from huggingface_hub.repocard import metadata_load
8
  import pandas as pd
 
9
 
10
  TASKS = [
11
  "BitextMining",
@@ -786,7 +787,9 @@ def add_task(examples):
786
  examples["mteb_task"] = "Unknown"
787
  return examples
788
 
789
- for model in EXTERNAL_MODELS:
 
 
790
  ds = load_dataset("mteb/results", model)
791
  # For local debugging:
792
  #, download_mode='force_redownload', verification_mode="no_checks")
@@ -834,6 +837,17 @@ def get_dim_seq_size(model):
834
  url = hf_hub_url(model.modelId, filename="model.safetensors")
835
  meta = get_hf_file_metadata(url)
836
  size = round(meta.size / 1e9, 2)
 
 
 
 
 
 
 
 
 
 
 
837
  return dim, seq, size
838
 
839
  def make_datasets_clickable(df):
6
  from huggingface_hub import get_hf_file_metadata, HfApi, hf_hub_download, hf_hub_url
7
  from huggingface_hub.repocard import metadata_load
8
  import pandas as pd
9
+ from tqdm.autonotebook import tqdm
10
 
11
  TASKS = [
12
  "BitextMining",
787
  examples["mteb_task"] = "Unknown"
788
  return examples
789
 
790
+ pbar = tqdm(EXTERNAL_MODELS, desc="Fetching external model results")
791
+ for model in pbar:
792
+ pbar.set_description(f"Fetching external model results for {model!r}")
793
  ds = load_dataset("mteb/results", model)
794
  # For local debugging:
795
  #, download_mode='force_redownload', verification_mode="no_checks")
837
  url = hf_hub_url(model.modelId, filename="model.safetensors")
838
  meta = get_hf_file_metadata(url)
839
  size = round(meta.size / 1e9, 2)
840
+ elif "model.safetensors.index.json" in filenames:
841
+ index_path = hf_hub_download(model.modelId, filename="model.safetensors.index.json")
842
+ """
843
+ {
844
+ "metadata": {
845
+ "total_size": 14483464192
846
+ },....
847
+ """
848
+ size = json.load(open(index_path))
849
+ if ("metadata" in size) and ("total_size" in size["metadata"]):
850
+ size = round(size["metadata"]["total_size"] / 1e9, 2)
851
  return dim, seq, size
852
 
853
  def make_datasets_clickable(df):
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  gradio
2
  datasets
3
  pandas
4
- huggingface_hub
 
1
  gradio
2
  datasets
3
  pandas
4
+ huggingface_hub
5
+ tqdm