Spaces:
Running
Running
import json | |
import os | |
import re | |
from collections import defaultdict | |
from datetime import datetime, timedelta, timezone | |
import huggingface_hub | |
from huggingface_hub import ModelCard | |
from huggingface_hub.hf_api import ModelInfo | |
from transformers import AutoConfig | |
from transformers.models.auto.tokenization_auto import AutoTokenizer | |
def already_submitted_models(requested_models_dir: str) -> tuple[set[str], defaultdict[str, list]]:
    """Gather already submitted models to avoid duplicates.

    Walks ``requested_models_dir`` and reads every ``*.json`` file located
    exactly one directory level below it (``<dir>/<subdir>/<file>.json``).
    Files directly inside ``requested_models_dir`` or nested deeper are ignored.

    Args:
        requested_models_dir: Root directory holding the request files. A
            trailing path separator is accepted.

    Returns:
        A 2-tuple ``(submitted, users_to_submission_dates)``:

        * ``submitted`` -- set of ``"{model}_{revision}"`` identifiers, one per
          request file read.
        * ``users_to_submission_dates`` -- ``defaultdict(list)`` mapping the
          part of ``model`` before the first ``/`` to the ``submitted_time``
          values found for it. Requests whose ``model`` has no ``/`` or that
          lack ``submitted_time`` are not recorded here.

    Raises:
        KeyError: If a request file lacks the ``model`` or ``revision`` key.
        json.JSONDecodeError: If a request file is not valid JSON.
    """
    depth = 1
    file_names = []
    users_to_submission_dates = defaultdict(list)

    for root, _, files in os.walk(requested_models_dir):
        # Measure depth from the relative path rather than by counting
        # separators in the raw strings: a trailing separator on
        # `requested_models_dir` would otherwise shift the count by one.
        relative_root = os.path.relpath(root, requested_models_dir)
        if relative_root == os.curdir:
            current_depth = 0
        else:
            current_depth = relative_root.count(os.sep) + 1
        if current_depth != depth:
            continue

        for file in files:
            if not file.endswith(".json"):
                continue

            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                info = json.load(f)

            model = info["model"]
            file_names.append(f"{model}_{info['revision']}")

            # Select organisation
            if "/" not in model or "submitted_time" not in info:
                continue
            # Split once only, so an id with several "/" does not raise on unpacking.
            organisation = model.split("/", 1)[0]
            users_to_submission_dates[organisation].append(info["submitted_time"])

    return set(file_names), users_to_submission_dates