# NOTE: removed scraped Hugging Face Space page residue ("Spaces: Sleeping") that
# preceded the script; the actual source file begins below.
# upload_htmls_and_index.py
"""Upload per-model HTML reports to a Hugging Face dataset repo and build index.csv."""

import posixpath
from pathlib import Path
from urllib.parse import unquote  # currently unused; kept in case other parts of the file rely on it

import pandas as pd
from huggingface_hub import HfApi  # was imported twice; deduplicated

# Local root holding one subfolder per model, each containing HTML report files.
# resolve() once here — the later re-resolve was redundant and has been removed.
REPORTS_ROOT = Path("/data/atlask/Model-Preds-Html/AudioSet-Audio").resolve()
# Target Hugging Face dataset repository for both the HTML files and index.csv.
DATASET_REPO = "akazemian/audio-html"

api = HfApi()
# Upload each model's HTML reports one subfolder at a time. The call targets the
# PARENT directory because older huggingface_hub releases don't support
# path_in_repo; allow_patterns restricts every call to a single model's files.
model_dirs = sorted(p for p in REPORTS_ROOT.iterdir() if p.is_dir())
for model_dir in model_dirs:
    model = model_dir.name
    print(f"[HF] upload_large_folder: {REPORTS_ROOT} (include {model}/**/*.html) -> {DATASET_REPO}")
    api.upload_large_folder(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        folder_path=str(REPORTS_ROOT),          # parent folder, not the subfolder
        allow_patterns=[f"{model}/**/*.html"],  # scope to this model only
    )
    print(f"✓ uploaded {model}")
# (B) Build index.csv from your existing library.csv (no model_name)
# library.csv is expected in the current working directory; its "path" column
# should hold absolute local paths (filtered further below).
library = pd.read_csv("library.csv")
def ensure_cols(df, cols):
    """Guarantee every name in *cols* exists as a column of *df*.

    Columns that are absent are created in place with empty-string values;
    the same (mutated) frame is returned for chaining.
    """
    missing = [name for name in cols if name not in df.columns]
    for name in missing:
        df[name] = ""
    return df
# Make sure every column the index needs is present, even if library.csv
# was written by an older version of the tooling.
library = ensure_cols(library, [
    "id", "filename", "path", "tags", "keywords",
    "notes", "uploaded_at", "category", "dataset",
])
def local_to_relpath(local_path: str, root=None) -> str:
    """Return *local_path* relative to *root*, normalized to POSIX separators.

    HF repo paths are always forward-slash separated, so the relative path is
    rendered with ``Path.as_posix()`` regardless of the host OS.

    Args:
        local_path: absolute (or resolvable) local file path.
        root: base directory; defaults to the module-level REPORTS_ROOT.

    Returns:
        The POSIX-style relative path ("." when local_path equals root).

    Raises:
        ValueError: if *local_path* does not live under *root*.
    """
    base = REPORTS_ROOT if root is None else Path(root).resolve()
    rel = Path(local_path).resolve().relative_to(base)
    # as_posix() handles the zero-component case (returns "."), which the old
    # posixpath.join(*rel.parts) crashed on with TypeError.
    return rel.as_posix()
# Restrict the index to rows whose local path is an .html file located under
# REPORTS_ROOT (convert the column to str once instead of twice).
path_str = library["path"].astype(str)
is_html = path_str.str.endswith(".html", na=False)
under_root = path_str.str.startswith(str(REPORTS_ROOT), na=False)
idx = library[is_html & under_root].copy()

# Translate each absolute local path into its repo-relative POSIX path.
idx["relpath"] = idx["path"].apply(local_to_relpath)

index_cols = ["id", "filename", "relpath", "category", "dataset",
              "tags", "keywords", "notes", "uploaded_at"]
index_df = idx[index_cols].copy()
index_df.to_csv("index.csv", index=False)
# (C) Upload index.csv to the dataset repo as its own small commit.
from huggingface_hub import CommitOperationAdd

index_operation = CommitOperationAdd(
    path_in_repo="index.csv",
    path_or_fileobj="index.csv",
)
api.create_commit(
    repo_id=DATASET_REPO,
    repo_type="dataset",
    operations=[index_operation],
    commit_message=f"Add/update index.csv ({len(index_df)} rows)",
)
print("Done: uploaded HTMLs (large-folder) and index.csv")