code update
- app.py +21 -6
- src/envs.py +13 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
+from pathlib import Path
 import os
 os.environ['CURL_CA_BUNDLE'] = ''
 
@@ -34,19 +35,33 @@ from src.submission.submit import add_new_eval
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=TOKEN)
+SNAPSHOT_ALLOW_PATTERNS = ["*.json", "*.md", "*.csv", "*.tsv"]
 
-try:
-    print(EVAL_REQUESTS_PATH)
+
+def ensure_directory(path: str) -> None:
+    Path(path).mkdir(parents=True, exist_ok=True)
+
+
+def download_dataset(repo_id: str, target_dir: str) -> None:
+    ensure_directory(target_dir)
     snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+        repo_id=repo_id,
+        local_dir=target_dir,
+        repo_type="dataset",
+        allow_patterns=SNAPSHOT_ALLOW_PATTERNS,
+        tqdm_class=None,
+        etag_timeout=30,
     )
+
+
+try:
+    print(EVAL_REQUESTS_PATH)
+    download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
-    )
+    download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
 except Exception:
     restart_space()
 
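For context: the refactor routes both dataset pulls through a new download_dataset helper that first ensures the target directory exists and then calls snapshot_download with allow_patterns, so only small text artifacts (*.json, *.md, *.csv, *.tsv) are mirrored locally. The sketch below shows the same pattern as a standalone script; the repo id and target directory in the last line are hypothetical placeholders, not values from this Space.

from pathlib import Path

from huggingface_hub import snapshot_download

# Only lightweight text files are mirrored; larger binaries in the repo are skipped.
ALLOW_PATTERNS = ["*.json", "*.md", "*.csv", "*.tsv"]


def download_dataset(repo_id: str, target_dir: str) -> None:
    Path(target_dir).mkdir(parents=True, exist_ok=True)  # create the local target up front
    snapshot_download(
        repo_id=repo_id,
        local_dir=target_dir,
        repo_type="dataset",
        allow_patterns=ALLOW_PATTERNS,
        tqdm_class=None,
        etag_timeout=30,
    )


if __name__ == "__main__":
    # Hypothetical repo id and directory, for illustration only.
    download_dataset("some-org/requests_kg_v0.1", "/tmp/hf_home/eval-queue")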
src/envs.py CHANGED
@@ -11,7 +11,19 @@ REPO_ID = os.environ.get("REPO_ID", f"{OWNER}/OpenLLMKyrgyzLeaderboard_v0.1")
 QUEUE_REPO = os.environ.get("QUEUE_REPO", f"{OWNER}/requests_kg_v0.1")
 RESULTS_REPO = os.environ.get("RESULTS_REPO", f"{OWNER}/results_kg_v0.1")
 
-CACHE_PATH = os.getenv("HF_HOME", ".")
+DEFAULT_HF_HOME = "/tmp/hf_home"
+HF_HOME = os.environ.get("HF_HOME", DEFAULT_HF_HOME)
+if "HF_HOME" not in os.environ:
+    os.environ["HF_HOME"] = HF_HOME
+
+# Ensure cache directories live on ephemeral storage to avoid filling the persistent 50GB space quota
+os.environ.setdefault("HF_HUB_CACHE", os.path.join(HF_HOME, "hub"))
+
+# Create the directories eagerly so downstream code can rely on their existence
+os.makedirs(HF_HOME, exist_ok=True)
+os.makedirs(os.environ["HF_HUB_CACHE"], exist_ok=True)
+
+CACHE_PATH = HF_HOME
 
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")