TTimur committed on
Commit
31055bb
·
1 Parent(s): 9b1e367

code update

Browse files
Files changed (2) hide show
  1. app.py +21 -6
  2. src/envs.py +13 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  from huggingface_hub import snapshot_download
 
5
  import os
6
  os.environ['CURL_CA_BUNDLE'] = ''
7
 
@@ -34,19 +35,33 @@ from src.submission.submit import add_new_eval
34
 
35
  def restart_space():
36
  API.restart_space(repo_id=REPO_ID, token=TOKEN)
 
37
 
38
- try:
39
- print(EVAL_REQUESTS_PATH)
 
 
 
 
 
40
  snapshot_download(
41
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
 
 
 
 
 
42
  )
 
 
 
 
 
43
  except Exception:
44
  restart_space()
45
  try:
46
  print(EVAL_RESULTS_PATH)
47
- snapshot_download(
48
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
49
- )
50
  except Exception:
51
  restart_space()
52
 
 
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  from huggingface_hub import snapshot_download
5
+ from pathlib import Path
6
  import os
7
  os.environ['CURL_CA_BUNDLE'] = ''
8
 
 
35
 
36
  def restart_space():
37
  API.restart_space(repo_id=REPO_ID, token=TOKEN)
38
+ SNAPSHOT_ALLOW_PATTERNS = ["*.json", "*.md", "*.csv", "*.tsv"]
39
 
40
+
41
+ def ensure_directory(path: str) -> None:
42
+ Path(path).mkdir(parents=True, exist_ok=True)
43
+
44
+
45
+ def download_dataset(repo_id: str, target_dir: str) -> None:
46
+ ensure_directory(target_dir)
47
  snapshot_download(
48
+ repo_id=repo_id,
49
+ local_dir=target_dir,
50
+ repo_type="dataset",
51
+ allow_patterns=SNAPSHOT_ALLOW_PATTERNS,
52
+ tqdm_class=None,
53
+ etag_timeout=30,
54
  )
55
+
56
+
57
+ try:
58
+ print(EVAL_REQUESTS_PATH)
59
+ download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
60
  except Exception:
61
  restart_space()
62
  try:
63
  print(EVAL_RESULTS_PATH)
64
+ download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
 
 
65
  except Exception:
66
  restart_space()
67
 
src/envs.py CHANGED
@@ -11,7 +11,19 @@ REPO_ID = os.environ.get("REPO_ID", f"{OWNER}/OpenLLMKyrgyzLeaderboard_v0.1")
11
  QUEUE_REPO = os.environ.get("QUEUE_REPO", f"{OWNER}/requests_kg_v0.1")
12
  RESULTS_REPO = os.environ.get("RESULTS_REPO", f"{OWNER}/results_kg_v0.1")
13
 
14
- CACHE_PATH=os.getenv("HF_HOME", ".")
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Local caches
17
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 
11
  QUEUE_REPO = os.environ.get("QUEUE_REPO", f"{OWNER}/requests_kg_v0.1")
12
  RESULTS_REPO = os.environ.get("RESULTS_REPO", f"{OWNER}/results_kg_v0.1")
13
 
14
+ DEFAULT_HF_HOME = "/tmp/hf_home"
15
+ HF_HOME = os.environ.get("HF_HOME", DEFAULT_HF_HOME)
16
+ if "HF_HOME" not in os.environ:
17
+ os.environ["HF_HOME"] = HF_HOME
18
+
19
+ # Default the cache directories to ephemeral storage (unless already set in the environment) to avoid filling the persistent 50 GB Space quota
20
+ os.environ.setdefault("HF_HUB_CACHE", os.path.join(HF_HOME, "hub"))
21
+
22
+ # Create the directories eagerly so downstream code can rely on their existence
23
+ os.makedirs(HF_HOME, exist_ok=True)
24
+ os.makedirs(os.environ["HF_HUB_CACHE"], exist_ok=True)
25
+
26
+ CACHE_PATH = HF_HOME
27
 
28
  # Local caches
29
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")