Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
·
87e47c2
1
Parent(s):
9b133aa
Updated app.py download_dataset function
Browse files
- app.py +15 -11
- src/populate.py +0 -1
- src/tools/collections.py +1 -1
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
import logging
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
@@ -56,13 +57,12 @@ enable_space_ci()
|
|
56 |
def restart_space():
|
57 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
"""Attempt to download dataset with retries."""
|
62 |
attempt = 0
|
63 |
while attempt < max_attempts:
|
64 |
try:
|
65 |
-
|
66 |
snapshot_download(
|
67 |
repo_id=repo_id,
|
68 |
local_dir=local_dir,
|
@@ -71,21 +71,25 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3):
|
|
71 |
etag_timeout=30,
|
72 |
max_workers=8,
|
73 |
)
|
|
|
74 |
return
|
75 |
except Exception as e:
|
76 |
-
|
|
|
|
|
77 |
attempt += 1
|
78 |
-
|
79 |
-
restart_space()
|
80 |
-
|
81 |
|
82 |
def init_space(full_init: bool = True):
|
83 |
"""Initializes the application space, loading only necessary data."""
|
84 |
if full_init:
|
85 |
# These downloads only occur on full initialization
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
89 |
|
90 |
# Always retrieve the leaderboard DataFrame
|
91 |
raw_data, original_df = get_leaderboard_df(
|
|
|
1 |
import os
|
2 |
+
import time
|
3 |
import logging
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
|
|
57 |
def restart_space():
|
58 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
59 |
|
60 |
+
def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
|
61 |
+
"""Download dataset with exponential backoff retries."""
|
|
|
62 |
attempt = 0
|
63 |
while attempt < max_attempts:
|
64 |
try:
|
65 |
+
logging.info(f"Downloading {repo_id} to {local_dir}")
|
66 |
snapshot_download(
|
67 |
repo_id=repo_id,
|
68 |
local_dir=local_dir,
|
|
|
71 |
etag_timeout=30,
|
72 |
max_workers=8,
|
73 |
)
|
74 |
+
logging.info("Download successful")
|
75 |
return
|
76 |
except Exception as e:
|
77 |
+
wait_time = backoff_factor ** attempt
|
78 |
+
logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
|
79 |
+
time.sleep(wait_time)
|
80 |
attempt += 1
|
81 |
+
raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
|
|
|
|
|
82 |
|
83 |
def init_space(full_init: bool = True):
|
84 |
"""Initializes the application space, loading only necessary data."""
|
85 |
if full_init:
|
86 |
# These downloads only occur on full initialization
|
87 |
+
try:
|
88 |
+
download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
|
89 |
+
download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
|
90 |
+
download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
|
91 |
+
except Exception:
|
92 |
+
restart_space()
|
93 |
|
94 |
# Always retrieve the leaderboard DataFrame
|
95 |
raw_data, original_df = get_leaderboard_df(
|
src/populate.py
CHANGED
@@ -52,4 +52,3 @@ def get_leaderboard_df(results_path, requests_path, dynamic_path, cols, benchmar
|
|
52 |
df = df[cols].round(decimals=2)
|
53 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
54 |
return raw_data, df
|
55 |
-
|
|
|
52 |
df = df[cols].round(decimals=2)
|
53 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
54 |
return raw_data, df
|
|
src/tools/collections.py
CHANGED
@@ -73,4 +73,4 @@ def update_collections(df: DataFrame):
|
|
73 |
try:
|
74 |
delete_collection_item(collection_slug=PATH_TO_COLLECTION, item_object_id=item_id, token=H4_TOKEN)
|
75 |
except HfHubHTTPError:
|
76 |
-
continue
|
|
|
73 |
try:
|
74 |
delete_collection_item(collection_slug=PATH_TO_COLLECTION, item_object_id=item_id, token=H4_TOKEN)
|
75 |
except HfHubHTTPError:
|
76 |
+
continue
|