Commit 4384295 · add namespace
Parent(s): 10c52b1

Files changed:
- Development/Debug/run-experiments-job-namespace-debug-2025-09-29.md  (+54 -0)
- app.py  (+22 -9)
- tests/debug/test_run_experiments.py  (+118 -36)
- utils/__pycache__/config.cpython-310.pyc  (binary)
- utils/config.py  (+102 -6)
Development/Debug/run-experiments-job-namespace-debug-2025-09-29.md
ADDED
@@ -0,0 +1,54 @@
# Debug Report: Job submission requires explicit Jobs token/namespace

Date: 2025-09-29

## Problem Statement

Experiment submission returned a 403 Forbidden error: missing `job.write` permission when creating Jobs under namespace `tianhaowang`.

## Symptoms

- UI banner displayed the raw Hugging Face Jobs API error containing the request id and permission failure.
- No compute job was created; submission stopped immediately after the API call.

## Reproduction Steps

1. Configure the Space with `SERVICE_HF_TOKEN` only (no `HF_JOBS_TOKEN` or namespace override).
2. Upload a dataset and press "Run Experiments".
3. Observe the 403 banner referencing missing `job.write` permissions for the default namespace (a standalone reproduction sketch follows this list).
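The same 403 can be reproduced outside the UI with a direct `run_job` call. A minimal sketch, assuming a token that can push datasets but lacks Jobs permissions (the token value is a placeholder; the image and flavor mirror the app's job submission):

```python
from huggingface_hub import run_job
from huggingface_hub.utils import HfHubHTTPError

token_without_jobs_scope = "hf_..."  # placeholder: dataset-capable token without job.write

try:
    run_job(
        image="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
        command=["python", "-c", "print('hello')"],
        flavor="a10g-small",
        timeout=600,
        token=token_without_jobs_scope,
    )
except HfHubHTTPError as exc:
    # The Jobs API rejects the request when the token cannot create Jobs
    # in the implicit (user) namespace.
    print(getattr(exc.response, "status_code", None))  # 403
```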

## Investigation Process

### Hypotheses

- The service token lacked the `jobs:write` scope even though it could push datasets.
- The Jobs API defaulted to the user namespace (`whoami()['name']`) while the token carried permissions only for an organization namespace (see the diagnostic sketch after this list).
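Both hypotheses can be checked by inspecting the token identity; a minimal sketch (`token` is a placeholder for the token under test, and the fields read here are the same ones the namespace inference in `utils/config.py` relies on):

```python
from huggingface_hub import whoami

identity = whoami(token=token)  # placeholder: the configured service or jobs token
print("default namespace:", identity.get("name"))
for org in identity.get("orgs", []):
    # Jobs can only be created in namespaces where the token's role grants job.write.
    print(org.get("name"), "role:", org.get("role"))
```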

### Tests Performed

- Extended regression coverage (`tests/debug/test_run_experiments.py::test_submit_experiments_uses_configured_job_namespace`) to ensure the app forwards an explicit `job_namespace` when configured.
- Re-ran the full debug suite (`pytest tests/debug`) after installing pytest; all 11 tests passed locally.

### Findings

- `submit_experiments` always created Jobs in the implicit namespace and reused the dataset service token. This broke deployments where the compute token differs from the dataset token or where Jobs permissions are tied to an organization.

## Solution Approach

### Proposed Fix

- Enrich `SpaceConfig` with optional job-specific credentials (token from `HF_JOBS_TOKEN`, namespace from `HF_JOBS_NAMESPACE`, or inferred from `JOB_CODE_REPO` / org ownership).
- Use `SpaceConfig.resolve_job_token` to pick the OAuth token, jobs token, or service token, in that order (no new secrets required for the common case).
- Deduce a namespace from the selected token (prefer organizations where the token has admin/write roles, optionally biasing toward the owner encoded in `JOB_CODE_REPO`) and pass it to `huggingface_hub.run_job` alongside the token, so the API call runs under a namespace that actually grants `jobs:write`.

### Sample Code

```python
job_token = CONFIG.resolve_job_token(user_token=user_token)
job_namespace = CONFIG.resolve_job_namespace(job_token)
job = run_job(
    image="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
    command=command,
    flavor="a10g-small",
    timeout=7200,
    env=env,
    token=job_token,
    namespace=job_namespace,
)
```

## Validation

- `pytest tests/debug` (11 tests) — all green.

## Prevention Recommendations

- When issuing service tokens, include the Jobs scope or ensure the organization grants `jobs:write`; document the expected namespace (e.g., via `JOB_CODE_REPO`) in deployment runbooks (a configuration sketch follows this list).
- Add CI coverage that mocks 403 responses and asserts the UI surfaces a descriptive remediation message.
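For deployment runbooks, a minimal configuration sketch (the org and repo names are illustrative placeholders; the resolution order follows `utils/config.py` in this change):

```python
import os

from utils.config import load_space_config

# Illustrative deployment values only.
os.environ["SERVICE_HF_TOKEN"] = "hf_service_token"    # dataset pushes and fallback
os.environ["HF_JOBS_TOKEN"] = "hf_jobs_token"          # optional dedicated Jobs token
os.environ["HF_JOBS_NAMESPACE"] = "curation-org"       # optional explicit Jobs namespace
os.environ["JOB_CODE_REPO"] = "curation-org/job-code"  # owner doubles as preferred namespace

config = load_space_config()
job_token = config.resolve_job_token()                   # HF_JOBS_TOKEN wins when no OAuth token is passed
job_namespace = config.resolve_job_namespace(job_token)  # "curation-org" from HF_JOBS_NAMESPACE
```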
app.py
CHANGED
@@ -9,6 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
 
 import gradio as gr
 from huggingface_hub import inspect_job, run_job
+from huggingface_hub.utils import HfHubHTTPError
 
 from utils.config import ConfigError, load_space_config
 from utils.data import DatasetSpec, load_candidate_catalog
@@ -152,7 +153,7 @@ def submit_experiments(
         raise RuntimeError(f"Configuration error: {CONFIG_ERROR}")
     assert CONFIG is not None
     try:
-
+        CONFIG.require_service_token()
     except ConfigError as exc:
         raise RuntimeError(
             "`SERVICE_HF_TOKEN` is required to submit experiments. Configure the secret "
@@ -211,6 +212,9 @@ def submit_experiments(
     jobs: List[Dict[str, Any]] = []
     env = CONFIG.job_env(user_token=user_token)
 
+    job_token = CONFIG.resolve_job_token(user_token=user_token)
+    job_namespace = CONFIG.resolve_job_namespace(job_token)
+
     for dk in dk_list:
         command = [
             "python",
@@ -234,14 +238,23 @@
         ]
         if test_repo:
             command.extend(["--test_dataset", test_repo])
-
-
-
-
-
-
-
-
+        try:
+            job = run_job(
+                image="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
+                command=command,
+                flavor="a10g-small",
+                timeout=7200,
+                env=env,
+                token=job_token,
+                namespace=job_namespace,
+            )
+        except HfHubHTTPError as exc:
+            if getattr(exc.response, "status_code", None) == 403:
+                raise RuntimeError(
+                    "`SERVICE_HF_TOKEN` lacks `jobs:write` or is not allowed to create Jobs in the selected "
+                    "namespace. Sign in with Hugging Face or configure a token with Jobs permissions."
+                ) from exc
+            raise
         jobs.append(
            {
                 "id": job.id,
tests/debug/test_run_experiments.py
CHANGED
@@ -1,9 +1,53 @@
 from types import SimpleNamespace
 
+import pytest
+import requests
+from huggingface_hub.utils import HfHubHTTPError
+
 import app
 from utils import hub
 
 
+class DummyConfig:
+    def __init__(
+        self,
+        service_token: str,
+        results_repo: str,
+        *,
+        job_token: str | None = None,
+        job_namespace: str | None = None,
+        preferred_job_namespace: str | None = None,
+    ) -> None:
+        self.service_token = service_token
+        self.results_repo = results_repo
+        self.job_token = job_token
+        self.job_namespace = job_namespace
+        self.preferred_job_namespace = preferred_job_namespace
+
+    def require_service_token(self) -> str:
+        return self.service_token
+
+    def job_env(self, user_token=None):  # noqa: ANN001 - mimic production signature
+        env = {
+            "SERVICE_HF_TOKEN": self.service_token,
+            "RESULTS_REPO": self.results_repo,
+        }
+        token = user_token or self.job_token or self.service_token
+        env["HF_TOKEN"] = token
+        env["HUGGINGFACEHUB_API_TOKEN"] = token
+        return env
+
+    def resolve_job_token(self, user_token=None):  # noqa: ANN001 - mimic production signature
+        if user_token:
+            return user_token
+        if self.job_token:
+            return self.job_token
+        return self.service_token
+
+    def resolve_job_namespace(self, token=None):  # noqa: ANN001 - mimic production signature
+        return self.job_namespace or self.preferred_job_namespace
+
+
 def test_environment_diagnostics_blocks_without_service_token(monkeypatch):
     monkeypatch.delenv("SERVICE_HF_TOKEN", raising=False)
     monkeypatch.delenv("OAUTH_CLIENT_ID", raising=False)
@@ -140,24 +184,6 @@ def test_ensure_results_repo_uses_resolved_owner(monkeypatch):
 
 
 def test_submit_experiments_uses_service_token_when_oauth_missing(monkeypatch):
-    class DummyConfig:
-        def __init__(self, service_token: str, results_repo: str):
-            self.service_token = service_token
-            self.results_repo = results_repo
-
-        def require_service_token(self) -> str:
-            return self.service_token
-
-        def job_env(self, user_token=None):  # noqa: ANN001 - mimic production signature
-            env = {
-                "SERVICE_HF_TOKEN": self.service_token,
-                "RESULTS_REPO": self.results_repo,
-            }
-            token = user_token or self.service_token
-            env["HF_TOKEN"] = token
-            env["HUGGINGFACEHUB_API_TOKEN"] = token
-            return env
-
     config = DummyConfig(service_token="service-token", results_repo="owner/results")
     monkeypatch.setattr(app, "CONFIG", config)
     monkeypatch.setattr(app, "CONFIG_ERROR", None)
@@ -168,6 +194,7 @@ def test_submit_experiments_uses_service_token_when_oauth_missing(monkeypatch):
 
     def fake_run_job(*, image, command, env=None, secrets=None, flavor=None, timeout=None, namespace=None, token=None):  # noqa: ANN001
         captured["token"] = token
+        captured["namespace"] = namespace
         return SimpleNamespace(id="job-1", status="queued", url="")
 
     monkeypatch.setattr(app, "run_job", fake_run_job)
@@ -187,27 +214,10 @@ def test_submit_experiments_uses_service_token_when_oauth_missing(monkeypatch):
 
     assert jobs[0]["id"] == "job-1"
     assert captured["token"] == "service-token"
+    assert captured["namespace"] is None
 
 
 def test_submit_experiments_prefers_oauth_token(monkeypatch):
-    class DummyConfig:
-        def __init__(self, service_token: str, results_repo: str):
-            self.service_token = service_token
-            self.results_repo = results_repo
-
-        def require_service_token(self) -> str:
-            return self.service_token
-
-        def job_env(self, user_token=None):  # noqa: ANN001 - mimic production signature
-            env = {
-                "SERVICE_HF_TOKEN": self.service_token,
-                "RESULTS_REPO": self.results_repo,
-            }
-            token = user_token or self.service_token
-            env["HF_TOKEN"] = token
-            env["HUGGINGFACEHUB_API_TOKEN"] = token
-            return env
-
     config = DummyConfig(service_token="service-token", results_repo="owner/results")
     monkeypatch.setattr(app, "CONFIG", config)
     monkeypatch.setattr(app, "CONFIG_ERROR", None)
@@ -218,6 +228,7 @@ def test_submit_experiments_prefers_oauth_token(monkeypatch):
 
     def fake_run_job(*, image, command, env=None, secrets=None, flavor=None, timeout=None, namespace=None, token=None):  # noqa: ANN001
         captured["token"] = token
+        captured["namespace"] = namespace
         return SimpleNamespace(id="job-1", status="queued", url="")
 
     monkeypatch.setattr(app, "run_job", fake_run_job)
@@ -239,3 +250,74 @@
     )
 
     assert captured["token"] == "user-token"
+    assert captured["namespace"] is None
+
+
+def test_submit_experiments_uses_configured_job_namespace(monkeypatch):
+    config = DummyConfig(
+        service_token="service-token",
+        results_repo="owner/results",
+        job_token="job-token",
+        job_namespace="curation-org",
+    )
+    monkeypatch.setattr(app, "CONFIG", config)
+    monkeypatch.setattr(app, "CONFIG_ERROR", None)
+    monkeypatch.setattr(app, "ensure_uploaded_dataset", lambda *_, **__: "user/dataset")
+    monkeypatch.setattr(app, "ensure_results_repo", lambda cfg: cfg.results_repo)
+
+    captured = {}
+
+    def fake_run_job(*, image, command, env=None, secrets=None, flavor=None, timeout=None, namespace=None, token=None):  # noqa: ANN001
+        captured["token"] = token
+        captured["namespace"] = namespace
+        return SimpleNamespace(id="job-1", status="queued", url="")
+
+    monkeypatch.setattr(app, "run_job", fake_run_job)
+
+    app.submit_experiments(
+        d0_files=[],
+        d0_id="user/dataset",
+        task="classification",
+        model="meta-llama/Llama-3.1-8B-Instruct",
+        metrics=["f1"],
+        dk_list=["candidate/dataset"],
+        sizes=[1000],
+        target_size=1000,
+        test_files=None,
+        test_id="",
+    )
+
+    assert captured["token"] == "job-token"
+    assert captured["namespace"] == "curation-org"
+
+
+def test_submit_experiments_handles_jobs_permission_error(monkeypatch):
+    config = DummyConfig(service_token="service-token", results_repo="owner/results")
+    monkeypatch.setattr(app, "CONFIG", config)
+    monkeypatch.setattr(app, "CONFIG_ERROR", None)
+    monkeypatch.setattr(app, "ensure_uploaded_dataset", lambda *_, **__: "user/dataset")
+    monkeypatch.setattr(app, "ensure_results_repo", lambda cfg: cfg.results_repo)
+
+    def fake_run_job(*args, **kwargs):  # noqa: ANN001 - signature compatibility
+        response = requests.Response()
+        response.status_code = 403
+        response.headers = {"x-request-id": "req"}
+        raise HfHubHTTPError("forbidden", response=response)
+
+    monkeypatch.setattr(app, "run_job", fake_run_job)
+
+    with pytest.raises(RuntimeError) as err:
+        app.submit_experiments(
+            d0_files=[],
+            d0_id="user/dataset",
+            task="classification",
+            model="meta-llama/Llama-3.1-8B-Instruct",
+            metrics=["f1"],
+            dk_list=["candidate/dataset"],
+            sizes=[1000],
+            target_size=1000,
+            test_files=None,
+            test_id="",
+        )
+
+    assert "jobs:write" in str(err.value)
utils/__pycache__/config.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/config.cpython-310.pyc and b/utils/__pycache__/config.cpython-310.pyc differ
utils/config.py
CHANGED
@@ -2,10 +2,12 @@
 from __future__ import annotations
 
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Dict, Optional
 
+from huggingface_hub import whoami
+
 
 class ConfigError(RuntimeError):
     """Raised when required configuration is missing."""
@@ -17,6 +19,11 @@ class SpaceConfig:
     results_repo: Optional[str]
     persist_dir: Optional[Path]
     hf_home: Optional[Path]
+    job_token: Optional[str]
+    job_namespace: Optional[str]
+    preferred_job_namespace: Optional[str]
+    _cached_job_namespace: Optional[str] = field(default=None, init=False, repr=False)
+    _cached_namespace_token: Optional[str] = field(default=None, init=False, repr=False)
 
     @property
     def persistent_storage_enabled(self) -> bool:
@@ -32,6 +39,7 @@ class SpaceConfig:
     def job_env(self, user_token: Optional[str] = None) -> Dict[str, str]:
         env: Dict[str, str] = {}
         service_token = self.service_token
+        job_token = self.job_token
 
         if service_token:
             env["SERVICE_HF_TOKEN"] = service_token
@@ -42,19 +50,59 @@ class SpaceConfig:
         if self.hf_home:
             env["HF_HOME"] = str(self.hf_home)
 
+        shared_token: Optional[str] = None
         if user_token:
-
-
+            shared_token = user_token
+        elif job_token:
+            shared_token = job_token
         elif service_token:
-
-
-        else:
+            shared_token = service_token
+        if not shared_token:
             raise ConfigError(
                 "SERVICE_HF_TOKEN is required when an OAuth token is not provided."
             )
+        env["HF_TOKEN"] = shared_token
+        env["HUGGINGFACEHUB_API_TOKEN"] = shared_token
 
         return env
 
+    def resolve_job_token(self, user_token: Optional[str] = None) -> str:
+        if user_token:
+            return user_token
+        if self.job_token:
+            return self.job_token
+        if self.service_token:
+            return self.service_token
+        raise ConfigError(
+            "A token with `jobs:write` permission is required. Set `HF_JOBS_TOKEN` or `SERVICE_HF_TOKEN`."
+        )
+
+    def resolve_job_namespace(self, token: Optional[str]) -> Optional[str]:
+        if self.job_namespace:
+            return self.job_namespace
+
+        if token and token not in {self.job_token, self.service_token}:
+            # OAuth token: let the API infer the namespace from the user identity.
+            return None
+
+        effective = token or self.job_token or self.service_token
+        if not effective:
+            return None
+
+        if (
+            self._cached_namespace_token == effective
+            and self._cached_job_namespace is not None
+        ):
+            return self._cached_job_namespace
+
+        namespace = _infer_namespace_from_token(
+            effective,
+            preferred=self.preferred_job_namespace,
+        )
+        self._cached_namespace_token = effective
+        self._cached_job_namespace = namespace
+        return namespace
+
 
 def load_space_config(strict: bool = False) -> SpaceConfig:
     service_token = os.getenv("SERVICE_HF_TOKEN")
@@ -69,6 +117,16 @@ def load_space_config(strict: bool = False) -> SpaceConfig:
     hf_home_env = _clean_env_value("HF_HOME")
     hf_home = Path(hf_home_env) if hf_home_env else None
 
+    job_token = _clean_env_value("HF_JOBS_TOKEN")
+
+    job_namespace = _clean_env_value("HF_JOBS_NAMESPACE")
+    if not job_namespace:
+        job_namespace = _clean_env_value("HF_ORG")
+    job_preferred_namespace: Optional[str] = None
+    job_code_repo = _clean_env_value("JOB_CODE_REPO")
+    if job_code_repo and "/" in job_code_repo:
+        job_preferred_namespace = job_code_repo.split("/", 1)[0].strip() or None
+
     if persist_dir and not persist_dir.exists():
         persist_dir.mkdir(parents=True, exist_ok=True)
 
@@ -77,9 +135,47 @@ def load_space_config(strict: bool = False) -> SpaceConfig:
         results_repo=results_repo,
         persist_dir=persist_dir,
         hf_home=hf_home,
+        job_token=job_token,
+        job_namespace=job_namespace,
+        preferred_job_namespace=job_preferred_namespace,
     )
 
 
 def _clean_env_value(key: str) -> Optional[str]:
     raw = os.getenv(key, "").strip()
     return raw or None
+
+
+def _infer_namespace_from_token(
+    token: str,
+    *,
+    preferred: Optional[str] = None,
+) -> Optional[str]:
+    try:
+        identity = whoami(token=token)
+    except Exception:
+        return None
+
+    orgs = identity.get("orgs") or []
+    identity_name = identity.get("name")
+    available_names = {
+        *(org.get("name") for org in orgs if org.get("name")),
+        identity_name,
+    }
+    available_names.discard(None)
+
+    if (
+        preferred
+        and preferred != identity_name
+        and preferred in available_names
+    ):
+        return preferred
+
+    for org in orgs:
+        role = (org.get("role") or "").lower()
+        if role in {"admin", "write"} or "job" in role:
+            name = org.get("name")
+            if name:
+                return name
+
+    return identity.get("name")