Spaces:
Sleeping
Sleeping
neindochoh
committed on
Commit
•
38b8acb
1
Parent(s):
174607d
Upload folder using huggingface_hub
Browse files- Dockerfile +20 -0
- README.md +10 -7
- run.py +92 -0
Dockerfile
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10
|
2 |
+
|
3 |
+
ARG SPOTLIGHT_VERSION=1.5.0rc1
|
4 |
+
|
5 |
+
RUN useradd -m -u 1000 user
|
6 |
+
|
7 |
+
USER user
|
8 |
+
|
9 |
+
ENV HOME=/home/user \
|
10 |
+
PATH=/home/user/.local/bin:$PATH
|
11 |
+
|
12 |
+
WORKDIR $HOME/app
|
13 |
+
|
14 |
+
ENV SPOTLIGHT_VERSION=$SPOTLIGHT_VERSION
|
15 |
+
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
|
16 |
+
RUN pip install --no-cache-dir --upgrade "renumics-spotlight==${SPOTLIGHT_VERSION}"
|
17 |
+
|
18 |
+
COPY --chown=user --chmod=0755 run.py .
|
19 |
+
|
20 |
+
CMD ["./run.py"]
|
README.md
CHANGED
@@ -1,10 +1,13 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
|
|
|
|
|
|
|
|
7 |
pinned: false
|
8 |
-
|
9 |
-
|
10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title:
|
3 |
+
emoji: 🔬
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: green
|
6 |
sdk: docker
|
7 |
+
app_port: 7860
|
8 |
+
# models: []
|
9 |
+
# datasets: []
|
10 |
+
# tags: []
|
11 |
pinned: false
|
12 |
+
license: mit
|
13 |
+
---
|
|
run.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Serve a Hugging Face dataset.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import dataclasses
|
7 |
+
import os
|
8 |
+
from typing import Optional
|
9 |
+
|
10 |
+
import datasets
|
11 |
+
import huggingface_hub
|
12 |
+
from renumics import spotlight
|
13 |
+
|
14 |
+
|
15 |
+
def login() -> None:
    """
    Log in to the Hugging Face Hub if a token is configured.

    The token is read from the `HF_TOKEN` environment variable. Surrounding
    whitespace (for example a trailing newline from a pasted secret) is
    stripped before use. If the variable is unset, empty or whitespace-only,
    no login is attempted.
    """
    # Strip first so that a blank-but-present value is treated as "no token"
    # instead of being handed to the Hub as a credential.
    if token := os.environ.get("HF_TOKEN", "").strip():
        huggingface_hub.login(token)
|
21 |
+
|
22 |
+
|
23 |
+
@dataclasses.dataclass
class HFSettings:
    """
    Hugging Face settings.

    Describes which dataset to load and, optionally, a second dataset whose
    columns are used to enrich it.
    """

    # Dataset identifier (read from `HF_DATASET`); required.
    dataset: str
    # Optional subset name (read from `HF_SUBSET`).
    subset: Optional[str] = None
    # Optional split name (read from `HF_SPLIT`).
    split: Optional[str] = None
    # Optional dataset revision (read from `HF_REVISION`).
    revision: Optional[str] = None

    # Optional identifier of the enrichment dataset (read from `HF_ENRICHMENT`).
    enrichment: Optional[str] = None

    @staticmethod
    def _read_env(name: str) -> Optional[str]:
        """
        Return the stripped value of environment variable `name`.

        Unset, empty and whitespace-only values are all reported as `None`.
        """
        return os.environ.get(name, "").strip() or None

    @classmethod
    def from_environ(cls) -> "HFSettings":
        """
        Parse Hugging Face settings from environment.

        Raises:
            RuntimeError: if `HF_DATASET` is unset or blank.
        """
        dataset = cls._read_env("HF_DATASET")
        if dataset is None:
            raise RuntimeError(
                "Desired Hugging Face dataset must be set as `HF_DATASET` "
                "environment variable."
            )
        # Keyword arguments keep the mapping from variable to field explicit
        # and independent of field declaration order.
        return cls(
            dataset=dataset,
            subset=cls._read_env("HF_SUBSET"),
            split=cls._read_env("HF_SPLIT"),
            revision=cls._read_env("HF_REVISION"),
            enrichment=cls._read_env("HF_ENRICHMENT"),
        )

    def __str__(self) -> str:
        return f"{self.dataset}[subset={self.subset},split={self.split},revision={self.revision}]"
|
57 |
+
|
58 |
+
|
59 |
+
if __name__ == "__main__":
    # Load and serve the given Hugging Face dataset.
    login()

    hf_settings = HFSettings.from_environ()
    print(f"Loading Hugging Face dataset {hf_settings}.")
    ds = datasets.load_dataset(
        hf_settings.dataset,
        hf_settings.subset,
        split=hf_settings.split,
        revision=hf_settings.revision,
    )
    # Validate the loaded object before touching it any further: the
    # enrichment step below relies on `len(ds)` being a row count and on
    # `ds.split`, which only holds for a single `datasets.Dataset`. Checking
    # first ensures the user sees this hint rather than an unrelated failure
    # raised from inside the enrichment merge.
    if not isinstance(ds, datasets.Dataset):
        raise TypeError(
            f"Loaded Hugging Face dataset is of type {type(ds)} instead of "
            "`datasets.Dataset`. Did you forget to specify subset and/or split "
            "(use environment variables `HF_SUBSET` and `HF_SPLIT` respective)?"
        )
    if hf_settings.enrichment is not None:
        # The enrichment dataset is loaded with the same subset and split as
        # the main dataset; no revision is passed for it.
        ds_enrichment = datasets.load_dataset(
            hf_settings.enrichment,
            hf_settings.subset,
            split=hf_settings.split,
        )
        # Column-wise concatenation needs both datasets to have equal length.
        if len(ds_enrichment) != len(ds):
            raise RuntimeError(
                f"Length of the enrichment dataset ({len(ds_enrichment)}) "
                f"mismatches length of the original dataset ({len(ds)})"
            )
        # axis=1 appends the enrichment columns next to the original ones.
        ds = datasets.concatenate_datasets([ds, ds_enrichment], split=ds.split, axis=1)
    print(f"Serving Hugging Face dataset {hf_settings}.")
    spotlight.show(ds, host="0.0.0.0", port=7860, wait="forever")
|