Spaces:

renumics
/

spotlight-vikp-textbook_quality_programming

Runtime error

App Files Files Community

neindochoh committed on Oct 12, 2023

Commit

a78959a

•

1 Parent(s): 4bfbb0b

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

Dockerfile +20 -0
README.md +7 -3
run.py +127 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+# Image that installs Renumics Spotlight and runs `run.py` as its entry point.
+FROM python:3.10
+# Build argument selecting the `renumics-spotlight` version installed below.
+ARG SPOTLIGHT_VERSION=1.5.0
+# Create a user with UID 1000 (with a home directory) and switch to it, so the
+# following instructions run as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's local bin directory first on PATH.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Re-export the build argument as an environment variable of the image.
+ENV SPOTLIGHT_VERSION=$SPOTLIGHT_VERSION
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel
+RUN pip install --no-cache-dir --upgrade "renumics-spotlight==${SPOTLIGHT_VERSION}"
+# Copy the script with mode 0755 so that CMD can execute `./run.py` directly.
+COPY --chown=user --chmod=0755 run.py .
+CMD ["./run.py"]

README.md CHANGED Viewed

@@ -1,10 +1,14 @@
 ---
-title: Spotlight-vikp-textbook Quality Programming
-emoji: 🏢
 colorFrom: indigo
 colorTo: green
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+# title:
+emoji: 🔬
 colorFrom: indigo
 colorTo: green
 sdk: docker
+app_port: 7860
+datasets: [vikp/textbook_quality_programming, renumics/spotlight-vikp-textbook_quality_programming-enrichment]
+tags: [renumics, spotlight, EDA, enriched, data-centric-ai, viewer]
 pinned: false
+license: mit
 ---
+# Explore vikp/textbook_quality_programming with [Renumics Spotlight](https://github.com/renumics/spotlight)!

run.py ADDED Viewed

	@@ -0,0 +1,127 @@

+#!/usr/bin/env python3
+"""
+Serve a Hugging Face dataset.
+"""
+import dataclasses
+import os
+from typing import Optional
+import datasets
+import huggingface_hub
+from renumics import spotlight  # type: ignore
+def login() -> None:
+    """
+    Login to Hugging Face Hub.
+    """
+    if token := os.environ.get("HF_TOKEN"):
+        huggingface_hub.login(token)
+@dataclasses.dataclass
+class HFSettings:
+    """
+    Hugging Face settings.
+    """
+    dataset: str
+    subset: Optional[str] = None
+    split: Optional[str] = None
+    revision: Optional[str] = None
+    enrichment: Optional[str] = None
+    enrichment_revision: Optional[str] = None
+    @classmethod
+    def from_environ(cls) -> "HFSettings":
+        """
+        Parse Hugging Face settings from environment.
+        """
+        dataset = os.environ.get("HF_DATASET") or None
+        if dataset is None:
+            raise RuntimeError(
+                "Desired Hugging Face dataset must be set as `HF_DATASET` "
+                "environment variable."
+            )
+        return cls(
+            dataset,
+            os.environ.get("HF_SUBSET") or None,
+            os.environ.get("HF_SPLIT") or None,
+            os.environ.get("HF_REVISION") or None,
+            os.environ.get("HF_ENRICHMENT") or None,
+            os.environ.get("HF_ENRICHMENT_REVISION") or None,
+        )
+    def __str__(self) -> str:
+        return f"{self.dataset}[subset={self.subset},split={self.split},revision={self.revision}]"
+if __name__ == "__main__":
+    """
+    Load and serve the given Hugging Face dataset.
+    """
+    login()
+    hf_settings = HFSettings.from_environ()
+    print(f"Loading Hugging Face dataset {hf_settings}.")
+    ds = datasets.load_dataset(
+        hf_settings.dataset,
+        hf_settings.subset,
+        split=hf_settings.split,
+        revision=hf_settings.revision,
+    )
+    if hf_settings.enrichment is not None:
+        ds_enrichment = datasets.load_dataset(
+            hf_settings.enrichment,
+            hf_settings.subset,
+            split=hf_settings.split,
+            revision=hf_settings.enrichment_revision,
+        )
+        if len(ds_enrichment) != len(ds):
+            raise RuntimeError(
+                f"Length of the enrichment dataset ({len(ds_enrichment)}) "
+                f"mismatches length of the original dataset ({len(ds)})"
+            )
+        ds = datasets.concatenate_datasets([ds, ds_enrichment], split=ds.split, axis=1)
+    dtypes = {}
+    for col in ds.column_names:
+        if "embedding" in col and isinstance(ds.features[col], datasets.Sequence):
+            dtypes[col] = spotlight.dtypes.embedding_dtype
+    layout = spotlight.layout.split(
+        spotlight.layout.split(
+            spotlight.layout.tab(spotlight.layout.table(), weight=4),
+            spotlight.layout.tab(
+                spotlight.layout.similaritymap(),
+                spotlight.layout.scatterplot(),
+                weight=3,
+            ),
+            spotlight.layout.tab(
+                spotlight.layout.histogram(), spotlight.layout.metric(), weight=3
+            ),
+            weight=5,
+        ),
+        spotlight.layout.tab(spotlight.layout.inspector(), weight=3),
+        orientation="vertical",
+    )
+    if not isinstance(ds, datasets.Dataset):
+        raise TypeError(
+            f"Loaded Hugging Face dataset is of type {type(ds)} instead of "
+            "`datasets.Dataset`. Did you forget to specify subset and/or split "
+            "(use environment variables `HF_SUBSET` and `HF_SPLIT` respective)?"
+        )
+    print(f"Serving Hugging Face dataset {hf_settings}.")
+    spotlight.show(
+        ds,
+        host="0.0.0.0",
+        port=7860,
+        wait="forever",
+        dtype=dtypes,
+        layout=layout,
+        analyze=True,
+        no_browser=True,
+    )