danielrosehill committed on
Commit 264d760 · 1 Parent(s): 6e8f6f8
Files changed (3)
  1. README.md +16 -0
  2. app.py +91 -13
  3. requirements.txt +4 -0
README.md CHANGED
@@ -12,3 +12,19 @@ short_description: Code generation agent network with config navigator
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+## Data Source
+
+- The app loads agent configs from a Hugging Face dataset.
+- Default dataset URL is read from `datasource.txt`.
+- You can override via env var `HF_DATASET_URL` (or `HF_DATASET_ID`).
+
+Supported loading paths:
+- Direct dataset rows via `datasets.load_dataset` (expects fields like `name`, `system_prompt`, `description`, `category`).
+- If rows are not loadable, it snapshots the dataset repo and scans JSON/YAML files for agent-like structures.
+
+To use your dataset:
+- Set `datasource.txt` to `https://huggingface.co/datasets/danielrosehill/Code-Gen-Agents-0925` (already set), or
+- Configure a Space secret `HF_DATASET_URL` with the dataset URL.
+
+Dependencies are pinned in `requirements.txt` and include `huggingface_hub` and `datasets`.
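As a quick illustration of the two loading paths described above (this sketch is not part of the app; the `fetch_agents` helper and the `train` split are assumptions):

```python
# Illustrative sketch of the README's two loading paths: dataset rows first,
# then a repo snapshot for JSON/YAML scanning. Not the app's actual code.
from datasets import load_dataset
from huggingface_hub import snapshot_download

REPO_ID = "danielrosehill/Code-Gen-Agents-0925"

def fetch_agents(repo_id: str = REPO_ID) -> list:
    try:
        rows = load_dataset(repo_id, split="train")  # assumed split name
        agents = []
        for row in rows:
            if row.get("name") and row.get("system_prompt"):
                agents.append({
                    "name": row["name"],
                    "system_prompt": row["system_prompt"],
                    "description": row.get("description", ""),
                    "category": row.get("category", "uncategorized"),
                })
        if agents:
            return agents
    except Exception:
        pass
    # Fallback path: snapshot the dataset repo so JSON/YAML files can be scanned locally.
    local_dir = snapshot_download(repo_id=repo_id, repo_type="dataset")
    print(f"Rows not usable; scan {local_dir} for JSON/YAML agent configs instead.")
    return []
```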
app.py CHANGED
@@ -156,6 +156,72 @@ def _maybe_snapshot_download_from_hf(url: str, target_dir: Path) -> Optional[Path]:
     return None
 
 
+def _parse_repo_id_from_url(url: str) -> Optional[str]:
+    m = re.match(r"https?://huggingface.co/datasets/([^/]+/[^/]+)", url.strip())
+    return m.group(1) if m else None
+
+
+def _extract_agent_from_row(row: dict) -> Optional[dict]:
+    if not isinstance(row, dict):
+        return None
+    name = _extract_field(row, ["name", "agent_name", "title", "id"]) or row.get("name")
+    system_prompt = _extract_field(
+        row,
+        [
+            "system_prompt",
+            "prompt",
+            "instructions",
+            "system",
+            "system_instructions",
+            "system_text",
+        ],
+    )
+    if not (name and system_prompt):
+        return None
+    description = _extract_field(row, ["description", "desc", "about", "summary"]) or ""
+    category = _extract_field(row, ["category", "group", "type"]) or "uncategorized"
+    category_slug = _slugify(category)
+    agent_id = _slugify(f"{category_slug}-{name}")
+    return {
+        "id": agent_id,
+        "name": name,
+        "description": description,
+        "system_prompt": system_prompt,
+        "category": category_slug,
+        "source": "hf-dataset-row",
+    }
+
+
+def _maybe_load_hf_dataset_rows(url: str) -> Optional[List[dict]]:
+    try:
+        import datasets  # type: ignore
+
+        repo_id = _parse_repo_id_from_url(url)
+        if not repo_id:
+            return None
+
+        # Try common splits; prefer train if present
+        result: List[dict] = []
+        loaded = datasets.load_dataset(repo_id)
+        if isinstance(loaded, dict):
+            split_order = ["train", "validation", "test"] + [k for k in loaded.keys() if k not in {"train", "validation", "test"}]
+            for split in split_order:
+                if split in loaded:
+                    for row in loaded[split]:
+                        a = _extract_agent_from_row(dict(row))
+                        if a:
+                            result.append(a)
+        else:
+            for row in loaded:  # type: ignore
+                a = _extract_agent_from_row(dict(row))
+                if a:
+                    result.append(a)
+
+        return result or None
+    except Exception:
+        return None
+
+
 def load_agents() -> Tuple[Dict[str, Any], List[dict], List[str]]:
     """
     Returns (catalog_by_category, agents, warnings)
@@ -166,22 +232,28 @@ def load_agents() -> Tuple[Dict[str, Any], List[dict], List[str]]:
     warnings: List[str] = []
     agents: List[dict] = []
 
+    # Resolve datasource
+    url = os.getenv("HF_DATASET_URL") or os.getenv("HF_DATASET_ID") or (_read_text(DATASOURCE_TXT) or "").strip()
+
     # 1) Prefer local static_data if present
     if STATIC_DATA_DIR.exists():
         agents = _scan_static_data(STATIC_DATA_DIR)
-    else:
-        # 2) Try to download dataset indicated by datasource.txt
-        url = _read_text(DATASOURCE_TXT) or ""
-        if url.strip():
-            maybe_dir = _maybe_snapshot_download_from_hf(url.strip(), STATIC_DATA_DIR)
-            if maybe_dir and maybe_dir.exists():
-                agents = _scan_static_data(maybe_dir)
-            else:
-                warnings.append(
-                    "Dataset fetch unavailable. Add a local 'static_data' folder with agent configs."
-                )
+    # 2) Try to load dataset rows directly via datasets
+    if not agents and url:
+        maybe_agents = _maybe_load_hf_dataset_rows(url)
+        if maybe_agents:
+            agents = maybe_agents
+    # 3) If rows failed, snapshot the repo and scan files
+    if not agents and url:
+        maybe_dir = _maybe_snapshot_download_from_hf(url, STATIC_DATA_DIR)
+        if maybe_dir and maybe_dir.exists():
+            agents = _scan_static_data(maybe_dir)
         else:
-            warnings.append("No datasource URL found; using fallback sample data.")
+            warnings.append(
+                "Dataset fetch unavailable. Add a local 'static_data' folder with agent configs."
+            )
+    if not url:
+        warnings.append("No datasource URL found; using fallback sample data.")
 
     # 3) Fallback sample if nothing found
     if not agents:
@@ -211,6 +283,13 @@ def load_agents() -> Tuple[Dict[str, Any], List[dict], List[str]]:
             "Showing sample data. Add 'static_data' with JSON/YAML agent configs to replace."
         )
 
+    # Dedupe by id, prefer first occurrence
+    deduped: Dict[str, dict] = {}
+    for a in agents:
+        if isinstance(a, dict) and a.get("id") and a["id"] not in deduped:
+            deduped[a["id"]] = a
+    agents = list(deduped.values())
+
     # Build catalog
     catalog: Dict[str, Dict[str, Any]] = {}
     for a in agents:
@@ -339,4 +418,3 @@ def build_ui():
 if __name__ == "__main__":
     demo = build_ui()
     demo.launch()
-
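For local testing, a hedged sketch of exercising the new resolution order (env var first, then `datasource.txt`); only `HF_DATASET_URL` and `load_agents` come from the diff above, the rest is illustrative:

```python
# Illustrative local smoke test for the new load_agents() flow.
import os

# The env var takes precedence over datasource.txt in the new code.
os.environ["HF_DATASET_URL"] = "https://huggingface.co/datasets/danielrosehill/Code-Gen-Agents-0925"

from app import load_agents  # assumes this runs from the Space's root directory

catalog, agents, warnings = load_agents()
print(f"Loaded {len(agents)} agents across {len(catalog)} categories")
for w in warnings:
    print("warning:", w)
```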
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
+gradio>=5.45.0
+huggingface_hub>=0.23.0
+datasets>=2.18.0
+pyyaml>=6.0.0
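A small, optional sketch to print the installed versions next to these pins (standard library only; the `PINS` mapping simply mirrors the file above):

```python
# Illustrative check: show installed versions of the pinned dependencies.
from importlib.metadata import PackageNotFoundError, version

PINS = {
    "gradio": "5.45.0",
    "huggingface_hub": "0.23.0",
    "datasets": "2.18.0",
    "pyyaml": "6.0.0",
}

for package, minimum in PINS.items():
    try:
        print(f"{package}: installed {version(package)}, pinned >= {minimum}")
    except PackageNotFoundError:
        print(f"{package}: not installed")
```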