danielrosehill committed on
Commit 264d760 · 1 Parent(s): 6e8f6f8
Files changed (3)
  1. README.md +16 -0
  2. app.py +91 -13
  3. requirements.txt +4 -0
README.md CHANGED
@@ -12,3 +12,19 @@ short_description: Code generation agent network with config navigator
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+## Data Source
+
+- The app loads agent configs from a Hugging Face dataset.
+- Default dataset URL is read from `datasource.txt`.
+- You can override via env var `HF_DATASET_URL` (or `HF_DATASET_ID`).
+
+Supported loading paths:
+- Direct dataset rows via `datasets.load_dataset` (expects fields like `name`, `system_prompt`, `description`, `category`).
+- If rows are not loadable, it snapshots the dataset repo and scans JSON/YAML files for agent-like structures.
+
+To use your dataset:
+- Set `datasource.txt` to `https://huggingface.co/datasets/danielrosehill/Code-Gen-Agents-0925` (already set), or
+- Configure a Space secret `HF_DATASET_URL` with the dataset URL.
+
+Dependencies are pinned in `requirements.txt` and include `huggingface_hub` and `datasets`.
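As a quick illustration of the two loading paths described above (this sketch is not part of the app; the `fetch_agents` helper and the `train` split are assumptions):

```python
# Illustrative sketch of the README's two loading paths: dataset rows first,
# then a repo snapshot for JSON/YAML scanning. Not the app's actual code.
from datasets import load_dataset
from huggingface_hub import snapshot_download

REPO_ID = "danielrosehill/Code-Gen-Agents-0925"

def fetch_agents(repo_id: str = REPO_ID) -> list:
    try:
        rows = load_dataset(repo_id, split="train")  # assumed split name
        agents = []
        for row in rows:
            if row.get("name") and row.get("system_prompt"):
                agents.append({
                    "name": row["name"],
                    "system_prompt": row["system_prompt"],
                    "description": row.get("description", ""),
                    "category": row.get("category", "uncategorized"),
                })
        if agents:
            return agents
    except Exception:
        pass
    # Fallback path: snapshot the dataset repo so JSON/YAML files can be scanned locally.
    local_dir = snapshot_download(repo_id=repo_id, repo_type="dataset")
    print(f"Rows not usable; scan {local_dir} for JSON/YAML agent configs instead.")
    return []
```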
app.py CHANGED
@@ -156,6 +156,72 @@ def _maybe_snapshot_download_from_hf(url: str, target_dir: Path) -> Optional[Path]:
     return None
 
 
+def _parse_repo_id_from_url(url: str) -> Optional[str]:
+    m = re.match(r"https?://huggingface.co/datasets/([^/]+/[^/]+)", url.strip())
+    return m.group(1) if m else None
+
+
+def _extract_agent_from_row(row: dict) -> Optional[dict]:
+    if not isinstance(row, dict):
+        return None
+    name = _extract_field(row, ["name", "agent_name", "title", "id"]) or row.get("name")
+    system_prompt = _extract_field(
+        row,
+        [
+            "system_prompt",
+            "prompt",
+            "instructions",
+            "system",
+            "system_instructions",
+            "system_text",
+        ],
+    )
+    if not (name and system_prompt):
+        return None
+    description = _extract_field(row, ["description", "desc", "about", "summary"]) or ""
+    category = _extract_field(row, ["category", "group", "type"]) or "uncategorized"
+    category_slug = _slugify(category)
+    agent_id = _slugify(f"{category_slug}-{name}")
+    return {
+        "id": agent_id,
+        "name": name,
+        "description": description,
+        "system_prompt": system_prompt,
+        "category": category_slug,
+        "source": "hf-dataset-row",
+    }
+
+
+def _maybe_load_hf_dataset_rows(url: str) -> Optional[List[dict]]:
+    try:
+        import datasets  # type: ignore
+
+        repo_id = _parse_repo_id_from_url(url)
+        if not repo_id:
+            return None
+
+        # Try common splits; prefer train if present
+        result: List[dict] = []
+        loaded = datasets.load_dataset(repo_id)
+        if isinstance(loaded, dict):
+            split_order = ["train", "validation", "test"] + [k for k in loaded.keys() if k not in {"train", "validation", "test"}]
+            for split in split_order:
+                if split in loaded:
+                    for row in loaded[split]:
+                        a = _extract_agent_from_row(dict(row))
+                        if a:
+                            result.append(a)
+        else:
+            for row in loaded:  # type: ignore
+                a = _extract_agent_from_row(dict(row))
+                if a:
+                    result.append(a)
+
+        return result or None
+    except Exception:
+        return None
+
+
 def load_agents() -> Tuple[Dict[str, Any], List[dict], List[str]]:
     """
     Returns (catalog_by_category, agents, warnings)
@@ -166,22 +232,28 @@ def load_agents() -> Tuple[Dict[str, Any], List[dict], List[str]]:
     warnings: List[str] = []
     agents: List[dict] = []
 
+    # Resolve datasource
+    url = os.getenv("HF_DATASET_URL") or os.getenv("HF_DATASET_ID") or (_read_text(DATASOURCE_TXT) or "").strip()
+
     # 1) Prefer local static_data if present
     if STATIC_DATA_DIR.exists():
         agents = _scan_static_data(STATIC_DATA_DIR)
-    else:
-        # 2) Try to download dataset indicated by datasource.txt
-        url = _read_text(DATASOURCE_TXT) or ""
-        if url.strip():
-            maybe_dir = _maybe_snapshot_download_from_hf(url.strip(), STATIC_DATA_DIR)
-            if maybe_dir and maybe_dir.exists():
-                agents = _scan_static_data(maybe_dir)
-            else:
-                warnings.append(
-                    "Dataset fetch unavailable. Add a local 'static_data' folder with agent configs."
-                )
+    # 2) Try to load dataset rows directly via datasets
+    if not agents and url:
+        maybe_agents = _maybe_load_hf_dataset_rows(url)
+        if maybe_agents:
+            agents = maybe_agents
+    # 3) If rows failed, snapshot the repo and scan files
+    if not agents and url:
+        maybe_dir = _maybe_snapshot_download_from_hf(url, STATIC_DATA_DIR)
+        if maybe_dir and maybe_dir.exists():
+            agents = _scan_static_data(maybe_dir)
         else:
-            warnings.append("No datasource URL found; using fallback sample data.")
+            warnings.append(
+                "Dataset fetch unavailable. Add a local 'static_data' folder with agent configs."
+            )
+    if not url:
+        warnings.append("No datasource URL found; using fallback sample data.")
 
     # 3) Fallback sample if nothing found
     if not agents:
@@ -211,6 +283,13 @@ def load_agents() -> Tuple[Dict[str, Any], List[dict], List[str]]:
             "Showing sample data. Add 'static_data' with JSON/YAML agent configs to replace."
         )
 
+    # Dedupe by id, prefer first occurrence
+    deduped: Dict[str, dict] = {}
+    for a in agents:
+        if isinstance(a, dict) and a.get("id") and a["id"] not in deduped:
+            deduped[a["id"]] = a
+    agents = list(deduped.values())
+
     # Build catalog
     catalog: Dict[str, Dict[str, Any]] = {}
     for a in agents:
@@ -339,4 +418,3 @@ def build_ui():
 if __name__ == "__main__":
     demo = build_ui()
     demo.launch()
-
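For local testing, a hedged sketch of exercising the new resolution order (env var first, then `datasource.txt`); only `HF_DATASET_URL` and `load_agents` come from the diff above, the rest is illustrative:

```python
# Illustrative local smoke test for the new load_agents() flow.
import os

# The env var takes precedence over datasource.txt in the new code.
os.environ["HF_DATASET_URL"] = "https://huggingface.co/datasets/danielrosehill/Code-Gen-Agents-0925"

from app import load_agents  # assumes this runs from the Space's root directory

catalog, agents, warnings = load_agents()
print(f"Loaded {len(agents)} agents across {len(catalog)} categories")
for w in warnings:
    print("warning:", w)
```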
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
+gradio>=5.45.0
+huggingface_hub>=0.23.0
+datasets>=2.18.0
+pyyaml>=6.0.0
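A small, optional sketch to print the installed versions next to these pins (standard library only; the `PINS` mapping simply mirrors the file above):

```python
# Illustrative check: show installed versions of the pinned dependencies.
from importlib.metadata import PackageNotFoundError, version

PINS = {
    "gradio": "5.45.0",
    "huggingface_hub": "0.23.0",
    "datasets": "2.18.0",
    "pyyaml": "6.0.0",
}

for package, minimum in PINS.items():
    try:
        print(f"{package}: installed {version(package)}, pinned >= {minimum}")
    except PackageNotFoundError:
        print(f"{package}: not installed")
```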