Spaces:
Sleeping
Sleeping
Implement initial version of the SegFormer training pipeline with dataset parsing and model training functionality. Add a Dockerfile for environment setup, utility scripts for parsing and training, and a Gradio interface for user interaction.
e4aef33
| import os | |
| import tempfile | |
| import zipfile | |
| import subprocess | |
| from pathlib import Path | |
def _find_supervisely_project_root(root: Path) -> Path:
    """Return the directory that holds the project's ``meta.json``.

    Looks in *root* itself first and then in each immediate subdirectory,
    which covers archives that wrap the project in one top-level folder.
    Deeper levels are not searched.

    Raises
    ------
    FileNotFoundError
        If neither *root* nor any direct child directory has ``meta.json``.
    """
    if (root / "meta.json").exists():
        return root
    for child in root.iterdir():
        if child.is_dir() and (child / "meta.json").exists():
            return child
    raise FileNotFoundError(
        f"Could not locate 'meta.json' inside extracted archive at {root}"
    )


def run_supervisely_parser(
    project_path: str,
    train_ratio: float,
    seed: int,
) -> str:
    """Extract a Supervisely project zip and run the parser script inside .venv-sly.

    Two scratch directories are created next to the archive: one receives the
    extracted project, the other is handed to the parser as its output base.
    If anything fails, both are removed before the exception propagates so
    that failed runs do not accumulate on disk.

    Parameters
    ----------
    project_path : str
        Path to the uploaded Supervisely project .zip.
    train_ratio : float
        Portion of data to allocate to training (remainder is validation).
    seed : int
        Random seed forwarded to the parser for reproducible splits.

    Returns
    -------
    str
        Path to the parsed dataset directory produced by the parser script.

    Raises
    ------
    FileNotFoundError
        If the zip, the project's ``meta.json``, the parser script or the
        ``.venv-sly`` interpreter cannot be found.
    ValueError
        If ``project_path`` does not have a ``.zip`` suffix.
    zipfile.BadZipFile
        If the file is not a readable zip archive.
    RuntimeError
        If the parser exits with a non-zero status, or if it does not leave
        exactly one directory inside the output base directory.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import shutil

    project_zip = Path(project_path)
    if not project_zip.exists():
        raise FileNotFoundError(
            f"Provided project zip not found: {project_zip}"
        )
    if project_zip.suffix.lower() != ".zip":
        raise ValueError("Supervisely project must be a .zip archive")

    project_dir = project_zip.parent
    scratch_dirs = []  # every temp dir created so far, for failure cleanup
    try:
        extract_dir = Path(tempfile.mkdtemp(dir=project_dir))
        scratch_dirs.append(extract_dir)
        output_base_dir = Path(tempfile.mkdtemp(dir=project_dir))
        scratch_dirs.append(output_base_dir)

        with zipfile.ZipFile(project_zip, "r") as zf:
            zf.extractall(extract_dir)

        project_root = _find_supervisely_project_root(extract_dir)

        # The parser script and its dedicated virtualenv are resolved
        # relative to the repository root (the parent of this file's
        # directory).
        repo_root = Path(__file__).resolve().parent.parent
        parser_script = repo_root / "scripts" / "supervisely_parser.py"
        venv_python = repo_root / ".venv-sly" / "bin" / "python"

        if not parser_script.exists():
            raise FileNotFoundError(
                f"Parser script not found: {parser_script}",
            )
        if not venv_python.exists():
            raise FileNotFoundError(
                f"Expected .venv-sly Python interpreter at: {venv_python}",
            )

        cmd = [
            str(venv_python),
            str(parser_script),
            "--project_dir",
            str(project_root),
            "--output_base_dir",
            str(output_base_dir),
            "--train_ratio",
            str(train_ratio),
            "--seed",
            str(seed),
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            env={**os.environ},
        )
        if result.returncode != 0:
            raise RuntimeError(
                "Supervisely parser failed.\n"
                f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )

        # The parser is expected to leave exactly one dataset directory
        # inside the output base; anything else is treated as ambiguous.
        produced_dirs = [p for p in output_base_dir.iterdir() if p.is_dir()]
        if len(produced_dirs) != 1:
            raise RuntimeError(
                "Could not unambiguously determine parsed dataset directory in "
                f"{output_base_dir}. Found: {produced_dirs}"
            )

        # NOTE(review): the extracted tree is intentionally kept on success,
        # as before; it is not visible here whether the parser output still
        # references files inside it -- confirm before deleting it eagerly.
        return str(produced_dirs[0])
    except BaseException:
        # mkdtemp never removes what it creates, so without this every
        # failed run would leave orphaned directories beside the upload.
        # ignore_errors keeps a cleanup problem from masking the real error.
        for scratch in scratch_dirs:
            shutil.rmtree(scratch, ignore_errors=True)
        raise