HomeSenseTest / utils /parse.py
YusufMesbah's picture
Implement initial version of SegFormer training pipeline with dataset parsing and model training functionalities. Added Dockerfile for environment setup, utility scripts for parsing and training, and Gradio interface for user interaction.
e4aef33
import os
import shutil
import subprocess
import tempfile
import zipfile
from pathlib import Path


def _find_project_root(root: Path) -> Path:
    """Return the directory that holds the project's ``meta.json``.

    Checks *root* itself first, then its immediate sub-directories (archives
    are often zipped with a single wrapping folder). Deeper nesting is not
    searched.

    Raises
    ------
    FileNotFoundError
        If neither *root* nor any direct child directory contains
        ``meta.json``.
    """
    if (root / "meta.json").exists():
        return root
    # Sorted so the choice is deterministic when several children qualify;
    # the raw ``iterdir()`` order is filesystem-dependent.
    for child in sorted(root.iterdir()):
        if child.is_dir() and (child / "meta.json").exists():
            return child
    raise FileNotFoundError(
        f"Could not locate 'meta.json' inside extracted archive at {root}"
    )


def run_supervisely_parser(
    project_path: str,
    train_ratio: float,
    seed: int,
) -> str:
    """Extract a Supervisely project zip and run the parser script inside .venv-sly.

    The archive is unpacked into a fresh temporary directory created next to
    the zip, and the parser writes into a second temporary directory created
    in the same place. If anything fails after those directories are created,
    they are removed again before the exception propagates, so failed calls
    do not leave data behind.

    Parameters
    ----------
    project_path : str
        Path to the uploaded Supervisely project .zip.
    train_ratio : float
        Portion of data to allocate to training (remainder is validation).
        Forwarded to the parser unchanged via ``--train_ratio``.
    seed : int
        Random seed forwarded to the parser for reproducible splits.

    Returns
    -------
    str
        Path to the parsed dataset directory produced by the parser script
        (the single sub-directory found in the parser's output directory).

    Raises
    ------
    FileNotFoundError
        If the zip does not exist, no ``meta.json`` is found in the extracted
        archive, or the parser script / ``.venv-sly`` interpreter is missing.
    ValueError
        If ``project_path`` does not end in ``.zip``.
    RuntimeError
        If the parser exits with a non-zero status, or its output directory
        does not contain exactly one sub-directory.
    """
    project_zip = Path(project_path)
    if not project_zip.exists():
        raise FileNotFoundError(
            f"Provided project zip not found: {project_zip}"
        )
    if project_zip.suffix.lower() != ".zip":
        raise ValueError("Supervisely project must be a .zip archive")

    project_dir = project_zip.parent
    # Directories created by this call; removed again if the call fails.
    created_dirs = []
    try:
        extract_dir = Path(tempfile.mkdtemp(dir=project_dir))
        created_dirs.append(extract_dir)
        output_base_dir = Path(tempfile.mkdtemp(dir=project_dir))
        created_dirs.append(output_base_dir)

        with zipfile.ZipFile(project_zip, "r") as zf:
            zf.extractall(extract_dir)

        project_root = _find_project_root(extract_dir)

        # The parser and its dedicated interpreter live relative to the
        # repository root, i.e. the parent of this file's directory.
        repo_root = Path(__file__).resolve().parent.parent
        parser_script = repo_root / "scripts" / "supervisely_parser.py"
        venv_python = repo_root / ".venv-sly" / "bin" / "python"

        if not parser_script.exists():
            raise FileNotFoundError(
                f"Parser script not found: {parser_script}",
            )
        if not venv_python.exists():
            raise FileNotFoundError(
                "Expected .venv-sly Python interpreter at: " f"{venv_python}",
            )

        cmd = [
            str(venv_python),
            str(parser_script),
            "--project_dir",
            str(project_root),
            "--output_base_dir",
            str(output_base_dir),
            "--train_ratio",
            str(train_ratio),
            "--seed",
            str(seed),
        ]

        # Output is captured so it can be surfaced in the error on failure.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            env={**os.environ},
        )
        if result.returncode != 0:
            raise RuntimeError(
                "Supervisely parser failed.\n"
                f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )

        # The parser is expected to create exactly one dataset directory.
        produced_dirs = [p for p in output_base_dir.iterdir() if p.is_dir()]
        if len(produced_dirs) != 1:
            raise RuntimeError(
                "Could not unambiguously determine parsed dataset directory in "
                f"{output_base_dir}. Found: {produced_dirs}"
            )

        # NOTE(review): extract_dir is intentionally left in place on success.
        # The parser's output may reference files inside it (cannot be
        # confirmed from this module); delete it here once that is verified.
        return str(produced_dirs[0])
    except BaseException:
        # Nothing is returned to the caller on failure, so the scratch
        # directories are unreachable garbage: remove them, then re-raise.
        for scratch in created_dirs:
            shutil.rmtree(scratch, ignore_errors=True)
        raise