Spaces:
Running
Running
| """`vgm` command-line interface. | |
| Three subcommands: | |
| - `build` — full pipeline: video (+ optional transcript) → HTML / zip / review HTML. | |
| - `export-metadata` — run the pipeline and dump the unified metadata JSON. | |
| - `render-from-metadata` — re-render HTML from a previously exported metadata JSON. | |
| """ | |
| from __future__ import annotations | |
| import contextlib | |
| import logging | |
| from pathlib import Path | |
| from typing import Optional | |
| import typer | |
| from rich.progress import ( | |
| BarColumn, | |
| Progress, | |
| TaskProgressColumn, | |
| TextColumn, | |
| TimeElapsedColumn, | |
| ) | |
| from .pipeline import bundle_zip, dump_metadata, load_metadata, render_guide, render_review | |
| from .pipeline.html_gen import metadata_to_segment, render_from_metadata | |
| from .pipeline.orchestrator import ( | |
| PipelineError, | |
| PipelineInputs, | |
| ProgressEvent, | |
| run_pipeline, | |
| ) | |
@contextlib.contextmanager
def _progress_bar():
    """Context manager yielding a progress callback backed by a rich Progress.

    Usage: ``with _progress_bar() as cb: run_pipeline(inputs, progress=cb)``.

    The yielded callable takes a single ``ProgressEvent`` and mirrors its
    ``percent``, ``stage`` and ``message`` attributes onto one progress task
    (total 100). The live display is always stopped when the ``with`` block
    exits, including when the body raises.
    """
    progress = Progress(
        TextColumn("[bold]{task.fields[stage]:<13}", justify="left"),
        BarColumn(bar_width=None),
        TaskProgressColumn(),
        TextColumn("{task.fields[msg]}"),
        TimeElapsedColumn(),
        transient=False,
    )
    # Entering the Progress starts the live display and leaving it stops the
    # display, so a failure in add_task (or in the caller's body) cannot leave
    # the terminal in live-render mode.
    with progress:
        task_id = progress.add_task("vgm", total=100, stage="starting", msg="")

        def cb(ev: ProgressEvent) -> None:
            progress.update(
                task_id,
                completed=ev.percent,
                stage=ev.stage,
                msg=ev.message,
            )

        yield cb
# Module-level logger for CLI diagnostics.
log = logging.getLogger("videoguidemaker.cli")

# Root Typer application that the subcommands attach to. Invoking it with no
# arguments prints the help text; shell-completion installation is disabled.
app = typer.Typer(
    help="VideoGuideMaker — generate WCAG-ready study guides from video + transcript.",
    no_args_is_help=True,
    add_completion=False,
)
def _setup_logging(verbose: bool) -> None:
    """Configure root logging for a CLI invocation.

    The level is WARNING unless ``verbose`` is set, so INFO records do not
    interleave with the rich progress bar; ``--verbose`` switches to DEBUG.
    """
    if verbose:
        level = logging.DEBUG
    else:
        level = logging.WARNING
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
        level=level,
    )
def _resolve_format(fmt: str, output: Path) -> str:
    """Return ``fmt`` lower-cased after checking it is a known output format.

    ``output`` is accepted for call-site compatibility but is not consulted.

    Raises:
        typer.BadParameter: if ``fmt`` is not review, single, zip or guide.
    """
    normalized = fmt.lower()
    if normalized in ("review", "single", "zip", "guide"):
        return normalized
    raise typer.BadParameter("format must be one of: review, single, zip, guide")
def _safe_filename(title: str) -> str:
    """Turn a free-form title into a filename-friendly slug.

    Alphanumerics, ``-`` and ``_`` are kept; every other character except a
    space becomes ``-``. Surrounding whitespace is trimmed and the remaining
    spaces become ``-``. An empty result falls back to ``"study-guide"``.
    """
    kept_punctuation = "-_ "
    pieces = []
    for ch in title:
        keep = ch.isalnum() or ch in kept_punctuation
        pieces.append(ch if keep else "-")
    slug = "".join(pieces).strip().replace(" ", "-")
    if not slug:
        return "study-guide"
    return slug
# Registered as the `build` subcommand. Typer shows the function docstring as
# the command's help text, so extended notes are kept in comments.
#
# Flow: run the pipeline -> optionally dump metadata -> render HTML -> write
# either the HTML file or a zip bundle (HTML + frame images + audio files).
# Exits with status 2 when the pipeline raises PipelineError.
@app.command("build")
def build(
    video: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True),
    transcript: Optional[Path] = typer.Argument(None, exists=False, dir_okay=False),
    title: str = typer.Option("Untitled Study Guide", "--title"),
    subtitle: Optional[str] = typer.Option(None, "--subtitle"),
    module: Optional[str] = typer.Option(None, "--module"),
    output: Path = typer.Option(Path("study_guide.html"), "--output", "-o"),
    frames_dir: Path = typer.Option(Path("static"), "--frames-dir"),
    auto_transcribe: bool = typer.Option(False, "--auto-transcribe"),
    whisper_model: str = typer.Option("small", "--whisper-model"),
    threshold: float = typer.Option(27.0, "--threshold"),
    min_gap: float = typer.Option(
        0.0,
        "--min-gap",
        help="Drop frames closer than N seconds to the previous one.",
    ),
    max_frames: Optional[int] = typer.Option(
        None,
        "--max-frames",
        help="Cap total frames; uniformly downsamples preserving first + last.",
    ),
    skip_ocr: bool = typer.Option(
        False,
        "--skip-ocr",
        help="Skip OCR pass; on-screen text fields will be empty.",
    ),
    skip_inverted_ocr: bool = typer.Option(
        False,
        "--skip-inverted-ocr",
        help="Skip the inverted-binarization OCR pass. Halves OCR time and avoids inverted-pass garbling on slides without coloured callouts.",
    ),
    face_threshold: float = typer.Option(0.12, "--face-threshold"),
    lang: str = typer.Option("en", "--lang"),
    fmt: str = typer.Option("single", "--format", help="review | single | zip | guide"),
    export_metadata: Optional[Path] = typer.Option(None, "--export-metadata"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
) -> None:
    """Run the full pipeline: video + transcript → HTML."""
    _setup_logging(verbose)
    fmt = _resolve_format(fmt, output)

    inputs = PipelineInputs(
        video_path=video,
        # A transcript path that does not exist on disk is treated as
        # "no transcript supplied" rather than as an error.
        transcript_path=transcript if transcript and transcript.exists() else None,
        frames_dir=frames_dir,
        title=title,
        subtitle=subtitle,
        module=module,
        lang=lang,
        threshold=threshold,
        min_gap_seconds=min_gap,
        max_frames=max_frames,
        skip_ocr=skip_ocr,
        skip_inverted_ocr=skip_inverted_ocr,
        face_threshold=face_threshold,
        auto_transcribe=auto_transcribe,
        whisper_model=whisper_model,
        # Only the self-contained HTML outputs ask for inlined images.
        inline_images=(fmt in ("single", "review")),
    )

    try:
        with _progress_bar() as cb:
            result = run_pipeline(inputs, progress=cb)
    except PipelineError as exc:
        typer.secho(f"error: {exc}", fg=typer.colors.RED, err=True)
        # Chain the cause so the original failure stays attached to the exit.
        raise typer.Exit(2) from exc

    # Dump metadata BEFORE rendering: a render failure (template bug,
    # disk full mid-write) would otherwise discard the LLM/OCR work
    # the user just paid for.
    if export_metadata:
        dump_metadata(export_metadata, result.page)
        typer.echo(f"wrote {export_metadata}")

    # Keyword arguments shared by both renderers.
    common = dict(
        title=title,
        segments=result.segments,
        lang=lang,
        subtitle=subtitle,
        module=module,
        meta_lines=result.page.meta_lines or None,
        eyebrow=result.page.eyebrow,
    )

    if fmt == "review":
        html = render_review(**common)
    else:
        inline = fmt == "single"
        if inline:
            # Inline audio data URIs alongside images so the single HTML
            # stays self-contained (no broken audio/foo.mp3 references).
            import base64

            for seg, ap in zip(result.segments, result.audio_paths):
                if ap and ap.exists():
                    seg.audio_data_uri = (
                        "data:audio/mpeg;base64,"
                        + base64.b64encode(ap.read_bytes()).decode("ascii")
                    )
        html = render_guide(inline_images=inline, **common)

    if fmt == "zip":
        # Segments without audio carry None in audio_paths; drop those.
        audio_disk_paths = [p for p in result.audio_paths if p is not None]
        zip_bytes = bundle_zip(
            html,
            [f.image_path for f in result.kept_frames],
            audio_paths=audio_disk_paths,
        )
        # Force a .zip extension so the default "study_guide.html" output
        # name does not end up holding zip bytes.
        if output.suffix.lower() != ".zip":
            output = output.with_suffix(".zip")
        output.write_bytes(zip_bytes)
    else:
        output.write_text(html, encoding="utf-8")
    typer.echo(f"wrote {output}")
# Registered under the explicit name `export-metadata`; without it Typer would
# derive the command name from the function name. Typer shows the function
# docstring as the command's help text, so extended notes are kept in comments.
#
# Runs the pipeline with image inlining disabled and writes only the metadata
# JSON to `output`. Exits with status 2 when the pipeline raises PipelineError.
@app.command("export-metadata")
def export_metadata_cmd(
    video: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True),
    transcript: Optional[Path] = typer.Argument(None, exists=False, dir_okay=False),
    title: str = typer.Option("Untitled Study Guide", "--title"),
    subtitle: Optional[str] = typer.Option(None, "--subtitle"),
    module: Optional[str] = typer.Option(None, "--module"),
    output: Path = typer.Option(Path("study_guide_metadata.json"), "--output", "-o"),
    frames_dir: Path = typer.Option(Path("static"), "--frames-dir"),
    auto_transcribe: bool = typer.Option(False, "--auto-transcribe"),
    whisper_model: str = typer.Option("small", "--whisper-model"),
    threshold: float = typer.Option(27.0, "--threshold"),
    min_gap: float = typer.Option(
        0.0,
        "--min-gap",
        help="Drop frames closer than N seconds to the previous one.",
    ),
    max_frames: Optional[int] = typer.Option(
        None,
        "--max-frames",
        help="Cap total frames; uniformly downsamples preserving first + last.",
    ),
    skip_ocr: bool = typer.Option(
        False,
        "--skip-ocr",
        help="Skip OCR pass; on-screen text fields will be empty.",
    ),
    skip_inverted_ocr: bool = typer.Option(
        False,
        "--skip-inverted-ocr",
        help="Skip the inverted-binarization OCR pass. Halves OCR time and avoids inverted-pass garbling on slides without coloured callouts.",
    ),
    face_threshold: float = typer.Option(0.12, "--face-threshold"),
    lang: str = typer.Option("en", "--lang"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
) -> None:
    """Run the pipeline and dump the metadata JSON only (no HTML)."""
    _setup_logging(verbose)

    inputs = PipelineInputs(
        video_path=video,
        # A transcript path that does not exist on disk is treated as
        # "no transcript supplied" rather than as an error.
        transcript_path=transcript if transcript and transcript.exists() else None,
        frames_dir=frames_dir,
        title=title,
        subtitle=subtitle,
        module=module,
        lang=lang,
        threshold=threshold,
        min_gap_seconds=min_gap,
        max_frames=max_frames,
        skip_ocr=skip_ocr,
        skip_inverted_ocr=skip_inverted_ocr,
        face_threshold=face_threshold,
        auto_transcribe=auto_transcribe,
        whisper_model=whisper_model,
        # No HTML is rendered here, so images are never inlined.
        inline_images=False,
    )

    try:
        with _progress_bar() as cb:
            result = run_pipeline(inputs, progress=cb)
    except PipelineError as exc:
        typer.secho(f"error: {exc}", fg=typer.colors.RED, err=True)
        # Chain the cause so the original failure stays attached to the exit.
        raise typer.Exit(2) from exc

    dump_metadata(output, result.page)
    typer.echo(f"wrote {output} ({len(result.page.segments)} segments, frames in {frames_dir})")
# Registered under the explicit name `render-from-metadata`; without it Typer
# would derive the command name from the function name. Typer shows the
# function docstring as the command's help text, so extended notes are kept in
# comments.
#
# Loads a metadata JSON and re-renders HTML from it. The zip format is
# rejected with BadParameter.
@app.command("render-from-metadata")
def render_from_metadata_cmd(
    metadata_json: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True),
    output: Path = typer.Option(Path("study_guide.html"), "--output", "-o"),
    frames_dir: Optional[Path] = typer.Option(
        None,
        "--frames-dir",
        help="Override the frames_dir recorded in the metadata JSON.",
    ),
    fmt: str = typer.Option("single", "--format", help="review | single | guide"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
) -> None:
    """Re-render HTML from a previously exported metadata JSON."""
    _setup_logging(verbose)
    fmt = _resolve_format(fmt, output)
    # _resolve_format accepts "zip" because `build` supports it; reject it here.
    if fmt == "zip":
        raise typer.BadParameter("zip format requires source frames; use 'build' instead.")

    page = load_metadata(metadata_json)
    # An explicit --frames-dir is used as given; otherwise the directory
    # recorded in the metadata is resolved relative to the JSON file's folder.
    resolved_frames_dir = (
        frames_dir
        if frames_dir is not None
        else (metadata_json.parent / page.frames_dir).resolve()
    )
    html = render_from_metadata(page, Path(resolved_frames_dir), mode=fmt)
    output.write_text(html, encoding="utf-8")
    typer.echo(f"wrote {output}")
# Script entry point: when the module is executed directly, hand control to
# the Typer application defined above.
if __name__ == "__main__":  # pragma: no cover
    app()