"""Command-line interface for the CoRGI reasoning pipeline.

Parses an image/question pair from the command line, runs the pipeline, prints
the reasoning steps, grounded evidence, and final answer, and optionally writes
the result to a JSON file.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Callable, Optional, TextIO

from PIL import Image

from .pipeline import CoRGIPipeline
from .qwen_client import Qwen3VLClient, QwenGenerationConfig
from .types import GroundedEvidence, ReasoningStep

DEFAULT_MODEL_ID = "Qwen/Qwen3-VL-8B-Thinking"


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        prog="corgi-cli",
        description="Run the CoRGI reasoning pipeline over an image/question pair.",
    )
    parser.add_argument("--image", type=Path, required=True, help="Path to the input image (jpg/png/etc.)")
    parser.add_argument("--question", type=str, required=True, help="Visual question for the image")
    parser.add_argument("--max-steps", type=int, default=4, help="Maximum number of reasoning steps to request")
    parser.add_argument(
        "--max-regions",
        type=int,
        default=4,
        help="Maximum number of grounded regions per visual step",
    )
    parser.add_argument(
        "--model-id",
        type=str,
        default=None,
        help="Optional override for the Qwen3-VL model identifier",
    )
    parser.add_argument(
        "--json-out",
        type=Path,
        default=None,
        help="Optional path to write the pipeline result as JSON",
    )
    return parser
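
# Illustrative use of build_parser() (values shown are hypothetical; the flags
# and defaults are the ones defined above):
#   args = build_parser().parse_args(["--image", "photo.jpg", "--question", "What is shown?"])
#   args.max_steps == 4 and args.model_id is None  # defaults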


def _format_step(step: ReasoningStep) -> str:
    needs = "yes" if step.needs_vision else "no"
    suffix = f"; reason: {step.reason}" if step.reason else ""
    return f"[{step.index}] {step.statement} (needs vision: {needs}{suffix})"


def _format_evidence_item(evidence: GroundedEvidence) -> str:
    bbox = ", ".join(f"{coord:.2f}" for coord in evidence.bbox)
    parts = [f"Step {evidence.step_index} | bbox=({bbox})"]
    if evidence.description:
        parts.append(f"desc: {evidence.description}")
    if evidence.confidence is not None:
        parts.append(f"conf: {evidence.confidence:.2f}")
    return " | ".join(parts)


def _default_pipeline_factory(model_id: Optional[str]) -> CoRGIPipeline:
    config = QwenGenerationConfig(model_id=model_id or DEFAULT_MODEL_ID)
    client = Qwen3VLClient(config=config)
    return CoRGIPipeline(vlm_client=client)


def execute_cli(
    *,
    image_path: Path,
    question: str,
    max_steps: int,
    max_regions: int,
    model_id: Optional[str],
    json_out: Optional[Path],
    pipeline_factory: Callable[[Optional[str]], CoRGIPipeline] | None = None,
    output_stream: TextIO | None = None,
) -> None:
    if output_stream is None:
        output_stream = sys.stdout
    factory = pipeline_factory or _default_pipeline_factory
    # Convert to RGB up front so the pipeline always receives a consistent image mode.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    pipeline = factory(model_id)
    result = pipeline.run(
        image=image,
        question=question,
        max_steps=max_steps,
        max_regions=max_regions,
    )
    print(f"Question: {question}", file=output_stream)
    print("-- Steps --", file=output_stream)
    for step in result.steps:
        print(_format_step(step), file=output_stream)
    if not result.steps:
        print("(no reasoning steps returned)", file=output_stream)
    print("-- Evidence --", file=output_stream)
    if result.evidence:
        for evidence in result.evidence:
            print(_format_evidence_item(evidence), file=output_stream)
    else:
        print("(no visual evidence)", file=output_stream)
    print("-- Answer --", file=output_stream)
    print(f"Answer: {result.answer}", file=output_stream)
    if json_out is not None:
        # Write the structured result alongside the human-readable console report.
        json_out.parent.mkdir(parents=True, exist_ok=True)
        with json_out.open("w", encoding="utf-8") as handle:
            json.dump(result.to_json(), handle, ensure_ascii=False, indent=2)
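
# Rough shape of the console report produced above (actual content depends on the model):
#   Question: <question>
#   -- Steps --
#   [1] <statement> (needs vision: yes; reason: <reason>)
#   -- Evidence --
#   Step 1 | bbox=(...) | desc: ... | conf: ...
#   -- Answer --
#   Answer: <answer>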


def main(argv: Optional[list[str]] = None) -> int:
    parser = build_parser()
    args = parser.parse_args(argv)
    execute_cli(
        image_path=args.image,
        question=args.question,
        max_steps=args.max_steps,
        max_regions=args.max_regions,
        model_id=args.model_id,
        json_out=args.json_out,
    )
    return 0


__all__ = ["build_parser", "execute_cli", "main"]
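
# Example invocation, assuming the package exposes this module as the
# `corgi-cli` entry point named in build_parser (otherwise run it via
# `python -m <package>.cli`, where the module path is an assumption):
#   corgi-cli --image photo.jpg --question "What is on the table?" \
#       --max-regions 4 --json-out out/result.json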