Spaces:

ddebree
/

mathvision-jepa-explorer

Running

App Files Files Community

ddebree committed 17 days ago

Commit

f9306c2

verified ·

1 Parent(s): 683fd03

Upload folder using huggingface_hub

Browse files

Files changed (26) hide show

Dockerfile +24 -0
README.md +139 -5
app.py +11 -0
packages.txt +1 -0
pyproject.toml +59 -0
requirements.txt +7 -0
src/mathvision_explorer/__init__.py +6 -0
src/mathvision_explorer/__pycache__/__init__.cpython-313.pyc +0 -0
src/mathvision_explorer/__pycache__/cli.cpython-313.pyc +0 -0
src/mathvision_explorer/__pycache__/dataset.cpython-313.pyc +0 -0
src/mathvision_explorer/__pycache__/demo.cpython-313.pyc +0 -0
src/mathvision_explorer/__pycache__/embeddings.cpython-313.pyc +0 -0
src/mathvision_explorer/__pycache__/explorer.cpython-313.pyc +0 -0
src/mathvision_explorer/__pycache__/html.cpython-313.pyc +0 -0
src/mathvision_explorer/__pycache__/index.cpython-313.pyc +0 -0
src/mathvision_explorer/__pycache__/similarity.cpython-313.pyc +0 -0
src/mathvision_explorer/cli.py +132 -0
src/mathvision_explorer/dataset.py +143 -0
src/mathvision_explorer/demo.py +229 -0
src/mathvision_explorer/embeddings.py +133 -0
src/mathvision_explorer/explorer.py +47 -0
src/mathvision_explorer/html.py +141 -0
src/mathvision_explorer/index.py +88 -0
src/mathvision_explorer/py.typed +1 -0
src/mathvision_explorer/similarity.py +88 -0
src/mathvision_explorer/streamlit_app.py +156 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

+# Container image for the hosted Streamlit explorer.
+FROM python:3.11-slim
+# Do not write .pyc files and do not buffer stdout/stderr, so logs show up immediately.
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# Hugging Face cache location (model downloads end up here).
+# NOTE(review): presumably /data is only writable when persistent storage is attached — confirm.
+ENV HF_HOME=/data/.cache/huggingface
+ENV STREAMLIT_SERVER_HEADLESS=true
+ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
+WORKDIR /app
+# git is needed because requirements.txt installs one package from a git+https URL.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends git \
+    && rm -rf /var/lib/apt/lists/*
+# The project metadata and sources must be present before pip runs: requirements.txt contains `-e .`.
+COPY pyproject.toml README.md requirements.txt ./
+COPY src ./src
+COPY app.py ./
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt
+EXPOSE 8501
+# NOTE(review): the README front matter declares no `app_port`; confirm the Space routes traffic to 8501.
+CMD ["streamlit", "run", "app.py", "--server.address=0.0.0.0", "--server.port=8501"]

README.md CHANGED Viewed

@@ -1,10 +1,144 @@
 ---
-title: Mathvision Jepa Explorer
-emoji: 💻
-colorFrom: gray
-colorTo: red
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: MathVision JEPA Explorer
+emoji: 🔎
+colorFrom: blue
+colorTo: green
 sdk: docker
 pinned: false
 ---
+# MathVision Explorer
+Typed Python-startpunt voor een **MathVision + JEPA** explorer. De eerste versie werkt
+lokaal met MathVision-achtige JSONL-bestanden en een eenvoudige, testbare image embedder.
+Daarna kun je de embedder vervangen door V-JEPA-features.
+## Installatie
+```powershell
+uv sync --dev
+```
+Of via Make:
+```bash
+make sync
+```
+Met V-JEPA ondersteuning:
+```powershell
+make sync-jepa
+```
+Alles klaarzetten, inclusief Streamlit, V-JEPA, demo-data, gallery en checks:
+```bash
+make ready
+```
+Met Streamlit UI:
+```bash
+make sync-app
+```
+## Verwacht JSONL-formaat
+Elke regel is een probleem:
+```json
+{"id":"mv-001","question":"How many cubes are visible?","answer":"7","image":"images/mv-001.png","subject":"geometry","level":2}
+```
+Ondersteunde velden:
+- `id` of `problem_id`
+- `question`
+- `answer`
+- `image`
+- `options`
+- `subject`
+- `level`
+- `solution`
+## Gebruik
+Maak eerst een zichtbare mini-demo:
+```powershell
+uv run mathvision demo data/demo
+uv run mathvision export-html data/demo/demo.jsonl artifacts/demo.html
+```
+Hetzelfde via Make:
+```bash
+make demo
+make gallery
+make app
+```
+Inspecteer een export:
+```powershell
+uv run mathvision inspect data/mathvision-sample.jsonl
+```
+Bouw een lokale image-index:
+```powershell
+uv run mathvision index data/mathvision-sample.jsonl artifacts/image-index.tsv
+```
+Bouw een V-JEPA index:
+```powershell
+uv run mathvision index data/mathvision-sample.jsonl artifacts/jepa-index.tsv --embedder vjepa
+```
+Zoek visueel vergelijkbare problemen:
+```powershell
+uv run mathvision search data/mathvision-sample.jsonl artifacts/image-index.tsv mv-001 --limit 5
+```
+Zoek met dezelfde V-JEPA embedder:
+```powershell
+uv run mathvision search data/mathvision-sample.jsonl artifacts/jepa-index.tsv mv-001 --embedder vjepa --limit 5
+```
+## V-JEPA integratie
+De module `mathvision_explorer.embeddings` definieert een `ImageEmbedder` protocol. De
+meegeleverde `ColorStatsEmbedder` is bewust simpel, zodat tests snel en offline draaien.
+`VJepaImageEmbedder` gebruikt standaard `facebook/vjepa2-vitl-fpc64-256` via Hugging Face
+Transformers en maakt van een still image een korte herhaalde video-input voor
+`get_vision_features`.
+## Kwaliteit
+```powershell
+uv run pytest
+uv run mypy
+uv run ruff check .
+```
+Of:
+```bash
+make check
+```
+## Hugging Face Spaces
+Deze repo is voorbereid als Docker Space. De hosted entrypoint is `app.py`; die maakt
+demo-data aan en start daarna de Streamlit explorer.
+Belangrijk voor V-JEPA:
+- Docker gebruikt CPU-only PyTorch wheels.
+- Transformers wordt vanaf Hugging Face main geïnstalleerd, omdat V-JEPA 2 daar recente
+  support nodig heeft.
+- De eerste keer dat `vjepa` gekozen wordt, downloadt de app het model.

app.py ADDED Viewed

	@@ -0,0 +1,11 @@

+"""Hosted Streamlit entrypoint for Hugging Face Spaces."""
+from __future__ import annotations
+from pathlib import Path
+from mathvision_explorer.demo import create_demo_dataset
+from mathvision_explorer.streamlit_app import main
+create_demo_dataset(Path("data/demo"))
+main()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ git

pyproject.toml ADDED Viewed

	@@ -0,0 +1,59 @@

+[project]
+name = "mathvision-explorer"
+version = "0.1.0"
+description = "Typed tools for exploring MathVision-style visual math datasets with JEPA-ready embeddings."
+readme = "README.md"
+requires-python = ">=3.11"
+authors = [{ name = "MathVision Explorer" }]
+dependencies = [
+    "pillow>=10.0",
+]
+[project.optional-dependencies]
+app = [
+    "streamlit>=1.35",
+]
+jepa = [
+    "numpy>=1.26",
+    "torch>=2.4",
+    "torchvision>=0.19",
+]
+[project.scripts]
+mathvision = "mathvision_explorer.cli:main"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[dependency-groups]
+dev = [
+    "mypy>=1.10",
+    "pytest>=8.0",
+    "ruff>=0.5",
+]
+[tool.pytest.ini_options]
+addopts = "-q"
+testpaths = ["tests"]
+[tool.uv.sources]
+torch = { index = "pytorch-cpu" }
+torchvision = { index = "pytorch-cpu" }
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP", "B", "SIM"]
+[tool.mypy]
+python_version = "3.11"
+strict = true
+files = ["src", "tests"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+--extra-index-url https://download.pytorch.org/whl/cpu
+-e .
+streamlit>=1.35
+numpy>=1.26
+torch>=2.4
+torchvision>=0.19
+git+https://github.com/huggingface/transformers

src/mathvision_explorer/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+"""Utilities for exploring visual math problems and JEPA-style embeddings."""
+from mathvision_explorer.dataset import MathVisionRecord, load_jsonl_records
+from mathvision_explorer.index import Neighbor, VectorIndex
+__all__ = ["MathVisionRecord", "Neighbor", "VectorIndex", "load_jsonl_records"]

src/mathvision_explorer/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (494 Bytes). View file

src/mathvision_explorer/__pycache__/cli.cpython-313.pyc ADDED Viewed

Binary file (7.01 kB). View file

src/mathvision_explorer/__pycache__/dataset.cpython-313.pyc ADDED Viewed

Binary file (7.04 kB). View file

src/mathvision_explorer/__pycache__/demo.cpython-313.pyc ADDED Viewed

Binary file (8.83 kB). View file

src/mathvision_explorer/__pycache__/embeddings.cpython-313.pyc ADDED Viewed

Binary file (8.24 kB). View file

src/mathvision_explorer/__pycache__/explorer.cpython-313.pyc ADDED Viewed

Binary file (2.42 kB). View file

src/mathvision_explorer/__pycache__/html.cpython-313.pyc ADDED Viewed

Binary file (5.29 kB). View file

src/mathvision_explorer/__pycache__/index.cpython-313.pyc ADDED Viewed

Binary file (6.59 kB). View file

src/mathvision_explorer/__pycache__/similarity.cpython-313.pyc ADDED Viewed

Binary file (3.49 kB). View file

src/mathvision_explorer/cli.py ADDED Viewed

	@@ -0,0 +1,132 @@

+"""Command-line interface for MathVision Explorer."""
+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+from mathvision_explorer.dataset import load_jsonl_records, summarize_records
+from mathvision_explorer.demo import create_demo_dataset
+from mathvision_explorer.embeddings import (
+    ColorStatsEmbedder,
+    ImageEmbedder,
+    VJepaImageEmbedder,
+    embed_record_image,
+)
+from mathvision_explorer.explorer import build_image_index
+from mathvision_explorer.html import export_html
+from mathvision_explorer.index import VectorIndex
+def main() -> None:
+    """Run the MathVision Explorer command-line interface.
+
+    Registers the ``inspect``, ``demo``, ``export-html``, ``index`` and ``search``
+    subcommands, parses the process arguments and dispatches to the matching
+    private handler. A subcommand is mandatory (``required=True``).
+    """
+    parser = argparse.ArgumentParser(prog="mathvision")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    # inspect: summarize a JSONL file.
+    inspect_parser = subparsers.add_parser("inspect", help="Inspect a MathVision-style JSONL file.")
+    inspect_parser.add_argument("jsonl", type=Path)
+    # demo: write the generated demo dataset.
+    demo_parser = subparsers.add_parser("demo", help="Create a tiny local demo dataset.")
+    demo_parser.add_argument("output_dir", type=Path)
+    # export-html: render records as an HTML gallery.
+    html_parser = subparsers.add_parser("export-html", help="Export records to a browser gallery.")
+    html_parser.add_argument("jsonl", type=Path)
+    html_parser.add_argument("output", type=Path)
+    # index: embed record images and save the vectors.
+    index_parser = subparsers.add_parser("index", help="Build a local image-feature index.")
+    index_parser.add_argument("jsonl", type=Path)
+    index_parser.add_argument("output", type=Path)
+    _add_embedder_arguments(index_parser)
+    # search: nearest-neighbour lookup for one record id.
+    search_parser = subparsers.add_parser("search", help="Search similar indexed records.")
+    search_parser.add_argument("jsonl", type=Path)
+    search_parser.add_argument("index", type=Path)
+    search_parser.add_argument("query_id")
+    search_parser.add_argument("--limit", type=int, default=5)
+    _add_embedder_arguments(search_parser)
+    args = parser.parse_args()
+    # The embedder is only constructed for the two subcommands that need one.
+    if args.command == "inspect":
+        _inspect(args.jsonl)
+    elif args.command == "demo":
+        _demo(args.output_dir)
+    elif args.command == "export-html":
+        _export_html(args.jsonl, args.output)
+    elif args.command == "index":
+        _index(args.jsonl, args.output, embedder=_embedder_from_args(args))
+    elif args.command == "search":
+        _search(
+            args.jsonl,
+            args.index,
+            args.query_id,
+            limit=args.limit,
+            embedder=_embedder_from_args(args),
+        )
+def _inspect(jsonl: Path) -> None:
+    """Load the records from ``jsonl`` and print their summary as indented, key-sorted JSON."""
+    records = load_jsonl_records(jsonl)
+    print(json.dumps(summarize_records(records), indent=2, sort_keys=True))
+def _demo(output_dir: Path) -> None:
+    """Create the demo dataset in ``output_dir`` and print the path of the written JSONL file."""
+    jsonl_path = create_demo_dataset(output_dir)
+    print(f"Wrote demo dataset to {jsonl_path}")
+def _export_html(jsonl: Path, output: Path) -> None:
+    """Load the records from ``jsonl``, export them as an HTML gallery to ``output`` and report it."""
+    records = load_jsonl_records(jsonl)
+    export_html(records, output)
+    print(f"Wrote gallery to {output}")
+def _index(jsonl: Path, output: Path, *, embedder: ImageEmbedder) -> None:
+    """Build an image index for the records in ``jsonl`` and save it as TSV to ``output``.
+
+    Prints the number of stored vectors (``len(index)``) and the output path.
+    """
+    records = load_jsonl_records(jsonl)
+    index = build_image_index(records, embedder)
+    index.save_tsv(output)
+    print(f"Wrote {len(index)} vectors to {output}")
+def _search(
+    jsonl: Path,
+    index_path: Path,
+    query_id: str,
+    *,
+    limit: int,
+    embedder: ImageEmbedder,
+) -> None:
+    records = load_jsonl_records(jsonl)
+    record_by_id = {record.problem_id: record for record in records}
+    query_record = record_by_id.get(query_id)
+    if query_record is None:
+        raise SystemExit(f"Unknown query id: {query_id}")
+    query_vector = embed_record_image(query_record.image_path, embedder)
+    index = VectorIndex.load_tsv(index_path)
+    for neighbor in index.search(query_vector, limit=limit, exclude_id=query_id):
+        record = record_by_id.get(neighbor.item_id)
+        label = record.question if record is not None else neighbor.item_id
+        print(f"{neighbor.score:.4f}\t{neighbor.item_id}\t{label}")
+def _add_embedder_arguments(parser: argparse.ArgumentParser) -> None:
+    """Add the shared embedder options to ``parser``.
+
+    ``--embedder`` selects ``color`` (default) or ``vjepa``; the ``--jepa-*`` options
+    are only read when the ``vjepa`` embedder is built.
+    """
+    parser.add_argument("--embedder", choices=["color", "vjepa"], default="color")
+    parser.add_argument("--jepa-model", default="facebook/vjepa2-vitl-fpc64-256")
+    parser.add_argument("--jepa-device", default=None)
+    parser.add_argument("--jepa-frames", type=int, default=16)
+def _embedder_from_args(args: argparse.Namespace) -> ImageEmbedder:
+    """Build the embedder selected by ``--embedder``.
+
+    Returns a ``VJepaImageEmbedder`` configured from the ``--jepa-*`` options when
+    ``vjepa`` was chosen, otherwise a ``ColorStatsEmbedder``.
+    """
+    if args.embedder == "vjepa":
+        return VJepaImageEmbedder(
+            model_id=args.jepa_model,
+            device=args.jepa_device,
+            frame_count=args.jepa_frames,
+        )
+    return ColorStatsEmbedder()
+# Allow running the module directly as a script.
+if __name__ == "__main__":
+    main()

src/mathvision_explorer/dataset.py ADDED Viewed

	@@ -0,0 +1,143 @@

+"""Dataset loading helpers for MathVision-like JSONL exports."""
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+@dataclass(frozen=True, slots=True)
+class MathVisionRecord:
+    """A single visual math problem with optional image and solution metadata.
+
+    Instances are immutable (``frozen=True``) and use ``__slots__``.
+    """
+    # Required fields.
+    problem_id: str
+    question: str
+    answer: str
+    # Optional metadata; None (or an empty tuple for options) means "not provided".
+    subject: str | None = None
+    level: int | None = None
+    image_path: Path | None = None
+    options: tuple[str, ...] = ()
+    solution: str | None = None
+def load_jsonl_records(path: Path) -> list[MathVisionRecord]:
+    """Load MathVision-like records from a UTF-8 JSONL file."""
+    records: list[MathVisionRecord] = []
+    with path.open("r", encoding="utf-8") as jsonl_file:
+        for line_number, line in enumerate(jsonl_file, start=1):
+            stripped = line.strip()
+            if not stripped:
+                continue
+            payload = json.loads(stripped)
+            if not isinstance(payload, dict):
+                msg = f"Line {line_number} must contain a JSON object."
+                raise ValueError(msg)
+            records.append(record_from_mapping(payload, source_dir=path.parent))
+    return records
+def record_from_mapping(
+    payload: dict[str, Any], *, source_dir: Path | None = None
+) -> MathVisionRecord:
+    """Create a typed record from a raw dictionary.
+
+    The problem id is read from ``id``, falling back to ``problem_id``. ``question``
+    and ``answer`` are required; ``image``, ``options``, ``subject``, ``level`` and
+    ``solution`` are optional. A relative ``image`` path is joined onto
+    ``source_dir`` when one is given.
+
+    Raises:
+        ValueError: If a required field is missing or any field has the wrong type.
+    """
+    problem_id = _required_string(payload, "id", fallback_key="problem_id")
+    question = _required_string(payload, "question")
+    answer = _required_string(payload, "answer")
+    image_path = _optional_path(payload.get("image"), source_dir=source_dir)
+    options = _options_from_value(payload.get("options"))
+    return MathVisionRecord(
+        problem_id=problem_id,
+        question=question,
+        answer=answer,
+        subject=_optional_string(payload.get("subject")),
+        level=_optional_int(payload.get("level")),
+        image_path=image_path,
+        options=options,
+        solution=_optional_string(payload.get("solution")),
+    )
+def filter_records(
+    records: list[MathVisionRecord],
+    *,
+    subject: str | None = None,
+    level: int | None = None,
+) -> list[MathVisionRecord]:
+    """Return records matching optional subject and level filters.
+
+    A filter left at ``None`` is not applied; filters that are given must match
+    exactly (``==``). The input order is preserved and a new list is returned.
+    """
+    return [
+        record
+        for record in records
+        if (subject is None or record.subject == subject)
+        and (level is None or record.level == level)
+    ]
+def summarize_records(records: list[MathVisionRecord]) -> dict[str, object]:
+    """Build a compact summary for CLI output or dashboards.
+
+    Returns a dict with the record count (``records``), the number of records that
+    have an image path (``images``), and the sorted distinct non-None ``subjects``
+    and ``levels``.
+    """
+    subjects = sorted({record.subject for record in records if record.subject is not None})
+    levels = sorted({record.level for record in records if record.level is not None})
+    # Counts records that declare an image path; the files themselves are not checked.
+    image_count = sum(1 for record in records if record.image_path is not None)
+    return {
+        "records": len(records),
+        "images": image_count,
+        "subjects": subjects,
+        "levels": levels,
+    }
+def _required_string(
+    payload: dict[str, Any], key: str, *, fallback_key: str | None = None
+) -> str:
+    """Return the non-blank string stored under ``key`` (or ``fallback_key``).
+
+    The fallback key is only consulted when ``key`` is absent or maps to ``None``.
+    The value is returned unmodified (not stripped).
+
+    Raises:
+        ValueError: If the resolved value is not a string or is empty/whitespace.
+    """
+    value = payload.get(key)
+    if value is None and fallback_key is not None:
+        value = payload.get(fallback_key)
+    if not isinstance(value, str) or not value.strip():
+        # NOTE(review): the message names only `key`, even when a fallback key was
+        # also tried or the value is present but has the wrong type.
+        msg = f"Missing required string field: {key}"
+        raise ValueError(msg)
+    return value
+def _optional_string(value: object) -> str | None:
+    """Return ``value`` if it is a string, ``None`` if it is ``None``; raise ValueError otherwise."""
+    if value is None:
+        return None
+    if not isinstance(value, str):
+        msg = "Optional text fields must be strings when present."
+        raise ValueError(msg)
+    return value
+def _optional_int(value: object) -> int | None:
+    """Return ``value`` if it is an int, ``None`` if it is ``None``; raise ValueError otherwise."""
+    if value is None:
+        return None
+    # bool is a subclass of int, so it is rejected explicitly.
+    if isinstance(value, bool) or not isinstance(value, int):
+        msg = "Level must be an integer when present."
+        raise ValueError(msg)
+    return value
+def _optional_path(value: object, *, source_dir: Path | None) -> Path | None:
+    """Convert an optional image path string into a ``Path``.
+
+    ``None`` yields ``None``. A relative path is joined onto ``source_dir`` when one
+    is given; absolute paths are returned as-is. The path is not checked for existence.
+
+    Raises:
+        ValueError: If ``value`` is present but not a non-empty string.
+    """
+    if value is None:
+        return None
+    if not isinstance(value, str) or not value:
+        msg = "Image path must be a non-empty string when present."
+        raise ValueError(msg)
+    image_path = Path(value)
+    if source_dir is not None and not image_path.is_absolute():
+        return source_dir / image_path
+    return image_path
+def _options_from_value(value: object) -> tuple[str, ...]:
+    """Convert an optional list of answer options into a tuple.
+
+    ``None`` yields an empty tuple; anything other than a list of strings raises
+    ValueError.
+    """
+    if value is None:
+        return ()
+    if not isinstance(value, list) or not all(isinstance(option, str) for option in value):
+        msg = "Options must be a list of strings when present."
+        raise ValueError(msg)
+    return tuple(value)

src/mathvision_explorer/demo.py ADDED Viewed

	@@ -0,0 +1,229 @@

+"""Demo dataset generation for trying the explorer locally."""
+from __future__ import annotations
+import json
+from pathlib import Path
+from PIL import Image, ImageDraw
+def create_demo_dataset(output_dir: Path) -> Path:
+    """Create a tiny MathVision-like dataset with simple generated images.
+
+    Writes eight PNG images to ``output_dir / "images"`` (the directories are created
+    when missing) and one JSON object per record to ``output_dir / "demo.jsonl"``.
+    Existing files with the same names are overwritten.
+
+    Returns:
+        The path of the written JSONL file.
+    """
+    image_dir = output_dir / "images"
+    image_dir.mkdir(parents=True, exist_ok=True)
+    # Image paths are relative to the JSONL file's directory (output_dir).
+    records = [
+        {
+            "id": "demo-red-squares",
+            "question": "How many red squares are visible?",
+            "answer": "4",
+            "image": "images/red-squares.png",
+            "options": ["3", "4", "5"],
+            "subject": "counting",
+            "level": 1,
+            "solution": "Count the four red square tiles.",
+        },
+        {
+            "id": "demo-red-squares-small",
+            "question": "How many small red squares are visible?",
+            "answer": "5",
+            "image": "images/red-squares-small.png",
+            "options": ["4", "5", "6"],
+            "subject": "counting",
+            "level": 2,
+            "solution": "The red tiles form a group of five.",
+        },
+        {
+            "id": "demo-blue-triangles",
+            "question": "How many blue triangles are visible?",
+            "answer": "3",
+            "image": "images/blue-triangles.png",
+            "options": ["2", "3", "4"],
+            "subject": "geometry",
+            "level": 1,
+            "solution": "There are three separate blue triangles.",
+        },
+        {
+            "id": "demo-blue-pyramids",
+            "question": "Which shape appears repeatedly?",
+            "answer": "triangle",
+            "image": "images/blue-pyramids.png",
+            "options": ["circle", "triangle", "square"],
+            "subject": "geometry",
+            "level": 2,
+            "solution": "The repeated blue shapes are triangles.",
+        },
+        {
+            "id": "demo-red-grid",
+            "question": "Which tile color dominates the grid?",
+            "answer": "red",
+            "image": "images/red-grid.png",
+            "options": ["red", "blue", "green"],
+            "subject": "pattern",
+            "level": 2,
+            "solution": "Most grid cells are red.",
+        },
+        {
+            "id": "demo-green-grid",
+            "question": "Which tile color dominates this grid?",
+            "answer": "green",
+            "image": "images/green-grid.png",
+            "options": ["red", "blue", "green"],
+            "subject": "pattern",
+            "level": 2,
+            "solution": "Green appears in most grid cells.",
+        },
+        {
+            "id": "demo-number-line",
+            "question": "Which point is closest to 4?",
+            "answer": "C",
+            "image": "images/number-line.png",
+            "options": ["A", "B", "C"],
+            "subject": "algebra",
+            "level": 1,
+            "solution": "Point C is drawn nearest to the tick labeled 4.",
+        },
+        {
+            "id": "demo-clock",
+            "question": "Which hour does the short hand point to?",
+            "answer": "3",
+            "image": "images/clock.png",
+            "options": ["2", "3", "4"],
+            "subject": "measurement",
+            "level": 1,
+            "solution": "The shorter hand points toward 3.",
+        },
+    ]
+    # Render one image per record; the file names match the "image" entries above.
+    _draw_red_squares(image_dir / "red-squares.png")
+    _draw_red_squares_small(image_dir / "red-squares-small.png")
+    _draw_blue_triangles(image_dir / "blue-triangles.png")
+    _draw_blue_pyramids(image_dir / "blue-pyramids.png")
+    _draw_red_grid(image_dir / "red-grid.png")
+    _draw_green_grid(image_dir / "green-grid.png")
+    _draw_number_line(image_dir / "number-line.png")
+    _draw_clock(image_dir / "clock.png")
+    jsonl_path = output_dir / "demo.jsonl"
+    with jsonl_path.open("w", encoding="utf-8") as jsonl_file:
+        for record in records:
+            # sort_keys keeps the generated file byte-stable between runs.
+            jsonl_file.write(json.dumps(record, sort_keys=True))
+            jsonl_file.write("\n")
+    return jsonl_path
+def _new_canvas() -> Image.Image:
+    """Return a new 420x280 RGB image filled with a light background color."""
+    return Image.new("RGB", (420, 280), color=(248, 250, 252))
+def _draw_red_squares(path: Path) -> None:
+    """Draw four outlined red squares in a 2x2 arrangement and save the image to ``path``."""
+    image = _new_canvas()
+    draw = ImageDraw.Draw(image)
+    # (x, y) is the top-left corner of each square.
+    for x, y in [(80, 60), (170, 60), (80, 150), (170, 150)]:
+        draw.rectangle((x, y, x + 58, y + 58), fill=(220, 38, 38), outline=(127, 29, 29), width=3)
+    image.save(path)
+def _draw_blue_triangles(path: Path) -> None:
+    """Draw three equally sized blue triangles side by side and save the image to ``path``."""
+    image = _new_canvas()
+    draw = ImageDraw.Draw(image)
+    # Each triangle is listed as (bottom-left, apex, bottom-right).
+    triangles = [
+        [(90, 190), (130, 80), (170, 190)],
+        [(190, 190), (230, 80), (270, 190)],
+        [(290, 190), (330, 80), (370, 190)],
+    ]
+    for triangle in triangles:
+        draw.polygon(triangle, fill=(37, 99, 235), outline=(30, 64, 175))
+    image.save(path)
+def _draw_red_squares_small(path: Path) -> None:
+    """Draw five smaller red squares (three above two) and save the image to ``path``."""
+    image = _new_canvas()
+    draw = ImageDraw.Draw(image)
+    # (x, y) is the top-left corner of each square.
+    for x, y in [(78, 54), (148, 54), (218, 54), (112, 134), (184, 134)]:
+        draw.rectangle((x, y, x + 46, y + 46), fill=(239, 68, 68), outline=(127, 29, 29), width=3)
+    image.save(path)
+def _draw_blue_pyramids(path: Path) -> None:
+    """Draw three blue triangles of increasing size on a common baseline and save to ``path``."""
+    image = _new_canvas()
+    draw = ImageDraw.Draw(image)
+    # (x, y) is the bottom-left corner; size is both the base width and the height.
+    for x, y, size in [(82, 178, 52), (162, 178, 68), (262, 178, 82)]:
+        draw.polygon(
+            [(x, y), (x + size // 2, y - size), (x + size, y)],
+            fill=(59, 130, 246),
+            outline=(30, 64, 175),
+        )
+    image.save(path)
+def _draw_red_grid(path: Path) -> None:
+    """Draw a 2x3 grid of tiles in which four of six are red and save the image to ``path``."""
+    image = _new_canvas()
+    draw = ImageDraw.Draw(image)
+    # Tile colors in row-major order: red, red, green / red, blue, red.
+    colors = [
+        (220, 38, 38),
+        (220, 38, 38),
+        (22, 163, 74),
+        (220, 38, 38),
+        (37, 99, 235),
+        (220, 38, 38),
+    ]
+    for index, color in enumerate(colors):
+        # Three tiles per row; tiles are 64 px wide on an 82 px pitch.
+        row, column = divmod(index, 3)
+        x = 92 + column * 82
+        y = 64 + row * 82
+        draw.rectangle((x, y, x + 64, y + 64), fill=color, outline=(15, 23, 42), width=2)
+    image.save(path)
+def _draw_green_grid(path: Path) -> None:
+    """Draw a 2x3 grid of tiles in which four of six are green and save the image to ``path``."""
+    image = _new_canvas()
+    draw = ImageDraw.Draw(image)
+    # Tile colors in row-major order: green, green, red / green, blue, green.
+    colors = [
+        (22, 163, 74),
+        (22, 163, 74),
+        (220, 38, 38),
+        (22, 163, 74),
+        (37, 99, 235),
+        (22, 163, 74),
+    ]
+    for index, color in enumerate(colors):
+        # Three tiles per row; tiles are 64 px wide on an 82 px pitch.
+        row, column = divmod(index, 3)
+        x = 92 + column * 82
+        y = 64 + row * 82
+        draw.rectangle((x, y, x + 64, y + 64), fill=color, outline=(15, 23, 42), width=2)
+    image.save(path)
+def _draw_number_line(path: Path) -> None:
+    """Draw a number line with ticks 0-5 and three labeled points A, B, C; save to ``path``."""
+    image = _new_canvas()
+    draw = ImageDraw.Draw(image)
+    draw.line((62, 148, 358, 148), fill=(15, 23, 42), width=4)
+    # Ticks sit every 59 px starting at x=62, so the tick labeled 4 is at x=298.
+    for index in range(6):
+        x = 62 + index * 59
+        draw.line((x, 134, x, 162), fill=(15, 23, 42), width=3)
+        draw.text((x - 5, 170), str(index), fill=(15, 23, 42))
+    # (label, x position, dot color); C at x=296 is the point nearest the 4 tick.
+    points = [
+        ("A", 174, (37, 99, 235)),
+        ("B", 246, (22, 163, 74)),
+        ("C", 296, (220, 38, 38)),
+    ]
+    for label, x, color in points:
+        draw.ellipse((x - 9, 108, x + 9, 126), fill=color)
+        draw.text((x - 5, 86), label, fill=(15, 23, 42))
+    image.save(path)
+def _draw_clock(path: Path) -> None:
+    image = _new_canvas()
+    draw = ImageDraw.Draw(image)
+    center = (210, 140)
+    draw.ellipse((100, 30, 320, 250), fill=(255, 255, 255), outline=(15, 23, 42), width=4)
+    for label, xy in [("12", (199, 48)), ("3", (290, 132)), ("6", (205, 220)), ("9", (120, 132))]:
+        draw.text(xy, label, fill=(15, 23, 42))
+    draw.line((center[0], center[1], 282, 140), fill=(220, 38, 38), width=6)
+    draw.line((center[0], center[1], 210, 68), fill=(37, 99, 235), width=4)
+    draw.ellipse((202, 132, 218, 148), fill=(15, 23, 42))
+    image.save(path)

src/mathvision_explorer/embeddings.py ADDED Viewed

	@@ -0,0 +1,133 @@

+"""Embedding helpers for image records.
+The default embedder is intentionally lightweight and deterministic. It gives the
+project a testable local baseline while leaving room to plug in V-JEPA features later.
+"""
+from __future__ import annotations
+from importlib import import_module
+from pathlib import Path
+from typing import Any, Protocol
+from PIL import Image, ImageStat
+class ImageEmbedder(Protocol):
+    """Protocol for objects that turn image paths into numeric vectors.
+
+    Any object with a matching ``embed_image`` method satisfies it structurally.
+    """
+    def embed_image(self, image_path: Path) -> tuple[float, ...]:
+        """Return an embedding vector for an image file."""
+class ColorStatsEmbedder:
+    """Embed images with normalized RGB mean and standard deviation features."""
+    def embed_image(self, image_path: Path) -> tuple[float, ...]:
+        """Return six normalized color-statistics features for an image.
+
+        The vector is the per-channel mean (R, G, B) followed by the per-channel
+        standard deviation, each divided by 255.
+        """
+        with Image.open(image_path) as image:
+            rgb_image = image.convert("RGB")
+            # Statistics are computed while the source file is still open.
+            stat = ImageStat.Stat(rgb_image)
+        means = tuple(channel / 255.0 for channel in stat.mean)
+        stddevs = tuple(channel / 255.0 for channel in stat.stddev)
+        return means + stddevs
+class MissingImageError(RuntimeError):
+    """Raised when a record cannot be embedded because no image path is available.
+
+    ``embed_record_image`` raises it when called with ``image_path=None``.
+    """
+class JepaDependencyError(RuntimeError):
+    """Raised when optional V-JEPA dependencies are not installed.
+
+    The triggering ``ImportError`` is chained as the cause.
+    """
+class VJepaImageEmbedder:
+    """Embed images with a Hugging Face V-JEPA 2 image/video encoder.
+    The implementation follows the model-card pattern for image inputs: a still image is
+    processed as video pixels and repeated across frames before `get_vision_features`.
+    """
+    def __init__(
+        self,
+        *,
+        model_id: str = "facebook/vjepa2-vitl-fpc64-256",
+        device: str | None = None,
+        frame_count: int = 16,
+    ) -> None:
+        """Load the V-JEPA processor and model eagerly when the embedder is constructed.
+
+        ``torch`` and ``transformers`` are imported here rather than at module import
+        time, so the module itself can be imported without them.
+
+        Args:
+            model_id: Identifier passed to ``from_pretrained``.
+            device: Target device; when ``None``, "cuda" is used if available, else "cpu".
+            frame_count: Number of times the still image is repeated along dim 1.
+
+        Raises:
+            ValueError: If ``frame_count`` is less than 1.
+            JepaDependencyError: If torch/transformers cannot be imported, or loading
+                the processor or model raises ``ImportError``.
+        """
+        if frame_count < 1:
+            raise ValueError("Frame count must be at least 1.")
+        self.model_id = model_id
+        self.frame_count = frame_count
+        self._torch = _import_optional("torch")
+        transformers = _import_optional("transformers")
+        _quiet_transformers_logging(transformers)
+        try:
+            self._processor = transformers.AutoVideoProcessor.from_pretrained(model_id)
+            self._model = transformers.AutoModel.from_pretrained(model_id)
+        except ImportError as error:
+            # NOTE(review): this hint says `make sync-jepa`, while _import_optional
+            # suggests `uv sync --extra jepa --dev` — consider unifying the wording.
+            msg = (
+                "V-JEPA backend dependencies are missing. Install them with "
+                "`make sync-jepa`."
+            )
+            raise JepaDependencyError(msg) from error
+        self._device = device or ("cuda" if self._torch.cuda.is_available() else "cpu")
+        self._model.to(self._device)
+        # Inference only: switch the model to evaluation mode.
+        self._model.eval()
+    def embed_image(self, image_path: Path) -> tuple[float, ...]:
+        """Return a pooled V-JEPA feature vector for an image."""
+        with Image.open(image_path) as image:
+            rgb_image = image.convert("RGB")
+        encoded = self._processor(rgb_image, return_tensors="pt").to(self._model.device)
+        pixel_values = encoded["pixel_values_videos"]
+        # Repeat along dim 1 — presumably the frame axis of a
+        # (batch, frames, channels, height, width) tensor; TODO confirm.
+        pixel_values = pixel_values.repeat(1, self.frame_count, 1, 1, 1)
+        with self._torch.no_grad():
+            features = self._model.get_vision_features(pixel_values)
+            pooled = _mean_pool_features(features)
+        # squeeze(0) assumes a leading batch dimension of size one — TODO confirm.
+        return tuple(float(value) for value in pooled.squeeze(0).detach().cpu().tolist())
+def _mean_pool_features(features: Any) -> Any:
+    """Pool token/time dimensions while preserving the final feature dimension.
+
+    Inputs with at most two dimensions are returned unchanged. Otherwise the mean is
+    taken over every dimension except the first and the last.
+    """
+    if features.ndim <= 2:
+        return features
+    return features.mean(dim=tuple(range(1, features.ndim - 1)))
+def embed_record_image(image_path: Path | None, embedder: ImageEmbedder) -> tuple[float, ...]:
+    """Embed a record image or raise a clear error when the path is missing.
+
+    Raises:
+        MissingImageError: If ``image_path`` is ``None``.
+    """
+    if image_path is None:
+        raise MissingImageError("Record has no image path to embed.")
+    return embedder.embed_image(image_path)
+def _import_optional(module_name: str) -> Any:
+    """Import and return ``module_name``, raising JepaDependencyError if it is not importable."""
+    try:
+        return import_module(module_name)
+    except ImportError as error:
+        msg = (
+            "V-JEPA dependencies are missing. Install them with "
+            "`uv sync --extra jepa --dev`."
+        )
+        raise JepaDependencyError(msg) from error
+def _quiet_transformers_logging(transformers: Any) -> None:
+    """Reduce noisy dev-version Transformers compatibility logging.
+
+    Best effort: if the module does not expose ``logging.set_verbosity_error``, the
+    resulting ``AttributeError`` is ignored and logging is left unchanged.
+    """
+    try:
+        transformers.logging.set_verbosity_error()
+    except AttributeError:
+        return

src/mathvision_explorer/explorer.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""High-level workflows for MathVision exploration."""
+from __future__ import annotations
+from pathlib import Path
+from mathvision_explorer.dataset import MathVisionRecord, filter_records, load_jsonl_records
+from mathvision_explorer.embeddings import ImageEmbedder, embed_record_image
+from mathvision_explorer.index import Neighbor, VectorIndex
+def build_image_index(records: list[MathVisionRecord], embedder: ImageEmbedder) -> VectorIndex:
+    """Build a vector index for all records that have image paths.
+
+    Records without an image path are skipped; every other record is embedded with
+    ``embedder`` and added to the index under its ``problem_id``.
+    """
+    index = VectorIndex()
+    for record in records:
+        if record.image_path is None:
+            continue
+        index.add(record.problem_id, embed_record_image(record.image_path, embedder))
+    return index
+def find_similar_records(
+    records: list[MathVisionRecord],
+    index: VectorIndex,
+    query_id: str,
+    query_vector: tuple[float, ...],
+    *,
+    limit: int = 5,
+) -> list[tuple[MathVisionRecord, Neighbor]]:
+    """Find records nearest to a query vector.
+
+    ``query_id`` is passed to the index as ``exclude_id``. Neighbors whose id is not
+    present in ``records`` are dropped, so fewer than ``limit`` pairs may be returned.
+    """
+    record_by_id = {record.problem_id: record for record in records}
+    neighbors = index.search(query_vector, limit=limit, exclude_id=query_id)
+    return [
+        (record_by_id[neighbor.item_id], neighbor)
+        for neighbor in neighbors
+        if neighbor.item_id in record_by_id
+    ]
+def load_filtered_records(
+    path: Path, *, subject: str | None = None, level: int | None = None
+) -> list[MathVisionRecord]:
+    """Load records and apply optional explorer filters."""
+    return filter_records(load_jsonl_records(path), subject=subject, level=level)

src/mathvision_explorer/html.py ADDED Viewed

	@@ -0,0 +1,141 @@

+"""HTML export for visual inspection of MathVision-like records."""
+from __future__ import annotations
+from html import escape
+from pathlib import Path
+from mathvision_explorer.dataset import MathVisionRecord
+def export_html(records: list[MathVisionRecord], output: Path) -> None:
+    """Write a standalone HTML gallery for records.
+
+    The page embeds its stylesheet inline, shows the record count in the header,
+    and contains one card per record in the order given.
+
+    Args:
+        records: Records to render; each becomes one card via ``_render_card``.
+        output: Destination HTML file. Missing parent directories are created,
+            and the file is written as UTF-8.
+    """
+    # Make sure the destination directory exists before anything is written.
+    output.parent.mkdir(parents=True, exist_ok=True)
+    # Cards receive the output directory so image paths can be expressed relative to it.
+    cards = "\n".join(_render_card(record, output_dir=output.parent) for record in records)
+    # Doubled braces below are literal CSS braces inside the f-string template.
+    html = f"""<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>MathVision Explorer</title>
+  <style>
+    body {{
+      margin: 0;
+      font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+      color: #172033;
+      background: #f7f8fb;
+    }}
+    header {{
+      padding: 28px 32px 18px;
+      background: #ffffff;
+      border-bottom: 1px solid #dde3ee;
+    }}
+    h1 {{
+      margin: 0 0 6px;
+      font-size: 28px;
+      font-weight: 750;
+    }}
+    main {{
+      display: grid;
+      gap: 18px;
+      grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+      padding: 24px 32px 36px;
+    }}
+    article {{
+      overflow: hidden;
+      border: 1px solid #d8deea;
+      border-radius: 8px;
+      background: #ffffff;
+    }}
+    img {{
+      display: block;
+      width: 100%;
+      aspect-ratio: 3 / 2;
+      object-fit: contain;
+      background: #eef2f7;
+    }}
+    .body {{
+      padding: 16px;
+    }}
+    .meta {{
+      display: flex;
+      flex-wrap: wrap;
+      gap: 8px;
+      margin-bottom: 12px;
+      font-size: 13px;
+    }}
+    .tag {{
+      padding: 3px 8px;
+      border: 1px solid #cfd7e6;
+      border-radius: 999px;
+      background: #f4f7fb;
+    }}
+    h2 {{
+      margin: 0 0 12px;
+      font-size: 16px;
+      line-height: 1.35;
+    }}
+    p {{
+      margin: 8px 0 0;
+      line-height: 1.45;
+    }}
+    .answer {{
+      font-weight: 700;
+    }}
+  </style>
+</head>
+<body>
+  <header>
+    <h1>MathVision Explorer</h1>
+    <div>{len(records)} visual math records</div>
+  </header>
+  <main>
+{cards}
+  </main>
+</body>
+</html>
+"""
+    # Overwrites any existing file at ``output``.
+    output.write_text(html, encoding="utf-8")
+def _render_card(record: MathVisionRecord, *, output_dir: Path) -> str:
+    image_html = ""
+    if record.image_path is not None:
+        image_src = _relative_or_absolute_image(record.image_path, output_dir=output_dir)
+        image_html = f'    <img src="{escape(image_src)}" alt="{escape(record.problem_id)}">\n'
+    meta = [_tag(record.problem_id)]
+    if record.subject is not None:
+        meta.append(_tag(record.subject))
+    if record.level is not None:
+        meta.append(_tag(f"level {record.level}"))
+    options = ""
+    if record.options:
+        options = f"<p>Options: {escape(', '.join(record.options))}</p>"
+    solution = ""
+    if record.solution:
+        solution = f"<p>{escape(record.solution)}</p>"
+    return f"""  <article>
+{image_html}    <div class="body">
+      <div class="meta">{''.join(meta)}</div>
+      <h2>{escape(record.question)}</h2>
+      <p class="answer">Answer: {escape(record.answer)}</p>
+      {options}
+      {solution}
+    </div>
+  </article>"""
+def _tag(value: str) -> str:
+    return f'<span class="tag">{escape(value)}</span>'
+def _relative_or_absolute_image(image_path: Path, *, output_dir: Path) -> str:
+    try:
+        return image_path.resolve().relative_to(output_dir.resolve()).as_posix()
+    except ValueError:
+        return image_path.resolve().as_uri()

src/mathvision_explorer/index.py ADDED Viewed

	@@ -0,0 +1,88 @@

+"""Small vector index for nearest-neighbor exploration."""
+from __future__ import annotations
+import math
+from dataclasses import dataclass
+from pathlib import Path
+@dataclass(frozen=True, slots=True)
+class Neighbor:
+    """A nearest-neighbor result from a vector search."""
+    # Identifier of the indexed item that was matched.
+    item_id: str
+    # Similarity between the query and this item; ``VectorIndex.search`` ranks higher scores first.
+    score: float
+class VectorIndex:
+    """In-memory cosine-similarity vector index."""
+    def __init__(self) -> None:
+        """Create an empty vector index."""
+        self._vectors: dict[str, tuple[float, ...]] = {}
+    def add(self, item_id: str, vector: tuple[float, ...]) -> None:
+        """Add or replace an item vector."""
+        if not vector:
+            raise ValueError("Vector must contain at least one value.")
+        if not all(math.isfinite(value) for value in vector):
+            raise ValueError("Vector values must be finite.")
+        self._vectors[item_id] = vector
+    def search(
+        self, query_vector: tuple[float, ...], *, limit: int = 5, exclude_id: str | None = None
+    ) -> list[Neighbor]:
+        """Return the closest vectors by cosine similarity."""
+        if limit < 1:
+            raise ValueError("Limit must be at least 1.")
+        neighbors = [
+            Neighbor(item_id=item_id, score=_cosine_similarity(query_vector, vector))
+            for item_id, vector in self._vectors.items()
+            if item_id != exclude_id
+        ]
+        return sorted(neighbors, key=lambda neighbor: neighbor.score, reverse=True)[:limit]
+    def save_tsv(self, path: Path) -> None:
+        """Persist the index as a simple tab-separated text file."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open("w", encoding="utf-8") as index_file:
+            for item_id, vector in sorted(self._vectors.items()):
+                values = "\t".join(str(value) for value in vector)
+                index_file.write(f"{item_id}\t{values}\n")
+    @classmethod
+    def load_tsv(cls, path: Path) -> VectorIndex:
+        """Load an index produced by :meth:`save_tsv`."""
+        index = cls()
+        with path.open("r", encoding="utf-8") as index_file:
+            for line_number, line in enumerate(index_file, start=1):
+                fields = line.rstrip("\n").split("\t")
+                if len(fields) < 2:
+                    msg = f"Line {line_number} must contain an id and vector values."
+                    raise ValueError(msg)
+                index.add(fields[0], tuple(float(value) for value in fields[1:]))
+        return index
+    def __len__(self) -> int:
+        """Return the number of indexed vectors."""
+        return len(self._vectors)
+def _cosine_similarity(left: tuple[float, ...], right: tuple[float, ...]) -> float:
+    if len(left) != len(right):
+        raise ValueError("Vectors must have the same dimensions.")
+    left_norm = math.sqrt(sum(value * value for value in left))
+    right_norm = math.sqrt(sum(value * value for value in right))
+    if left_norm == 0.0 or right_norm == 0.0:
+        return 0.0
+    dot_product = sum(
+        left_value * right_value for left_value, right_value in zip(left, right, strict=True)
+    )
+    return dot_product / (left_norm * right_norm)

src/mathvision_explorer/py.typed ADDED Viewed

	@@ -0,0 +1 @@


1	+

src/mathvision_explorer/similarity.py ADDED Viewed

	@@ -0,0 +1,88 @@

+"""Helpers for interpreting nearest-neighbor similarity scores."""
+from __future__ import annotations
+from dataclasses import dataclass
+from mathvision_explorer.dataset import MathVisionRecord
+@dataclass(frozen=True, slots=True)
+class MatchInterpretation:
+    """Human-readable context for a nearest-neighbor match."""
+    # Short description of the similarity score (see ``similarity_label``).
+    label: str
+    # True/False when both records have a subject; None when either subject is missing.
+    subject_match: bool | None
+    # Match level minus query level; None when either level is missing.
+    level_delta: int | None
+    # The label plus subject and level notes, joined with "; ".
+    summary: str
+def similarity_label(score: float) -> str:
+    """Return a compact label for a cosine similarity score."""
+    if score >= 0.90:
+        return "Very close visual match"
+    if score >= 0.75:
+        return "Related visual structure"
+    if score >= 0.55:
+        return "Loose visual overlap"
+    return "Weak visual match"
+def interpret_match(
+    query: MathVisionRecord,
+    match: MathVisionRecord,
+    *,
+    score: float,
+) -> MatchInterpretation:
+    """Explain a match using visual score, subject overlap, and difficulty delta."""
+    subject_match = _subject_match(query, match)
+    level_delta = _level_delta(query, match)
+    parts = [similarity_label(score)]
+    if subject_match is True:
+        parts.append("same subject")
+    elif subject_match is False:
+        parts.append("different subject")
+    if level_delta is not None:
+        if level_delta == 0:
+            parts.append("same level")
+        elif level_delta > 0:
+            parts.append(f"{level_delta} level harder")
+        else:
+            parts.append(f"{abs(level_delta)} level easier")
+    return MatchInterpretation(
+        label=similarity_label(score),
+        subject_match=subject_match,
+        level_delta=level_delta,
+        summary="; ".join(parts),
+    )
+def embedder_description(embedder_name: str) -> str:
+    """Describe what a selected embedder is comparing."""
+    if embedder_name == "vjepa":
+        return (
+            "V-JEPA compares learned visual features: layout, shapes, object-like structure, "
+            "and spatial patterns. Read scores relatively, not as percentages."
+        )
+    return (
+        "Color compares only RGB means and spread. It is a fast sanity-check baseline, "
+        "not semantic visual understanding."
+    )
+def _subject_match(query: MathVisionRecord, match: MathVisionRecord) -> bool | None:
+    if query.subject is None or match.subject is None:
+        return None
+    return query.subject == match.subject
+def _level_delta(query: MathVisionRecord, match: MathVisionRecord) -> int | None:
+    if query.level is None or match.level is None:
+        return None
+    return match.level - query.level

src/mathvision_explorer/streamlit_app.py ADDED Viewed

	@@ -0,0 +1,156 @@

+"""Streamlit app for browsing MathVision-like records."""
+from __future__ import annotations
+import argparse
+from importlib import import_module
+from pathlib import Path
+from typing import Any
+from mathvision_explorer.dataset import MathVisionRecord, filter_records, load_jsonl_records
+from mathvision_explorer.embeddings import ColorStatsEmbedder, ImageEmbedder, VJepaImageEmbedder
+from mathvision_explorer.explorer import build_image_index, find_similar_records
+from mathvision_explorer.similarity import embedder_description, interpret_match
+def main() -> None:
+    """Run the Streamlit explorer app.
+
+    Loads the records named by ``--jsonl``, renders the sidebar controls, then
+    shows the nearest-neighbor panel followed by the filtered record list.
+    """
+    args = _parse_args()
+    st = _load_streamlit()
+    records = load_jsonl_records(args.jsonl)
+    st.set_page_config(page_title="MathVision Explorer", layout="wide")
+    st.title("MathVision Explorer")
+    # Filter choices are the distinct subjects and levels present in the data.
+    subjects = sorted({record.subject for record in records if record.subject is not None})
+    levels = sorted({record.level for record in records if record.level is not None})
+    with st.sidebar:
+        st.header("Filters")
+        subject = st.selectbox("Subject", ["all", *subjects])
+        level_label = st.selectbox("Level", ["all", *(str(level) for level in levels)])
+        show_solutions = st.toggle("Show solutions", value=True)
+        st.header("Latent Space")
+        embedder_label = st.selectbox(
+            "Embedder",
+            ["color (fast demo)", "vjepa (requires make sync-jepa)"],
+        )
+        # The query choices come from all records, not from the filtered subset.
+        query_id = st.selectbox("Query record", [record.problem_id for record in records])
+        neighbor_count = st.slider("Neighbors", min_value=1, max_value=8, value=3)
+    # "all" means no filter; level labels are strings and are converted back to int.
+    selected_subject = None if subject == "all" else subject
+    selected_level = None if level_label == "all" else int(level_label)
+    filtered = filter_records(records, subject=selected_subject, level=selected_level)
+    # The similarity panel searches the full record list, independent of the filters.
+    _render_similarity_panel(
+        st,
+        records,
+        query_id=query_id,
+        embedder_name=_embedder_name_from_label(embedder_label),
+        neighbor_count=neighbor_count,
+    )
+    st.caption(f"{len(filtered)} of {len(records)} records")
+    for record in filtered:
+        _render_record(st, record, show_solution=show_solutions)
+def _render_similarity_panel(
+    st: Any,
+    records: list[MathVisionRecord],
+    *,
+    query_id: str,
+    embedder_name: str,
+    neighbor_count: int,
+) -> None:
+    st.header("Nearest Neighbors")
+    st.caption(embedder_description(embedder_name))
+    record_by_id = {record.problem_id: record for record in records}
+    query = record_by_id[query_id]
+    if query.image_path is None:
+        st.warning("Selected query has no image.")
+        return
+    try:
+        embedder = _load_embedder(embedder_name)
+        query_vector = embedder.embed_image(query.image_path)
+        index = build_image_index(records, embedder)
+        matches = find_similar_records(
+            records,
+            index,
+            query.problem_id,
+            query_vector,
+            limit=neighbor_count,
+        )
+    except RuntimeError as error:
+        st.error(str(error))
+        return
+    columns = st.columns([1, 2])
+    with columns[0]:
+        st.caption("Query")
+        st.image(str(query.image_path), width="stretch")
+        st.write(query.problem_id)
+    with columns[1]:
+        for record, neighbor in matches:
+            interpretation = interpret_match(query, record, score=neighbor.score)
+            with st.container(border=True):
+                match_columns = st.columns([0.35, 1])
+                with match_columns[0]:
+                    if record.image_path is not None:
+                        st.image(str(record.image_path), width="stretch")
+                with match_columns[1]:
+                    st.write(f"**{record.problem_id}**")
+                    st.caption(f"similarity {neighbor.score:.4f} | {interpretation.label}")
+                    st.write(record.question)
+                    st.write(interpretation.summary)
+def _render_record(st: Any, record: MathVisionRecord, *, show_solution: bool) -> None:
+    with st.container(border=True):
+        columns = st.columns([1, 1.4])
+        with columns[0]:
+            if record.image_path is not None:
+                st.image(str(record.image_path), width="stretch")
+        with columns[1]:
+            st.subheader(record.question)
+            badges = [record.problem_id]
+            if record.subject is not None:
+                badges.append(record.subject)
+            if record.level is not None:
+                badges.append(f"level {record.level}")
+            st.caption(" | ".join(badges))
+            if record.options:
+                st.write("Options: " + ", ".join(record.options))
+            st.write(f"Answer: **{record.answer}**")
+            if show_solution and record.solution:
+                st.write(record.solution)
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--jsonl", type=Path, default=Path("data/demo/demo.jsonl"))
+    return parser.parse_args()
+def _load_streamlit() -> Any:
+    try:
+        return import_module("streamlit")
+    except ImportError as error:
+        msg = "Streamlit is missing. Install it with `uv sync --extra app --dev`."
+        raise RuntimeError(msg) from error
+def _load_embedder(embedder_name: str) -> ImageEmbedder:
+    if embedder_name == "vjepa":
+        return VJepaImageEmbedder()
+    return ColorStatsEmbedder()
+def _embedder_name_from_label(label: str) -> str:
+    return "vjepa" if label.startswith("vjepa") else "color"
+if __name__ == "__main__":
+    main()