project_02_DS / task /task_02 /pipeline.py
griddev's picture
Deploy Streamlit Space app
0710b5c verified
"""
pipeline.py
============
Master Orchestrator — Task 2, Iteration 3
This script chains all five steps together in the correct order,
printing a clear progress banner at each stage so you can see exactly
what is happening and inspect intermediate results.
Step-by-step flow
------------------
STEP 1 → Load BLIP model (with fine-tuned weights if available).
STEP 2 → Encode image through ViT → encoder_hidden_states.
STEP 3 → Greedy decode token-by-token with Attention Flow heatmaps
         (multi-layer GradCAM rollout, bicubic upscaling).
STEP 4 → Build 2×5 overlay grid image → attention_grid_v3.png.
STEP 5 → Grade alignment with OWL-ViT + IoU → iou_chart_v3.png.
Designed to be deployment-friendly:
• Every step is a clean function import from its own module.
• Intermediate artefacts (heatmaps, tokens) can be inspected between steps.
• Outputs are saved to the same directory as this script.
Usage:
export PYTHONPATH=.
venv/bin/python task/task_02/pipeline.py
"""
import os
import sys
import requests
from PIL import Image
# ── path bootstrap ────────────────────────────────────────────────────────────
# Absolute directory that contains this script.
_THIS_DIR = os.path.split(os.path.abspath(__file__))[0]
# Two directory levels above this script; it must be importable so that the
# absolute ``task.task_02.*`` imports below resolve when the file is run
# directly as a script.
_PROJECT_ROOT = os.path.normpath(os.path.join(_THIS_DIR, os.pardir, os.pardir))
if _PROJECT_ROOT not in sys.path:
    # Prepend so the project's own packages win over same-named installed ones.
    sys.path.insert(0, _PROJECT_ROOT)
# ── step imports ─────────────────────────────────────────────────────────────
from task.task_02.step1_load_model import load_model
from task.task_02.step2_encode_image import encode_image
from task.task_02.step3_gradcam_flow import generate_with_flow
from task.task_02.step4_visualize import save_attention_grid
from task.task_02.step5_iou_grade import load_detector, grade_alignment, plot_iou_chart
# ── Output paths ─────────────────────────────────────────────────────────────
# Output path handed to save_attention_grid() in step 4 (next to this script).
OUT_GRID = os.path.join(_THIS_DIR, "attention_grid_v3.png")
# Output path handed to plot_iou_chart() after step 5 (next to this script).
OUT_CHART = os.path.join(_THIS_DIR, "iou_chart_v3.png")
# ── Test images ───────────────────────────────────────────────────────────────
# COCO val2017 images processed by run_pipeline(), in this order. Only the
# first entry gets an attention grid; every entry contributes to the IoU chart.
TEST_URLS = [
    "http://images.cocodataset.org/val2017/000000039769.jpg", # cats on couch
    "http://images.cocodataset.org/val2017/000000000139.jpg", # dining room
]
def _load_image(url: str, timeout: float = 30.0) -> Image.Image:
    """Download the image at *url* and return it as an RGB PIL image.

    Args:
        url: HTTP(S) address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled host.

    Returns:
        The decoded image, converted to RGB mode.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    print(f"\nπŸ“₯ Downloading test image: {url}")
    # The context manager releases the streamed connection even if decoding
    # fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail with a clear HTTP error instead of letting PIL try to decode
        # an error page as image data.
        response.raise_for_status()
        # convert() forces a full decode, so the pixel data is read while the
        # connection is still open.
        return Image.open(response.raw).convert("RGB")
def _banner(step: int, title: str):
    """Print a stage header: a blank line, a 60-character '=' rule, the
    ``STEP <step>`` title line, and a closing rule."""
    rule = "=" * 60
    heading = f" STEP {step} β€” {title}"
    # One print call emitting the same three lines the header consists of.
    print("\n" + rule, heading, rule, sep="\n")
# ── Main pipeline ─────────────────────────────────────────────────────────────
def run_pipeline(image_urls=None):
    """Run all five pipeline steps over a list of image URLs.

    The captioning model and the detector are loaded once and reused for
    every image. Each image is downloaded, encoded (step 2), decoded into
    tokens plus per-token heatmaps (step 3) and graded (step 5). The
    attention grid (step 4) is built for the first image only. Grading
    results of all images are pooled and, if there are any, plotted into a
    single chart.

    Args:
        image_urls: Optional iterable of image URLs to process. When omitted
            (or ``None``) the module-level ``TEST_URLS`` are used.

    Returns:
        None. Progress is printed to stdout; ``OUT_GRID`` and ``OUT_CHART``
        are passed as output paths to ``save_attention_grid`` and
        ``plot_iou_chart`` respectively.
    """
    urls = TEST_URLS if image_urls is None else list(image_urls)

    # ── STEP 1: Load model ───────────────────────────────────────────────────
    _banner(1, "Load BLIP Model")
    model, processor, device = load_model(use_finetuned=True)

    # ── Load OWL-ViT grader (do once, reuse for all images) ─────────────────
    detector = load_detector(device)

    # Aggregate IoU results across images for the final chart
    all_iou_results = []
    # Track what was actually produced so the final summary never reports an
    # artefact that was skipped.
    grid_saved = False
    chart_saved = False

    # Use the position, not the URL value, to detect the first image: a URL
    # that appears twice in the list must not trigger a second grid render.
    for index, img_url in enumerate(urls):
        raw_image = _load_image(img_url)

        # ── STEP 2: Encode image ─────────────────────────────────────────────
        _banner(2, "Encode Image through ViT")
        image_224, enc_hidden, enc_mask = encode_image(model, processor, device, raw_image)

        # ── STEP 3: Generate caption + Attention Flow heatmaps ───────────────
        _banner(3, "Greedy Decode with Attention Flow")
        tokens, heatmaps = generate_with_flow(
            model, processor, device, enc_hidden, enc_mask
        )

        # ── INSPECT intermediate results ─────────────────────────────────────
        print(f"\n πŸ“ Tokens : {tokens}")
        print(f" πŸ—Ί Heatmaps : {len(heatmaps)} maps, each shape {heatmaps[0].shape if heatmaps else 'N/A'}")
        print(f" Peak values: {[f'{h.max():.3f}' for h in heatmaps[:5]]} …")

        # ── STEP 4: Visualize (only for the first image to save space) ───────
        if index == 0:
            _banner(4, "Build Attention Grid Visualization")
            save_attention_grid(image_224, tokens, heatmaps, out_path=OUT_GRID)
            grid_saved = True

        # ── STEP 5: Grade alignment ──────────────────────────────────────────
        _banner(5, "Grade Attention Alignment (IoU)")
        results = grade_alignment(raw_image, tokens, heatmaps, detector)
        all_iou_results.extend(results)

    # ── Save IoU chart (all images combined) ─────────────────────────────────
    if all_iou_results:
        print(f"\nπŸ“ˆ Saving IoU chart for {len(all_iou_results)} data points …")
        plot_iou_chart(all_iou_results, out_path=OUT_CHART)
        chart_saved = True

    # Report a path only for artefacts that were really generated in this run.
    not_generated = "(not generated)"
    print("\n" + "="*60)
    print(" βœ… PIPELINE COMPLETE")
    print(f" Attention grid β†’ {OUT_GRID if grid_saved else not_generated}")
    print(f" IoU chart β†’ {OUT_CHART if chart_saved else not_generated}")
    print("="*60 + "\n")
# Script entry point: run the full pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_pipeline()