Spaces:

griddev
/

project_02_DS

Sleeping

App Files Files Community

project_02_DS / task /task_02 /pipeline.py

griddev

Deploy Streamlit Space app

0710b5c verified 28 days ago

raw

history blame contribute delete

6.1 kB

	"""
	pipeline.py
	============
	Master Orchestrator — Task 2, Iteration 3

	This script chains all five steps together in the correct order,
	printing a clear progress banner at each stage so you can see exactly
	what is happening and inspect intermediate results.

	Step-by-step flow
	------------------
	STEP 1 → Load BLIP model (with fine-tuned weights if available).
	STEP 2 → Encode image through ViT → encoder_hidden_states.
	STEP 3 → Greedy decode token-by-token with Attention Flow heatmaps
	(multi-layer GradCAM rollout, bicubic upscaling).
	STEP 4 → Build 2×5 overlay grid image → attention_grid_v3.png.
	STEP 5 → Grade alignment with OWL-ViT + IoU → iou_chart_v3.png.

	Designed to be deployment-friendly:
	• Every step is a clean function import from its own module.
	• Intermediate artefacts (heatmaps, tokens) can be inspected between steps.
	• Outputs are saved to the same directory as this script.

	Usage:
	export PYTHONPATH=.
	venv/bin/python task/task_02/pipeline.py
	"""

	import os
	import sys
	import requests
	from PIL import Image

	# ── path bootstrap ────────────────────────────────────────────────────────────
	# Make the project root importable so the absolute ``task.task_02.*`` imports
	# below resolve even when this file is executed directly as a script.
	# _THIS_DIR is the directory holding this file; the project root is taken to be
	# two directory levels above it.
	_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
	_PROJECT_ROOT = os.path.dirname(os.path.dirname(_THIS_DIR))
	# Prepend (index 0) only when absent, so the entry is never duplicated.
	if _PROJECT_ROOT not in sys.path:
	    sys.path.insert(0, _PROJECT_ROOT)

	# ── step imports ─────────────────────────────────────────────────────────────
	from task.task_02.step1_load_model import load_model
	from task.task_02.step2_encode_image import encode_image
	from task.task_02.step3_gradcam_flow import generate_with_flow
	from task.task_02.step4_visualize import save_attention_grid
	from task.task_02.step5_iou_grade import load_detector, grade_alignment, plot_iou_chart


	# ── Output paths ─────────────────────────────────────────────────────────────
	# Both artefacts are written into the directory that contains this script.
	# OUT_GRID is the attention overlay grid image; OUT_CHART is the IoU chart image.
	OUT_GRID = os.path.join(_THIS_DIR, "attention_grid_v3.png")
	OUT_CHART = os.path.join(_THIS_DIR, "iou_chart_v3.png")

	# ── Test images ───────────────────────────────────────────────────────────────
	# Remote images the pipeline is run on, in order. They are downloaded at run
	# time, so network access is required.
	TEST_URLS = [
	    "http://images.cocodataset.org/val2017/000000039769.jpg",  # cats on couch
	    "http://images.cocodataset.org/val2017/000000000139.jpg",  # dining room
	]


	def _load_image(url: str, timeout: float = 30.0) -> Image.Image:
	    """Download an image from ``url`` and return it as a PIL RGB image.

	    Args:
	        url: HTTP(S) address of the image to fetch.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout a stalled connection would block the pipeline forever.

	    Returns:
	        The downloaded image converted to RGB mode.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	        requests.RequestException: On connection problems or timeout.
	    """
	    print(f"\n📥 Downloading test image: {url}")
	    # The context manager releases the streamed connection once the image has
	    # been decoded, instead of leaving it open until garbage collection.
	    with requests.get(url, stream=True, timeout=timeout) as response:
	        # Fail here with a clear HTTP error rather than letting PIL choke on
	        # an HTML error page.
	        response.raise_for_status()
	        # Make the raw stream transparently undo gzip/deflate transfer encoding.
	        response.raw.decode_content = True
	        # convert() forces the pixel data to be read while the stream is open.
	        return Image.open(response.raw).convert("RGB")


	def _banner(step: int, title: str):
	    """Print a three-line progress banner announcing pipeline step ``step``.

	    The banner is a blank line, a 60-character ``=`` rule, the step number
	    with its title, and a closing rule.
	    """
	    rule = "=" * 60
	    heading = f" STEP {step} — {title}"
	    # One print call emits the same text as three consecutive prints.
	    print("\n".join(("", rule, heading, rule)))


	# ── Main pipeline ─────────────────────────────────────────────────────────────
	def run_pipeline():
	    """Run every pipeline step over each URL in ``TEST_URLS``.

	    The BLIP model and the OWL-ViT detector are loaded once and reused for
	    all images. For each image the function encodes it, decodes a caption
	    with per-token heatmaps and grades the heatmaps against the detector.
	    The attention grid is rendered for the first image only; the IoU results
	    of all images are pooled into a single chart at the end.

	    Side effects:
	        Prints progress to stdout and writes ``OUT_GRID`` / ``OUT_CHART``
	        via the step modules when there is something to render.
	    """
	    # ── STEP 1: Load model ───────────────────────────────────────────────────
	    _banner(1, "Load BLIP Model")
	    model, processor, device = load_model(use_finetuned=True)

	    # ── Load OWL-ViT grader (do once, reuse for all images) ─────────────────
	    detector = load_detector(device)

	    # Aggregate IoU results across images for the final chart
	    all_iou_results = []

	    # Track which artefacts were really produced so the closing summary
	    # never points at a file that was not written during this run.
	    grid_saved = False
	    chart_saved = False

	    # Use the position, not the URL text, to recognise the first image: a URL
	    # repeated later in TEST_URLS must not trigger the grid a second time.
	    for index, img_url in enumerate(TEST_URLS):
	        raw_image = _load_image(img_url)

	        # ── STEP 2: Encode image ─────────────────────────────────────────────
	        _banner(2, "Encode Image through ViT")
	        image_224, enc_hidden, enc_mask = encode_image(
	            model, processor, device, raw_image
	        )

	        # ── STEP 3: Generate caption + Attention Flow heatmaps ───────────────
	        _banner(3, "Greedy Decode with Attention Flow")
	        tokens, heatmaps = generate_with_flow(
	            model, processor, device, enc_hidden, enc_mask
	        )

	        # ── INSPECT intermediate results ─────────────────────────────────────
	        print(f"\n 📝 Tokens : {tokens}")
	        print(f" 🗺 Heatmaps : {len(heatmaps)} maps, each shape {heatmaps[0].shape if heatmaps else 'N/A'}")
	        print(f" Peak values: {[f'{h.max():.3f}' for h in heatmaps[:5]]} …")

	        # ── STEP 4: Visualize (only for the first image to save space) ───────
	        if index == 0:
	            _banner(4, "Build Attention Grid Visualization")
	            save_attention_grid(image_224, tokens, heatmaps, out_path=OUT_GRID)
	            grid_saved = True

	        # ── STEP 5: Grade alignment ──────────────────────────────────────────
	        _banner(5, "Grade Attention Alignment (IoU)")
	        results = grade_alignment(raw_image, tokens, heatmaps, detector)
	        all_iou_results.extend(results)

	    # ── Save IoU chart (all images combined) ─────────────────────────────────
	    if all_iou_results:
	        print(f"\n📈 Saving IoU chart for {len(all_iou_results)} data points …")
	        plot_iou_chart(all_iou_results, out_path=OUT_CHART)
	        chart_saved = True

	    print("\n" + "=" * 60)
	    print(" ✅ PIPELINE COMPLETE")
	    print(f" Attention grid → {OUT_GRID if grid_saved else '(not generated)'}")
	    print(f" IoU chart → {OUT_CHART if chart_saved else '(not generated)'}")
	    print("=" * 60 + "\n")


	# Run the full pipeline only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    run_pipeline()