Spaces:
Sleeping
Sleeping
| """ | |
pipeline.py
============
Master Orchestrator — Task 2, Iteration 3

This script chains all five steps together in the correct order,
printing a clear progress banner at each stage so you can see exactly
what is happening and inspect intermediate results.

Step-by-step flow
------------------
  STEP 1 — Load BLIP model (with fine-tuned weights if available).
  STEP 2 — Encode image through ViT → encoder_hidden_states.
  STEP 3 — Greedy decode token-by-token with Attention Flow heatmaps
           (multi-layer GradCAM rollout, bicubic upscaling).
  STEP 4 — Build 2×5 overlay grid image → attention_grid_v3.png.
  STEP 5 — Grade alignment with OWL-ViT + IoU → iou_chart_v3.png.

Designed to be deployment-friendly:
  • Every step is a clean function import from its own module.
  • Intermediate artefacts (heatmaps, tokens) can be inspected between steps.
  • Outputs are saved to the same directory as this script.

Usage:
    export PYTHONPATH=.
    venv/bin/python task/task_02/pipeline.py
| """ | |
| import os | |
| import sys | |
| import requests | |
| from PIL import Image | |
# ── path bootstrap ───────────────────────────────────────────────────────────
# Make the project root (two directory levels above the folder holding this
# file) importable, so the absolute ``task.task_02.*`` imports resolve when
# the file is executed directly as a script.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
_PROJECT_ROOT = os.path.dirname(os.path.dirname(_THIS_DIR))
if _PROJECT_ROOT not in sys.path:
    # Prepend rather than append so this checkout is searched first.
    sys.path.insert(0, _PROJECT_ROOT)
# ── step imports ─────────────────────────────────────────────────────────────
| from task.task_02.step1_load_model import load_model | |
| from task.task_02.step2_encode_image import encode_image | |
| from task.task_02.step3_gradcam_flow import generate_with_flow | |
| from task.task_02.step4_visualize import save_attention_grid | |
| from task.task_02.step5_iou_grade import load_detector, grade_alignment, plot_iou_chart | |
# ── Output paths ─────────────────────────────────────────────────────────────
# Both artefacts are written into the directory that contains this script.
OUT_GRID = os.path.join(_THIS_DIR, "attention_grid_v3.png")
OUT_CHART = os.path.join(_THIS_DIR, "iou_chart_v3.png")

# Test images: each URL is processed in order by run_pipeline().
TEST_URLS = [
    "http://images.cocodataset.org/val2017/000000039769.jpg",  # cats on couch
    "http://images.cocodataset.org/val2017/000000000139.jpg",  # dining room
]
def _load_image(url: str, timeout: float = 30.0) -> Image.Image:
    """Download the image at *url* and return it as an RGB PIL image.

    Args:
        url: HTTP(S) address of the image to fetch.
        timeout: Seconds to wait on the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block the whole pipeline indefinitely.

    Returns:
        The downloaded image converted to RGB mode.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    print(f"\nπ₯ Downloading test image: {url}")
    # The context manager releases the streamed connection even when the
    # status check or the image decode raises.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
        response.raise_for_status()
        # convert() forces a full decode, so all pixel data is read from the
        # stream before the response is closed.
        return Image.open(response.raw).convert("RGB")
def _banner(step: int, title: str):
    """Print a progress banner for pipeline step *step*.

    The banner is a blank line, a 60-character ``=`` rule, the step line
    containing *step* and *title*, and a closing rule.
    """
    rule = "=" * 60
    # One print call; sep="\n" yields the same output as three separate prints.
    print(f"\n{rule}", f" STEP {step} β {title}", rule, sep="\n")
# ── Main pipeline ────────────────────────────────────────────────────────────
def run_pipeline():
    """Run all five pipeline steps over every URL in ``TEST_URLS``.

    The model and the detector are loaded once up front. Each image is then
    downloaded, encoded, decoded into tokens plus heatmaps, and graded; the
    grading results of all images are pooled. The attention grid is saved
    only for the first URL, and the IoU chart is saved once at the end if
    any grading results were collected. Progress is printed to stdout.
    """
    # STEP 1: load the captioning model.
    _banner(1, "Load BLIP Model")
    model, processor, device = load_model(use_finetuned=True)

    # The detector is created once and shared by every image below.
    detector = load_detector(device)

    # Grading results from all images, pooled for the final chart.
    all_iou_results = []

    for img_url in TEST_URLS:
        raw_image = _load_image(img_url)

        # STEP 2: encode the image.
        _banner(2, "Encode Image through ViT")
        image_224, enc_hidden, enc_mask = encode_image(
            model, processor, device, raw_image
        )

        # STEP 3: generate the caption tokens and their heatmaps.
        _banner(3, "Greedy Decode with Attention Flow")
        tokens, heatmaps = generate_with_flow(
            model, processor, device, enc_hidden, enc_mask
        )

        # Show the intermediate results; an empty heatmap list reports 'N/A'
        # for the shape and an empty list of peak values.
        print(f"\n π Tokens : {tokens}")
        map_count = len(heatmaps)
        first_shape = heatmaps[0].shape if heatmaps else 'N/A'
        print(f" πΊ Heatmaps : {map_count} maps, each shape {first_shape}")
        peak_values = [f'{h.max():.3f}' for h in heatmaps[:5]]
        print(f" Peak values: {peak_values} β¦")

        # STEP 4: the grid is rendered for the first test image only.
        if img_url == TEST_URLS[0]:
            _banner(4, "Build Attention Grid Visualization")
            save_attention_grid(image_224, tokens, heatmaps, out_path=OUT_GRID)

        # STEP 5: grade this image and pool its results.
        _banner(5, "Grade Attention Alignment (IoU)")
        all_iou_results.extend(
            grade_alignment(raw_image, tokens, heatmaps, detector)
        )

    # One chart covering every image; skipped when nothing was graded.
    if all_iou_results:
        print(f"\nπ Saving IoU chart for {len(all_iou_results)} data points β¦")
        plot_iou_chart(all_iou_results, out_path=OUT_CHART)

    rule = "=" * 60
    print("\n" + rule)
    print(" β PIPELINE COMPLETE")
    print(f" Attention grid β {OUT_GRID}")
    print(f" IoU chart β {OUT_CHART}")
    print(rule + "\n")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_pipeline()