NVIDIA-RETR-V2

Sleeping

App Files Files Community

AkshitShubham committed on Oct 17

Commit

9231f06

verified ·

1 Parent(s): 1817521

Update app.py

Browse files

Files changed (1) hide show

app.py +193 -625

app.py CHANGED Viewed

@@ -1,661 +1,229 @@
 import os
 import requests
-import json
-import gradio as gr
 from PIL import Image, ImageDraw
 import io
 import base64
-import re
-import fitz
-import zipfile
 import tempfile
-import time
-# --- Configuration ---
-NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
-if not NVIDIA_API_KEY:
-    raise ValueError("NVIDIA_API_KEY environment variable not set.")
-NIM_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
-HEADERS = {
-    "Authorization": f"Bearer {NVIDIA_API_KEY}",
-    "Accept": "application/json",
-    "Content-Type": "application/json",
-}
-MODEL_MAX_WIDTH = 1648
-MODEL_MAX_HEIGHT = 2048
-# Global store for processed data to enable download later (key is timestamp)
-PROCESSED_PAGES_STORE = {}
-CROPPED_QUESTIONS_STORE = {}
-# --- 1. Helpers for Page Selection, Image & API (Unchanged) ---
-# ... (resize_image_if_needed, call_parse_api_base64, get_question_number, process_column functions remain the same) ...
-# [Note: Due to space constraints, these helpers are assumed to be copied from the previous final working script]
-# --- Core Processing Logic (Re-included for clarity of dependencies) ---
-def resize_image_if_needed(image: Image.Image) -> Image.Image:
-    width, height = image.size
-    if width > MODEL_MAX_WIDTH or height > MODEL_MAX_HEIGHT:
-        ratio = min(MODEL_MAX_WIDTH / width, MODEL_MAX_HEIGHT / height)
-        new_width = int(width * ratio)
-        new_height = int(height * ratio)
-        return image.resize((new_width, new_height), Image.Resampling.LANCZOS)
-    return image
-def call_parse_api_base64(image_bytes: bytes):
     try:
-        base64_encoded_data = base64.b64encode(image_bytes)
-        base64_string = base64_encoded_data.decode('utf-8')
-        image_url = f"data:image/png;base64,{base64_string}"
-        payload = {
-            "model": "nvidia/nemoretriever-parse",
-            "messages": [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}]}],
-            "tools": [{"type": "function", "function": {"name": "markdown_bbox"}}],
-            "max_tokens": 2048,
-        }
-        response = requests.post(NIM_API_URL, headers=HEADERS, json=payload, timeout=300)
-        response.raise_for_status()
-        return response.json()
-    except requests.exceptions.RequestException as e:
-        error_detail = str(e)
-        if e.response is not None:
-            try:
-                error_detail = e.response.json().get("detail", e.response.text)
-            except json.JSONDecodeError:
-                error_detail = e.response.text
-        raise gr.Error(f"API Error: {error_detail}")
-def get_question_number(text: str) -> int:
-    match = re.match(r"^\d+", text.strip())
-    return int(match.group(0)) if match else -1
-def process_and_crop(original_image: Image.Image, api_response: dict, split_page: bool):
-    # This function now returns both the gallery images and the full question data
     try:
-        tool_call = api_response["choices"][0]["message"]["tool_calls"][0]
-        arguments_str = tool_call["function"]["arguments"]
-        all_elements = json.loads(arguments_str)[0]
-    except (KeyError, IndexError, json.JSONDecodeError):
-        return original_image, [], [], 0
-    question_starts = [elem for elem in all_elements if get_question_number(elem.get("text", "")) > 0]
-    if not question_starts:
-        return original_image, [], [], 0
-    image_with_boxes = original_image.copy()
-    img_draw = ImageDraw.Draw(image_with_boxes)
-    all_cropped_questions = []
-    if split_page:
-        page_midpoint = 0.5
-        left_starts = sorted([q for q in question_starts if q['bbox']['xmin'] < page_midpoint], key=lambda q: q['bbox']['ymin'])
-        right_starts = sorted([q for q in question_starts if q['bbox']['xmin'] >= page_midpoint], key=lambda q: q['bbox']['ymin'])
-        process_column(left_starts, all_elements, (0.0, page_midpoint), img_draw, original_image, all_cropped_questions)
-        process_column(right_starts, all_elements, (page_midpoint, 1.0), img_draw, original_image, all_cropped_questions)
-    else:
-        sorted_starts = sorted(question_starts, key=lambda q: q['bbox']['ymin'])
-        process_column(sorted_starts, all_elements, (0.0, 1.0), img_draw, original_image, all_cropped_questions)
-    all_cropped_questions.sort(key=lambda item: item[0])
-    final_gallery_images = [item[1] for item in all_cropped_questions]
-    return image_with_boxes, final_gallery_images, all_cropped_questions, len(all_cropped_questions)
-def process_column(column_starts, all_elements, column_bounds, img_draw, original_image, cropped_questions_list):
-    # This function processes a column and filters out too small crops
-    img_width, img_height = original_image.size
-    MIN_CROP_WIDTH = 100  # Minimum width in pixels
-    MIN_CROP_HEIGHT = 50  # Minimum height in pixels
-    for i, start_element in enumerate(column_starts):
-        q_num = get_question_number(start_element['text'])
-        slice_ymin = start_element['bbox']['ymin']
-        if i + 1 < len(column_starts):
-            slice_ymax = column_starts[i+1]['bbox']['ymin']
-        else:
-            remaining_elements = [e for e in all_elements if e['bbox']['ymin'] >= slice_ymin and column_bounds[0] <= e['bbox']['xmin'] < column_bounds[1]]
-            slice_ymax = max(e['bbox']['ymax'] for e in remaining_elements) if remaining_elements else 1.0
-        elements_in_slice = [e for e in all_elements if (slice_ymin <= e['bbox']['ymin'] < slice_ymax and column_bounds[0] <= e['bbox']['xmin'] < column_bounds[1])]
-        if not elements_in_slice: continue
-        crop_xmin = min(e['bbox']['xmin'] for e in elements_in_slice)
-        crop_xmax = max(e['bbox']['xmax'] for e in elements_in_slice)
-        abs_box = (crop_xmin * img_width, slice_ymin * img_height, crop_xmax * img_width, slice_ymax * img_height)
-        # Check if crop is too small
-        crop_width = abs_box[2] - abs_box[0]
-        crop_height = abs_box[3] - abs_box[1]
-        if crop_width < MIN_CROP_WIDTH or crop_height < MIN_CROP_HEIGHT:
-            print(f"Skipping too small crop for question {q_num}: {crop_width}x{crop_height}")
-            continue
-        img_draw.rectangle(abs_box, outline="red", width=3)
-        cropped_img = original_image.crop(abs_box)
-        # Generate descriptive filename from question text
-        question_text = start_element.get('text', '').strip()
-        # Clean text for filename (remove special characters, limit length)
-        clean_text = re.sub(r'[^\w\s-]', '', question_text)[:50]
-        clean_text = re.sub(r'\s+', '_', clean_text)
-        filename = f"{q_num}-{clean_text}" if clean_text else f"{q_num}-question"
-        cropped_questions_list.append((q_num, cropped_img, filename))
-def parse_page_ranges(range_str: str) -> set:
-    """Parses a string like '1,3,5-10' into a set of page numbers (1-based)."""
-    # ... (function remains the same)
-    if not range_str: return set()
-    pages = set()
-    parts = range_str.split(',')
-    for part in parts:
-        part = part.strip()
-        if not part: continue
-        try:
-            if '-' in part:
-                start, end = map(int, part.split('-'))
-                if start > end: continue
-                pages.update(range(start, end + 1))
-            else:
-                pages.add(int(part))
-        except ValueError:
-            continue
-    return pages
-# --- 4. NEW DOWNLOADER FUNCTION ---
-def upload_to_report_app(selected_indices_str: str, session_id: str):
     """
-    Uploads selected questions to the Report App (Flask app on port 1302) and returns redirect URL.
     """
-    print(f"🚀 REPORT APP UPLOAD - Starting upload process")
-    print(f"📝 Selected indices string: '{selected_indices_str}'")
-    print(f"🔑 Session ID: {session_id}")
-    if session_id not in CROPPED_QUESTIONS_STORE:
-        print(f"❌ Session {session_id} not found in CROPPED_QUESTIONS_STORE")
-        print(f"📋 Available sessions: {list(CROPPED_QUESTIONS_STORE.keys())}")
-        raise gr.Error("No processed questions found. Please run the extraction first.")
-    cropped_questions = CROPPED_QUESTIONS_STORE[session_id]
-    print(f"📊 Found {len(cropped_questions)} questions in session")
-    if not cropped_questions:
-        print("❌ No questions found in session")
-        raise gr.Error("No questions were extracted from the processed files.")
-    # If no selection specified, upload all questions
-    if not selected_indices_str.strip():
-        selected_indices = set(item[0] for item in cropped_questions)
-        print(f"📌 No selection specified, using all questions: {selected_indices}")
-    else:
-        selected_indices = parse_page_ranges(selected_indices_str)
-        print(f"📌 Parsed selection: {selected_indices}")
-        if not selected_indices:
-            print("❌ No valid indices parsed")
-            raise gr.Error("Please enter valid question numbers/ranges.")
     try:
-        print("🔧 Preparing files for upload...")
-        # Prepare files for upload to Flask app
-        files = []
-        selected_questions = []
-        for i, question_data in enumerate(cropped_questions):
-            print(f"🔍 Processing question {i+1}/{len(cropped_questions)}: {question_data[0]} (type: {type(question_data)})")
-            if len(question_data) >= 3:
-                q_num, img, filename = question_data[0], question_data[1], question_data[2]
-                print(f"   ✅ Question {q_num}, filename: {filename}")
-                if q_num in selected_indices:
-                    print(f"   🎯 Question {q_num} is selected for upload")
-                    # Convert PIL image to bytes
-                    img_io = io.BytesIO()
-                    print(f"   🖼️  Converting image to bytes (size: {img.size})")
-                    img.save(img_io, format='PNG')
-                    img_bytes = img_io.getvalue()
-                    print(f"   💾 Image converted to {len(img_bytes)} bytes")
-                    # Create file tuple for requests
-                    file_tuple = ('images', (f"{filename}.png", img_bytes, 'image/png'))
-                    files.append(file_tuple)
-                    selected_questions.append({'q_num': q_num, 'filename': filename})
-                    print(f"   ✅ Added to upload list")
-                else:
-                    print(f"   ⏭️  Question {q_num} not in selection, skipping")
-            else:
-                print(f"   ❌ Invalid question data format: {len(question_data)} items")
-        print(f"📦 Prepared {len(files)} files for upload")
-        print(f"📋 Selected questions: {[q['q_num'] for q in selected_questions]}")
-        if not files:
-            print("❌ No files prepared for upload")
-            raise gr.Error("No matching questions found to upload.")
-        # Upload to Flask app
-        flask_url = 'http://localhost:1302/upload'
-        print(f"🌐 Making POST request to: {flask_url}")
-        print(f"📤 Uploading {len(files)} files...")
-        response = requests.post(
-            flask_url,
-            files=files,
-            timeout=30
-        )
-        print(f"📡 Response status: {response.status_code}")
-        print(f"📡 Response headers: {dict(response.headers)}")
-        print(f"📡 Response text: {response.text[:500]}...")  # First 500 chars
-        if response.status_code == 200:
-            print("✅ Upload successful!")
-            try:
-                result = response.json()
-                print(f"📋 Response JSON: {result}")
-                flask_session_id = result.get('session_id')
-                print(f"🔑 Flask session ID: {flask_session_id}")
-                if flask_session_id:
-                    # Return the URL to redirect to question entry page
-                    redirect_url = f"http://localhost:1302/question_entry/{flask_session_id}"
-                    print(f"🎯 Generated redirect URL: {redirect_url}")
-                    return redirect_url
-                else:
-                    print("❌ No session_id in Flask response")
-                    raise gr.Error("Failed to get session ID from Report App.")
-            except json.JSONDecodeError as e:
-                print(f"❌ JSON decode error: {e}")
-                print(f"📄 Raw response: {response.text}")
-                raise gr.Error("Invalid JSON response from Report App.")
         else:
-            print(f"❌ HTTP error: {response.status_code}")
-            print(f"📄 Error response: {response.text}")
-            raise gr.Error(f"Upload failed: {response.status_code} - {response.text}")
-    except requests.exceptions.ConnectionError as e:
-        print(f"❌ Connection error: {e}")
-        raise gr.Error("Could not connect to Report App. Make sure it's running on port 1302.")
-    except requests.exceptions.Timeout as e:
-        print(f"❌ Timeout error: {e}")
-        raise gr.Error("Upload timed out. Please try again.")
     except Exception as e:
-        print(f"❌ Unexpected error: {type(e).__name__}: {e}")
-        import traceback
-        print(f"📋 Traceback: {traceback.format_exc()}")
-        raise gr.Error(f"Upload error: {str(e)}")
-def zip_selected_questions(selected_indices_str: str, session_id: str):
     """
-    Creates a ZIP file containing the individual question images selected by the user.
-    `selected_indices_str` is a comma-separated string of question numbers.
-    If empty, downloads all questions.
     """
-    if session_id not in CROPPED_QUESTIONS_STORE:
-        raise gr.Error("No processed questions found. Please run the extraction first.")
-    cropped_questions = CROPPED_QUESTIONS_STORE[session_id]
-    if not cropped_questions:
-        raise gr.Error("No questions were extracted from the processed files.")
-    # If no selection specified, download all questions
-    if not selected_indices_str.strip():
-        selected_indices = set(item[0] for item in cropped_questions)
-    else:
-        selected_indices = parse_page_ranges(selected_indices_str)
-        if not selected_indices:
-            raise gr.Error("Please enter valid question numbers/ranges to download.")
-    # Create temporary zip file
-    zip_path = os.path.join(tempfile.gettempdir(), f"questions_{session_id}.zip")
-    with zipfile.ZipFile(zip_path, 'w') as zf:
-        for question_data in cropped_questions:
-            q_num, img, filename = question_data
-            if q_num in selected_indices:
-                # Save image to bytes buffer
-                img_io = io.BytesIO()
-                img.save(img_io, format='PNG')
-                img_io.seek(0)
-                # Add to zip file with descriptive name
-                zf.writestr(f"{filename}.png", img_io.read())
-    return zip_path
-def zip_selected_pages(selected_indices_str: str, session_id: str):
-    """
-    Creates a ZIP file containing the processed pages selected by the user.
-    `selected_indices_str` is a comma-separated string of 1-based indices (0-based in Python).
-    If empty, downloads all pages.
-    """
-    if session_id not in PROCESSED_PAGES_STORE:
-        raise gr.Error("No processed results found. Please run the extraction first.")
-    processed_pages = PROCESSED_PAGES_STORE[session_id]
-    if not processed_pages:
-        raise gr.Error("No pages were processed.")
-    # If no selection specified, download all pages
-    if not selected_indices_str.strip():
-        selected_indices = set(range(1, len(processed_pages) + 1))  # 1-based indexing
-    else:
-        selected_indices = parse_page_ranges(selected_indices_str)
-        if not selected_indices:
-            raise gr.Error("Please enter valid page numbers/ranges to download.")
-    # Create temporary zip file
-    zip_path = os.path.join(tempfile.gettempdir(), f"processed_pages_{session_id}.zip")
-    with zipfile.ZipFile(zip_path, 'w') as zf:
-        for user_page_num in selected_indices:
-            # Convert 1-based user input to 0-based list index
-            list_index = user_page_num - 1
-            if 0 <= list_index < len(processed_pages):
-                img = processed_pages[list_index]
-                # Save image to bytes buffer
-                img_io = io.BytesIO()
-                img.save(img_io, format='PNG')
-                img_io.seek(0)
-                # Add to zip file
-                zf.writestr(f"Page_{user_page_num}_boxed.png", img_io.read())
-            else:
-                print(f"Warning: Page {user_page_num} is out of bounds and skipped.")
-    return zip_path
-# --- 5. Main Gradio Function (Updated Inputs) ---
-def question_extractor_app(pdf_file, image_file, split_page_toggle, page_selection_str):
-    # Determine the file source
-    if pdf_file and image_file:
-        raise gr.Error("Please upload either a PDF or an Image, not both.")
-    elif pdf_file:
-        input_filepath = pdf_file.name
-    elif image_file:
-        input_filepath = image_file.name
-    else:
-        raise gr.Error("Please upload a file.")
-    if not NVIDIA_API_KEY:
-        raise gr.Error("NVIDIA_API_KEY is not set. Please configure your environment variables.")
-    # --- File Loading ---
-    page_images_to_process = []
-    if input_filepath.lower().endswith('.pdf'):
-        selected_pages = parse_page_ranges(page_selection_str)
-        doc = fitz.open(input_filepath)
-        for page_num in range(len(doc)):
-            if not selected_pages or (page_num + 1) in selected_pages:
-                page = doc.load_page(page_num)
-                pix = page.get_pixmap(dpi=300)
-                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
-                page_images_to_process.append(img)
-        doc.close()
-    else:
-        # Note: Page selection is ignored for single image files
-        page_images_to_process.append(Image.open(input_filepath))
-    if not page_images_to_process:
-        return [], [], "", "", "No pages were selected or the file is empty.", "No questions found."
-    # --- Processing ---
-    all_processed_pages = []
-    all_gallery_images = []
-    all_question_data = []  # Store the full question data with metadata
-    total_questions_found = 0
-    for i, page_img in enumerate(page_images_to_process):
-        processed_image = resize_image_if_needed(page_img)
-        with io.BytesIO() as img_byte_arr:
-            processed_image.save(img_byte_arr, format='PNG')
-            image_bytes = img_byte_arr.getvalue()
-        api_response = call_parse_api_base64(image_bytes)
-        image_with_boxes, gallery_from_page, question_data_from_page, num_found = process_and_crop(processed_image, api_response, split_page_toggle)
-        all_processed_pages.append(image_with_boxes)
-        all_gallery_images.extend(gallery_from_page)
-        all_question_data.extend(question_data_from_page)
-        total_questions_found += num_found
-    summary = f"Processed {len(page_images_to_process)} page(s) and found a total of {total_questions_found} questions."
-    # Store processed data and generate unique session ID for download
-    session_id = str(time.time()).replace('.', '')
-    PROCESSED_PAGES_STORE[session_id] = all_processed_pages
-    CROPPED_QUESTIONS_STORE[session_id] = all_question_data
-    # Generate strings for download info
-    available_pages_str = ", ".join(str(i+1) for i in range(len(all_processed_pages)))
-    available_questions_str = ", ".join(str(item[0]) for item in all_question_data)
-    return (all_processed_pages, all_gallery_images, summary, session_id,
-            f"Available pages: {available_pages_str}", f"Available questions: {available_questions_str}")
-# --- 6. Launch the App ---
 if __name__ == "__main__":
-    with gr.Blocks(title="NIM Question Extractor", theme=gr.themes.Soft()) as demo:
         gr.Markdown(
             """
-            # 📄 NVIDIA NIM Question Extractor
-            Extract and crop individual questions from PDF documents or images with multi-column support and download capabilities.
             """
         )
-        # Input Section
-        with gr.Group():
-            gr.Markdown("## 📁 Input Files")
-            with gr.Row():
-                pdf_input = gr.File(
-                    label="Upload PDF File",
-                    file_types=['.pdf'],
-                    scale=1
-                )
-                image_input = gr.File(
-                    label="Upload Image File",
-                    file_types=['.png', '.jpg', '.jpeg'],
-                    scale=1
-                )
-        # Processing Options Section
-        with gr.Group():
-            gr.Markdown("## ⚙️ Processing Options")
-            with gr.Row():
-                with gr.Column(scale=2):
-                    page_select_input = gr.Textbox(
-                        label="Select Pages (PDF only)",
-                        placeholder="e.g., 1, 3, 5-10 (leave blank for all pages)",
-                        info="Enter page numbers or ranges separated by commas"
-                    )
-                with gr.Column(scale=1):
-                    split_toggle = gr.Checkbox(
-                        label="Two-Column Layout",
-                        info="Check if document has two columns"
-                    )
-        # Action Button
         with gr.Row():
-            submit_btn = gr.Button(
-                "🚀 Start Question Extraction",
-                variant="primary",
-                size="lg"
-            )
-        # Hidden session management
-        session_id_output = gr.Textbox(visible=False)
-        # Results Section
-        with gr.Group():
-            gr.Markdown("## 📊 Results")
-            # Summary
-            output_summary = gr.Textbox(
-                label="Processing Summary",
-                interactive=False,
-                show_copy_button=True
-            )
-            # Download Sections
-            with gr.Row():
-                # Pages Download
-                with gr.Column(scale=1):
-                    gr.Markdown("### 📄 Download Pages (with boxes)")
-                    download_pages_info = gr.Textbox(
-                        label="Available Pages",
-                        interactive=False,
-                        placeholder="Process files first"
-                    )
-                    download_pages_input = gr.Textbox(
-                        label="Select Pages",
-                        placeholder="e.g., 1-3, 5 (leave blank for all)",
-                        info="Pages with red boxes"
-                    )
-                    download_pages_btn = gr.DownloadButton(
-                        "📥 Download Pages ZIP",
-                        interactive=False,
-                        variant="secondary"
-                    )
-                # Questions Download
-                with gr.Column(scale=1):
-                    gr.Markdown("### 🔍 Download Questions")
-                    download_questions_info = gr.Textbox(
-                        label="Available Questions",
-                        interactive=False,
-                        placeholder="Process files first"
-                    )
-                    download_questions_input = gr.Textbox(
-                        label="Select Questions",
-                        placeholder="e.g., 1-5, 8, 10-12 (leave blank for all)",
-                        info="Individual question images"
-                    )
-                    download_questions_btn = gr.DownloadButton(
-                        "📥 Download Questions ZIP",
-                        interactive=False,
-                        variant="primary"
-                    )
-                # Report App Integration
-                with gr.Column(scale=1):
-                    gr.Markdown("### 📝 Report App")
-                    report_app_input = gr.Textbox(
-                        label="Select Questions for Report",
-                        placeholder="e.g., 1-5, 8 (leave blank for all)",
-                        info="Upload to Report App for analysis"
-                    )
-                    report_app_output = gr.Textbox(
-                        label="Report App URL",
-                        interactive=False,
-                        placeholder="Upload questions to get redirect URL",
-                        show_copy_button=True
-                    )
-                    with gr.Row():
-                        report_upload_btn = gr.Button(
-                            "🚀 Upload to Report App",
-                            interactive=False,
-                            variant="primary"
-                        )
-                        report_open_btn = gr.Button(
-                            "🔗 Open Report App",
-                            interactive=False,
-                            link="",
-                            variant="secondary"
-                        )
-        # Image Galleries
-        with gr.Group():
-            gr.Markdown("## 🖼️ Visual Results")
-            with gr.Tab("Processed Pages (with boxes)"):
-                output_processed_pages = gr.Gallery(
-                    label="Pages with Question Boundaries",
-                    height=400,
-                    columns=2,
-                    object_fit="contain",
-                    show_label=False
-                )
-            with gr.Tab("Individual Questions"):
-                output_cropped_gallery = gr.Gallery(
-                    label="Cropped Questions (sorted by number)",
-                    height=400,
-                    columns=4,
-                    object_fit="contain",
-                    show_label=False
-                )
-        # --- Event Handlers ---
-        # Main processing handler
-        submit_btn.click(
-            fn=question_extractor_app,
-            inputs=[pdf_input, image_input, split_toggle, page_select_input],
-            outputs=[output_processed_pages, output_cropped_gallery, output_summary,
-                    session_id_output, download_pages_info, download_questions_info]
-        ).then(
-            # Re-enable download buttons after results are ready
-            lambda: (gr.DownloadButton(interactive=True), gr.DownloadButton(interactive=True), gr.Button(interactive=True)),
-            outputs=[download_pages_btn, download_questions_btn, report_upload_btn]
-        )
-        # Download handlers
-        download_pages_btn.click(
-            fn=zip_selected_pages,
-            inputs=[download_pages_input, session_id_output],
-            outputs=[download_pages_btn],
-            api_name=False
-        )
-        download_questions_btn.click(
-            fn=zip_selected_questions,
-            inputs=[download_questions_input, session_id_output],
-            outputs=[download_questions_btn],
-            api_name=False
-        )
-        # Report App handlers
-        def handle_report_upload(questions_input, session_id):
-            try:
-                url = upload_to_report_app(questions_input, session_id)
-                return url, gr.Button(interactive=True, link=url)
-            except Exception as e:
-                return f"Error: {str(e)}", gr.Button(interactive=False)
-        report_upload_btn.click(
-            fn=handle_report_upload,
-            inputs=[report_app_input, session_id_output],
-            outputs=[report_app_output, report_open_btn]
         )
-        # Footer
-        gr.Markdown(
-            """
-            ---
-            💡 **Tips:**
-            - Upload either a PDF or image file, not both
-            - Use page selection to process specific pages from PDFs
-            - Enable two-column layout for documents with side-by-side content
-            - **Pages ZIP**: Contains full pages with red boxes showing question boundaries
-            - **Questions ZIP**: Contains individual cropped question images with descriptive names
-            - **Report App**: Upload questions to the analysis app on port 1302 for detailed reporting
-            - **Leave download/upload fields blank to process ALL pages/questions**
-            """
-        )
     demo.launch(debug=True)

 import os
 import requests
 from PIL import Image, ImageDraw
 import io
 import base64
+import json
+import gradio as gr
+import fitz  # PyMuPDF
 import tempfile
+from typing import Union
+# --- Configuration & API Constants ---
+# OCR endpoint: _redact_text_in_image POSTs a base64 PNG here to get text detections.
+INVOKE_URL_OCR = "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
+# Chat-completions endpoint: _detect_pictures_with_parser POSTs here using the
+# "nvidia/nemoretriever-parse" model with the "markdown_bbox" tool.
+INVOKE_URL_PARSER = "https://integrate.api.nvidia.com/v1/chat/completions"
+# NOTE(review): presumably the pixel budget images are downscaled to before being
+# sent to the parser -- the code that uses it is not visible here; confirm.
+MAX_PIXELS_FOR_PARSER = 1024 * 1024  # 1 Megapixel
+# =================================================================================
+# SELF-CONTAINED REDACTION LOGIC
+# (Helpers that find pictures in an image and paint over the text inside them)
+# =================================================================================
+def _get_average_color_from_regions(image: Image.Image, regions: list[tuple]):
+    """Return the average RGB color of the pixels covered by ``regions``.
+
+    Args:
+        image: Image to sample. Any mode other than RGB (RGBA, L, P, ...) is
+            converted to RGB first so every pixel unpacks to three channels.
+        regions: Boxes given as ``(x1, y1, x2, y2)`` in pixel coordinates.
+            Values are truncated to int and clamped to the image bounds;
+            boxes that end up empty contribute no pixels.
+
+    Returns:
+        An ``(r, g, b)`` tuple of ints (per-channel mean, rounded down), or
+        ``(0, 0, 0)`` when no pixel was sampled at all.
+    """
+    img_width, img_height = image.size
+    # Previously only RGBA was converted; grayscale/palette images return a
+    # single int (or a 2/4-tuple) per pixel and broke the 3-way unpacking below.
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    pixels = image.load()
+    total_r = total_g = total_b = 0
+    pixel_count = 0
+    for region in regions:
+        x1, y1, x2, y2 = [max(0, int(c)) for c in region]
+        x2 = min(img_width, x2)
+        y2 = min(img_height, y2)
+        # range() is empty when the clamped box has no area, so such boxes
+        # are skipped without any explicit check.
+        for x in range(x1, x2):
+            for y in range(y1, y2):
+                r, g, b = pixels[x, y]
+                total_r += r
+                total_g += g
+                total_b += b
+                pixel_count += 1
+    if pixel_count == 0:
+        return (0, 0, 0)
+    return (total_r // pixel_count, total_g // pixel_count, total_b // pixel_count)
+def _detect_pictures_with_parser(image_to_process: Image.Image, api_key: str):
+    """Ask the NemoRetriever Parser model for the 'Picture' elements of an image.
+
+    The image is PNG-encoded, embedded as a base64 ``<img>`` tag and sent to
+    ``INVOKE_URL_PARSER`` with the ``markdown_bbox`` tool forced.
+
+    Args:
+        image_to_process: Image to analyze.
+        api_key: Bearer token placed in the ``Authorization`` header.
+
+    Returns:
+        A list with the ``bbox`` value of every element whose ``type`` is
+        ``"Picture"``, exactly as returned by the model. Empty when the
+        response carries no usable tool call.
+
+    Raises:
+        requests.exceptions.RequestException: On network failure or a non-2xx
+            status (via ``raise_for_status``).
+        json.JSONDecodeError: If the tool-call arguments are not valid JSON.
+    """
+    headers = {"Authorization": f"Bearer {api_key}", "Accept": "application/json"}
+    buffered = io.BytesIO()
+    image_to_process.save(buffered, format="PNG")
+    b64_str = base64.b64encode(buffered.getvalue()).decode("ascii")
+    content = f'<img src="data:image/png;base64,{b64_str}" />'
+    tool_name = "markdown_bbox"
+    payload = {
+        "model": "nvidia/nemoretriever-parse",
+        "messages": [{"role": "user", "content": content}],
+        "tools": [{"type": "function", "function": {"name": tool_name}}],
+        "tool_choice": {"type": "function", "function": {"name": tool_name}},
+        "max_tokens": 2048,
+    }
+    response = requests.post(INVOKE_URL_PARSER, headers=headers, json=payload, timeout=120)
+    response.raise_for_status()
+    response_json = response.json()
+    # ``dict.get(key, default)`` only helps when the key is absent. Use ``or``
+    # so keys that are present but null/empty (e.g. "choices": [] or
+    # "tool_calls": null) also fall back instead of raising.
+    choices = response_json.get('choices') or [{}]
+    message = choices[0].get('message') or {}
+    tool_calls = message.get('tool_calls') or []
+    if not tool_calls:
+        return []
+    function_call = tool_calls[0].get('function') or {}
+    parsed_arguments = json.loads(function_call.get('arguments') or '[]')
+    if not (isinstance(parsed_arguments, list) and parsed_arguments):
+        return []
+    elements = parsed_arguments[0]
+    if not isinstance(elements, list):
+        return []
+    return [
+        element["bbox"]
+        for element in elements
+        if isinstance(element, dict)
+        and element.get("type") == "Picture"
+        and element.get("bbox")
+    ]
+def _redact_text_in_image(input_image: Image.Image, api_key: str):
+    """Paint over every text region the OCR model finds in ``input_image``.
+
+    The image is PNG-encoded and sent to ``INVOKE_URL_OCR``. Each detected
+    text box is filled with the average color sampled from thin strips just
+    outside its four edges, so the patch blends with its surroundings.
+
+    This is best effort: if the request fails or the response does not have
+    the expected shape, the original ``input_image`` is returned unchanged.
+    A single malformed detection is skipped rather than discarding the
+    redactions already made for the other detections.
+
+    Args:
+        input_image: Image (typically a crop) to redact.
+        api_key: Bearer token placed in the ``Authorization`` header.
+
+    Returns:
+        A redacted copy of the image, or ``input_image`` itself on failure.
+    """
+    headers = {"Authorization": f"Bearer {api_key}", "Accept": "application/json"}
+    buffered = io.BytesIO()
+    input_image.save(buffered, format="PNG")
+    image_b64 = base64.b64encode(buffered.getvalue()).decode()
+    payload = {"input": [{"type": "image_url", "url": f"data:image/png;base64,{image_b64}"}]}
     try:
+        response = requests.post(INVOKE_URL_OCR, headers=headers, json=payload, timeout=60)
+        response.raise_for_status()
+        response_json = response.json()
+    except (requests.exceptions.RequestException, ValueError):
+        # ValueError covers requests versions whose .json() raises a plain
+        # JSON decode error instead of a RequestException subclass.
+        return input_image
+    image_with_redactions = input_image.copy()
+    draw = ImageDraw.Draw(image_with_redactions)
+    img_width, img_height = image_with_redactions.size
+    # Sampling strip thickness: 1% of the image diagonal, at least one pixel.
+    radius = max(1, int(((img_width**2 + img_height**2)**0.5) / 100))
     try:
+        detections = response_json['data'][0]['text_detections']
+    except (KeyError, IndexError, TypeError):
+        return input_image
+    if not isinstance(detections, list):
+        return input_image
+    for detection in detections:
+        try:
+            points = detection["bounding_box"]["points"]
+            # Points 0 and 2 are used as opposite corners, in coordinates
+            # relative to the image size.
+            xs = (points[0]['x'] * img_width, points[2]['x'] * img_width)
+            ys = (points[0]['y'] * img_height, points[2]['y'] * img_height)
+        except (KeyError, IndexError, TypeError):
+            continue  # skip only this detection; keep the other redactions
+        # Normalize the corners: ImageDraw.rectangle requires x1 >= x0 and
+        # y1 >= y0, so the two points must not be passed in arbitrary order.
+        left, right = min(xs), max(xs)
+        top, bottom = min(ys), max(ys)
+        sample_regions = [
+            (left, top - radius, right, top),        # strip above the box
+            (left, bottom, right, bottom + radius),  # strip below the box
+            (left - radius, top, left, bottom),      # strip to the left
+            (right, top, right + radius, bottom),    # strip to the right
+        ]
+        redaction_color = _get_average_color_from_regions(image_with_redactions, sample_regions)
+        draw.rectangle([(left, top), (right, bottom)], fill=redaction_color)
+    return image_with_redactions
+def redact_pictures_in_image(image_source: Union[str, Image.Image], api_key: str, callback: callable = None) -> Image.Image:
     """
+    Analyzes an image to find pictures, then redacts text within those pictures.
+    Now accepts a file path, base64 string, or a PIL Image object directly.
     """
+    def _progress(message: str):
+        if callback: callback(message)
+    _progress("Loading image for processing...")
     try:
+        if isinstance(image_source, Image.Image):
+            input_image = image_source.convert("RGB")
+        elif os.path.exists(image_source):
+            input_image = Image.open(image_source).convert("RGB")
         else:
+            input_image = Image.open(io.BytesIO(base64.b64decode(image_source))).convert("RGB")
     except Exception as e:
+        raise ValueError(f"Invalid image_source. Error: {e}")
+    image_to_analyze = input_image
+    original_width, original_height = input_image.size
+    if (original_width * original_height) > MAX_PIXELS_FOR_PARSER:
+        _progress(f"Image is large, resizing for analysis...")
+        scale = (MAX_PIXELS_FOR_PARSER / (original_width * original_height))**0.5
+        new_dims = (int(original_width * scale), int(original_height * scale))
+        image_to_analyze = input_image.resize(new_dims, Image.Resampling.LANCZOS)
+    _progress("Detecting 'Picture' elements...")
+    try:
+        picture_bboxes = _detect_pictures_with_parser(image_to_analyze, api_key)
+    except requests.exceptions.RequestException as e:
+        _progress(f"API Error during picture detection: {e}"); raise
+    if not picture_bboxes:
+        _progress("No 'Picture' elements found.")
+        return input_image
+    _progress(f"Found {len(picture_bboxes)} 'Picture' element(s). Redacting text...")
+    final_image = input_image.copy()
+    for i, box in enumerate(picture_bboxes):
+        _progress(f"  - Processing picture {i + 1}/{len(picture_bboxes)}...")
+        x1, y1 = int(box["xmin"] * original_width), int(box["ymin"] * original_height)
+        x2, y2 = int(box["xmax"] * original_width), int(box["ymax"] * original_height)
+        cropped_element = input_image.crop((x1, y1, x2, y2))
+        redacted_crop = _redact_text_in_image(cropped_element, api_key)
+        final_image.paste(redacted_crop, (x1, y1))
+    _progress("Redaction for this page complete.")
+    return final_image
+# =================================================================================
+# GRADIO PDF PROCESSING APPLICATION
+# =================================================================================
+def process_pdf(pdf_file, progress=gr.Progress(track_tqdm=True)):
     """
+    Main function for the Gradio app. Takes an uploaded PDF file, processes each
+    page, and returns the path to the redacted output PDF.
     """
+    if pdf_file is None:
+        raise gr.Error("Please upload a PDF file.")
+    api_key = os.getenv("NVIDIA_API_KEY")
+    if not api_key:
+        raise gr.Error("NVIDIA_API_KEY environment variable not set.")
+    log_messages = []
+    def progress_callback(message):
+        print(message) # Also print to console for debugging
+        log_messages.append(message)
+    try:
+        pdf_path = pdf_file.name
+        doc = fitz.open(pdf_path)
+        processed_pages = []
+        for page_num in progress.tqdm(range(len(doc)), desc="Processing PDF Pages"):
+            progress_callback(f"\n--- Processing Page {page_num + 1} of {len(doc)} ---")
+            # Convert page to image (150 DPI is a good balance of quality and size)
+            page = doc.load_page(page_num)
+            pix = page.get_pixmap(dpi=150)
+            page_image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+            # Run the redaction pipeline on the single page image
+            processed_image = redact_pictures_in_image(
+                image_source=page_image,
+                api_key=api_key,
+                callback=progress_callback
+            )
+            processed_pages.append(processed_image)
+        progress_callback("\n--- Finalizing PDF ---")
+        if not processed_pages:
+            raise gr.Error("No pages were processed from the PDF.")
+        # Save processed images into a new PDF
+        output_pdf_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
+        processed_pages[0].save(
+            output_pdf_path,
+            "PDF",
+            resolution=100.0,
+            save_all=True,
+            append_images=processed_pages[1:]
+        )
+        progress_callback(f"Successfully created redacted PDF: {os.path.basename(output_pdf_path)}")
+        return output_pdf_path, "\n".join(log_messages)
+    except Exception as e:
+        gr.Error(f"An error occurred: {e}")
+        return None, f"An error occurred: {e}"
+# --- Gradio UI Definition ---
 if __name__ == "__main__":
+    # Build the UI only when this file is run as a script. Components are created
+    # inside nested context managers, so their order here is the page layout order.
+    with gr.Blocks(theme=gr.themes.Default(), title="NVIDIA PDF Redactor") as demo:
+        # Introductory text shown at the top of the page.
         gr.Markdown(
             """
+            # document Redactor for Pictures
+            Upload a PDF document. The tool will scan each page for pictures, redact any text found exclusively
+            within those pictures, and then generate a new, downloadable PDF with the redactions.
+            Pages without pictures are skipped to save time and cost.
             """
         )
+        # Two columns side by side: upload + button (scale 1), results (scale 2).
         with gr.Row():
+            with gr.Column(scale=1):
+                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+                process_btn = gr.Button("🚀 Process PDF and Redact Pictures", variant="primary")
+            with gr.Column(scale=2):
+                pdf_output = gr.File(label="Download Redacted PDF", interactive=False)
+                status_log = gr.Textbox(label="Processing Log", lines=15, interactive=False)
+        # process_pdf returns (pdf path, log text), matching the two outputs in order.
+        process_btn.click(
+            fn=process_pdf,
+            inputs=[pdf_input],
+            outputs=[pdf_output, status_log]
         )
+        gr.Markdown("---")
+        gr.Markdown("Powered by [NVIDIA NIM](https://build.nvidia.com/explore/discover).")
+    # Launch after the Blocks context has closed, i.e. once the layout is fully defined.
     demo.launch(debug=True)