| import streamlit as st |
| import tempfile |
| import os |
| import pandas as pd |
| from src.extract_text.ingest import RequirementsIngest |
| from src.extract_text.google_document_api import GoogleDocumentAPI |
| from src.extract_text.extract_meta_data import PDFArtworkMetadataExtractor |
| from src.core.analysis import ComplianceAnalysis |
| from pdf2image import convert_from_path |
| from PIL import Image, ImageDraw, ImageFont |
| from src.utils.image_utils import ImageUtils |
| import base64 |
| from io import BytesIO |
| from src.utils.barcode import Barcode |
| import glob |
| import os |
|
|
# Ensure poppler (provides `pdftoppm`, required by pdf2image) is available.
# Install only when missing so Streamlit reruns don't re-invoke apt-get, and
# use subprocess with an argument list instead of a shell string.
import shutil
import subprocess

if shutil.which("pdftoppm") is None:
    # NOTE(review): assumes a Debian-based image with root access — best
    # effort, mirrors the original os.system() call which ignored failures.
    try:
        subprocess.run(["apt-get", "update"], check=False)
        subprocess.run(["apt-get", "install", "-y", "poppler-utils"], check=False)
    except OSError:
        pass
|
|
def load_client_requirements_files(base_path="requirements_library/client-requirements"):
    """Recursively scan *base_path* and classify files for the UI pickers.

    Classification rules (matching the original behavior):
      * ``.txt``/``.pdf`` files whose name contains "requirement" are
        requirements documents;
      * any other ``.pdf`` is treated as packaging artwork;
      * everything else is ignored.

    Args:
        base_path: Directory to scan; defaults to the bundled client library.

    Returns:
        tuple[list[dict], list[dict]]: ``(requirements_files, packaging_files)``
        where each entry is ``{'name', 'path', 'type'}``. Both lists are empty
        when *base_path* does not exist.
    """
    requirements_files = []
    packaging_files = []

    if not os.path.exists(base_path):
        return requirements_files, packaging_files

    for root, _dirs, files in os.walk(base_path):
        for file in files:
            file_path = os.path.join(root, file)
            lower = file.lower()

            if lower.endswith(('.txt', '.pdf')) and 'requirement' in lower:
                bucket, kind = requirements_files, 'requirements'
            elif lower.endswith('.pdf'):
                bucket, kind = packaging_files, 'packaging'
            else:
                continue
            bucket.append({
                'name': os.path.relpath(file_path, base_path),
                'path': file_path,
                'type': kind,
            })

    return requirements_files, packaging_files
|
|
def load_file_content(file_info):
    """Read a file's content: text for requirements entries, bytes otherwise.

    Returns the content on success, or None after reporting the error in the UI.
    """
    is_text = file_info['type'] == 'requirements'
    mode = 'r' if is_text else 'rb'
    open_kwargs = {'encoding': 'utf-8'} if is_text else {}
    try:
        with open(file_info['path'], mode, **open_kwargs) as handle:
            return handle.read()
    except Exception as exc:
        st.error(f"Error loading file {file_info['name']}: {str(exc)}")
        return None
|
|
def load_requirements_content(file_info):
    """Return the UTF-8 text of a requirements file, or None on failure."""
    try:
        with open(file_info['path'], encoding='utf-8') as handle:
            content = handle.read()
    except Exception as exc:
        st.error(f"Error loading requirements file {file_info['name']}: {str(exc)}")
        return None
    return content
|
|
def load_packaging_content(file_info):
    """Return the raw bytes of a packaging file, or None on failure."""
    try:
        with open(file_info['path'], 'rb') as handle:
            payload = handle.read()
    except Exception as exc:
        st.error(f"Error loading packaging file {file_info['name']}: {str(exc)}")
        return None
    return payload
|
|
def main():
    """Render the Streamlit UI and drive the packaging-compliance workflow.

    Layout: left column = document upload and previews, right column =
    compliance guidelines; below both, an "Analyze Compliance" action runs
    OCR (Google Document AI), barcode scanning, PDF metadata extraction and
    the Claude compliance analysis for every uploaded packaging PDF.
    """
    st.set_page_config(layout="wide", page_title="Packaging Compliance Checker")

    # Discover the pre-bundled client files once per script run.
    client_requirements_files, client_packaging_files = load_client_requirements_files()

    # Initialise session state so Streamlit reruns keep uploads/results.
    if "requirements_text" not in st.session_state:
        st.session_state.requirements_text = None
    if "analysis_results" not in st.session_state:
        st.session_state.analysis_results = None
    if "current_requirements_file" not in st.session_state:
        st.session_state.current_requirements_file = None
    if "uploaded_packaging_files" not in st.session_state:
        st.session_state.uploaded_packaging_files = []
    if "selected_packaging_file" not in st.session_state:
        st.session_state.selected_packaging_file = None
    if "client_requirements_files" not in st.session_state:
        st.session_state.client_requirements_files = client_requirements_files
    if "client_packaging_files" not in st.session_state:
        st.session_state.client_packaging_files = client_packaging_files

    st.title("Packaging Compliance Checker")
    st.write(
        "Upload a requirements document (plain text) that specifies requirements, "
        "and then upload one or more packaging PDFs to check for compliance."
    )

    col1, col2 = st.columns([1, 1])

    with col1:
        # CSS for the upload panel (left column).
        st.markdown("""
        <style>
        .upload-section {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            padding: 20px;
            border-radius: 15px;
            color: white;
            margin-bottom: 20px;
        }
        .upload-title {
            font-size: 24px;
            font-weight: bold;
            margin-bottom: 15px;
            text-align: center;
        }
        .upload-description {
            font-size: 14px;
            opacity: 0.9;
            margin-bottom: 20px;
            text-align: center;
        }
        .file-uploader {
            background: rgba(255, 255, 255, 0.1);
            border: 2px dashed rgba(255, 255, 255, 0.3);
            border-radius: 10px;
            padding: 15px;
            margin-bottom: 15px;
        }
        .requirements-display {
            background: rgba(255, 255, 255, 0.05);
            border-radius: 10px;
            padding: 15px;
            margin-top: 15px;
        }
        .artwork-display {
            background: rgba(255, 255, 255, 0.05);
            border-radius: 10px;
            padding: 15px;
            margin-top: 15px;
        }
        .image-container {
            max-width: 100%;
            border-radius: 8px;
            overflow: hidden;
            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
        }
        </style>
        """, unsafe_allow_html=True)

        st.markdown('<div class="upload-section">', unsafe_allow_html=True)
        st.markdown('<div class="upload-title">📄 Document Upload</div>', unsafe_allow_html=True)
        st.markdown('<div class="upload-description">Upload your requirements and packaging documents for compliance analysis</div>', unsafe_allow_html=True)

        # --- Requirements document: bundled client file or fresh upload ---
        st.markdown('<div class="file-uploader">', unsafe_allow_html=True)
        st.markdown("**📋 Requirements Document**")

        req_tab1, req_tab2 = st.tabs(["📁 Client Files", "📤 Upload New"])

        with req_tab1:
            if st.session_state.client_requirements_files:
                req_options = ["Select a requirements file..."] + [f["name"] for f in st.session_state.client_requirements_files]
                selected_req_file = st.selectbox("Choose from client files:", req_options)

                if selected_req_file != "Select a requirements file...":
                    selected_file_info = None
                    for file_info in st.session_state.client_requirements_files:
                        if file_info["name"] == selected_req_file:
                            selected_file_info = file_info
                            break

                    if selected_file_info:
                        import io

                        # BUGFIX: temp_file was previously referenced even when
                        # loading failed (NameError); track it explicitly and
                        # only ingest when the file actually loaded.
                        temp_file = None
                        if selected_file_info["name"].lower().endswith('.pdf'):
                            requirements_content = load_packaging_content(selected_file_info)
                            if requirements_content:
                                temp_file = io.BytesIO(requirements_content)
                                temp_file.name = selected_file_info["name"]
                        else:
                            requirements_content = load_requirements_content(selected_file_info)
                            if requirements_content:
                                temp_file = io.StringIO(requirements_content)
                                temp_file.name = selected_file_info["name"]

                        if temp_file is not None:
                            st.session_state.requirements_text = RequirementsIngest().ingest_requirements_document(temp_file)
                            st.session_state.current_requirements_file = temp_file
                            st.session_state.analysis_results = None

                            if isinstance(st.session_state.requirements_text, dict):
                                file_type = st.session_state.requirements_text.get('type', 'unknown')
                                if file_type == 'pdf':
                                    st.success(f"✅ Loaded PDF requirements from: {selected_req_file}")
                                    st.info("📄 PDF will be processed natively by Claude for full visual analysis")
                                else:
                                    st.success(f"✅ Loaded requirements from: {selected_req_file}")
                            else:
                                st.success(f"✅ Loaded requirements from: {selected_req_file}")
            else:
                st.info("No client requirements files found")

        with req_tab2:
            requirements_file = st.file_uploader("Upload Requirements Document (TXT or PDF)", type=["txt", "pdf"])

            # Re-ingest only when the uploaded object actually changed.
            if requirements_file and requirements_file != st.session_state.current_requirements_file:
                st.session_state.requirements_text = RequirementsIngest().ingest_requirements_document(requirements_file)
                st.session_state.current_requirements_file = requirements_file
                st.session_state.analysis_results = None

                if isinstance(st.session_state.requirements_text, dict):
                    file_type = st.session_state.requirements_text.get('type', 'unknown')
                    file_size = st.session_state.requirements_text.get('file_size', 0)
                    if file_type == 'pdf':
                        st.success(f"✅ Uploaded PDF requirements: {requirements_file.name} ({file_size:,} bytes)")
                        st.info("📄 PDF will be processed natively by Claude for full visual analysis")
                    else:
                        st.success(f"✅ Uploaded requirements: {requirements_file.name}")
                else:
                    st.success(f"✅ Uploaded requirements: {requirements_file.name}")

        st.markdown('</div>', unsafe_allow_html=True)

        # --- Packaging PDFs: bundled client files or fresh uploads ---
        st.markdown('<div class="file-uploader">', unsafe_allow_html=True)
        st.markdown("**📦 Packaging PDFs**")

        pkg_tab1, pkg_tab2 = st.tabs(["📁 Client Files", "📤 Upload New"])

        with pkg_tab1:
            if st.session_state.client_packaging_files:
                pkg_options = ["Select packaging files..."] + [f["name"] for f in st.session_state.client_packaging_files]
                selected_pkg_files = st.multiselect("Choose from client files:", pkg_options[1:])

                if selected_pkg_files:
                    # Wrap each chosen client file in an in-memory file object
                    # so downstream code treats them like uploads.
                    client_file_objects = []
                    for selected_file_name in selected_pkg_files:
                        for file_info in st.session_state.client_packaging_files:
                            if file_info["name"] == selected_file_name:
                                import io
                                file_content = load_packaging_content(file_info)
                                if file_content:
                                    temp_file = io.BytesIO(file_content)
                                    temp_file.name = file_info["name"]
                                    client_file_objects.append(temp_file)
                                break

                    st.session_state.uploaded_packaging_files = client_file_objects

                    if not st.session_state.selected_packaging_file and client_file_objects:
                        st.session_state.selected_packaging_file = client_file_objects[0]
                    st.success(f"✅ Loaded {len(client_file_objects)} packaging files from client directory")
            else:
                st.info("No client packaging files found")

        with pkg_tab2:
            packaging_files = st.file_uploader("Upload Packaging PDFs", type=["pdf"], accept_multiple_files=True)

            if packaging_files:
                st.session_state.uploaded_packaging_files = packaging_files

                if not st.session_state.selected_packaging_file and packaging_files:
                    st.session_state.selected_packaging_file = packaging_files[0]
                st.success(f"✅ Uploaded {len(packaging_files)} packaging files")
            else:
                # Nothing uploaded here and no client files either: clear
                # the stale preview selection.
                if not st.session_state.uploaded_packaging_files:
                    st.session_state.selected_packaging_file = None

        st.markdown('</div>', unsafe_allow_html=True)

        # --- Preview selector for the uploaded packaging files ---
        if st.session_state.uploaded_packaging_files:
            st.markdown('<div class="file-uploader">', unsafe_allow_html=True)
            file_names = [f.name for f in st.session_state.uploaded_packaging_files]
            # BUGFIX: guard against a previously-selected file whose name is
            # no longer in the current list (ValueError from .index()).
            default_index = 0
            if (st.session_state.selected_packaging_file
                    and st.session_state.selected_packaging_file.name in file_names):
                default_index = file_names.index(st.session_state.selected_packaging_file.name)
            selected_file_name = st.selectbox(
                "Select packaging file to display:",
                file_names,
                index=default_index
            )

            for file in st.session_state.uploaded_packaging_files:
                if file.name == selected_file_name:
                    st.session_state.selected_packaging_file = file
                    break
            st.markdown('</div>', unsafe_allow_html=True)

        st.markdown('</div>', unsafe_allow_html=True)

        # --- Requirements preview ---
        if st.session_state.requirements_text:
            st.markdown('<div class="requirements-display">', unsafe_allow_html=True)
            with st.expander("📋 Requirements Document", expanded=True):
                if isinstance(st.session_state.requirements_text, dict):
                    file_type = st.session_state.requirements_text.get('type', 'unknown')
                    filename = st.session_state.requirements_text.get('filename', 'Unknown')
                    file_size = st.session_state.requirements_text.get('file_size', 0)

                    st.markdown(f"**File Type:** {file_type.upper()}")
                    # BUGFIX: previously printed the literal "(unknown)"
                    # instead of the extracted filename.
                    st.markdown(f"**Filename:** {filename}")
                    st.markdown(f"**File Size:** {file_size:,} bytes")

                    if file_type == 'pdf':
                        st.info("📄 This PDF will be processed natively by Claude for full visual analysis including charts, graphs, and visual layouts.")
                        st.markdown("**Preview Text:**")
                        st.text_area("Requirements Text", st.session_state.requirements_text.get('text_content', ''), height=200)
                    else:
                        st.text_area("Requirements Text", st.session_state.requirements_text.get('text_content', ''), height=200)
                else:
                    # Legacy plain-string ingest result.
                    st.text_area("Requirements Text", st.session_state.requirements_text, height=200)
            st.markdown('</div>', unsafe_allow_html=True)

        # --- Artwork preview (first page rendered via pdf2image) ---
        if st.session_state.selected_packaging_file:
            st.markdown('<div class="artwork-display">', unsafe_allow_html=True)
            with st.expander("🎨 Package Artwork", expanded=True):
                try:
                    # Rewind: the stream may have been consumed by a prior run.
                    st.session_state.selected_packaging_file.seek(0)

                    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                        tmp_file.write(st.session_state.selected_packaging_file.read())
                        tmp_pdf_path = tmp_file.name

                    try:
                        images = convert_from_path(tmp_pdf_path)
                        if not images:
                            raise ValueError("No pages found in PDF")
                        page_image = images[0]
                    except Exception as e:
                        st.error(f"Error converting PDF to image: {str(e)}")
                        # Placeholder so the UI still renders something.
                        page_image = Image.new('RGB', (800, 600), color='white')
                        draw = ImageDraw.Draw(page_image)
                        draw.text((400, 300), "PDF conversion failed", fill='black', anchor='mm')

                    st.markdown('<div class="image-container">', unsafe_allow_html=True)
                    st.image(page_image, caption=f"Package: {st.session_state.selected_packaging_file.name}", use_container_width=True)
                    st.markdown('</div>', unsafe_allow_html=True)

                    if os.path.exists(tmp_pdf_path):
                        os.unlink(tmp_pdf_path)

                except Exception as e:
                    st.error(f"Error displaying package artwork: {str(e)}")
            st.markdown('</div>', unsafe_allow_html=True)

    with col2:
        # CSS for the compliance guidelines panel (right column).
        st.markdown("""
        <style>
        .compliance-section {
            background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
            padding: 20px;
            border-radius: 15px;
            color: white;
            height: 100%;
        }
        .compliance-title {
            font-size: 24px;
            font-weight: bold;
            margin-bottom: 15px;
            text-align: center;
        }
        .compliance-content {
            background: rgba(255, 255, 255, 0.1);
            border-radius: 10px;
            padding: 15px;
            margin-top: 15px;
        }
        .status-compliant {
            background: rgba(76, 175, 80, 0.2);
            border-left: 4px solid #4CAF50;
            padding: 10px;
            margin: 10px 0;
            border-radius: 5px;
        }
        .status-partial {
            background: rgba(255, 193, 7, 0.2);
            border-left: 4px solid #FFC107;
            padding: 10px;
            margin: 10px 0;
            border-radius: 5px;
        }
        .status-non-compliant {
            background: rgba(244, 67, 54, 0.2);
            border-left: 4px solid #F44336;
            padding: 10px;
            margin: 10px 0;
            border-radius: 5px;
        }
        </style>
        """, unsafe_allow_html=True)

        st.markdown('<div class="compliance-section">', unsafe_allow_html=True)
        st.markdown('<div class="compliance-title">📋 Compliance Guidelines</div>', unsafe_allow_html=True)

        # Render the static compliance outline, styling each status section.
        try:
            with open("requirements_library/compliance_outline.txt", "r") as f:
                outline_content = f.read()

            st.markdown('<div class="compliance-content">', unsafe_allow_html=True)

            lines = outline_content.strip().split('\n')
            current_section = ""

            for line in lines:
                line = line.strip()
                if not line:
                    continue

                if line == "Compliance Outline":
                    st.markdown("**📋 Compliance Outline**")
                elif line == "Compliant":
                    st.markdown('<div class="status-compliant">', unsafe_allow_html=True)
                    st.markdown("🟢 **Compliant**")
                    current_section = "compliant"
                elif line == "Partially Compliant":
                    st.markdown('</div>', unsafe_allow_html=True)
                    st.markdown('<div class="status-partial">', unsafe_allow_html=True)
                    st.markdown("🟡 **Partially Compliant**")
                    current_section = "partial"
                elif line == "Non-Compliant":
                    st.markdown('</div>', unsafe_allow_html=True)
                    st.markdown('<div class="status-non-compliant">', unsafe_allow_html=True)
                    st.markdown("🔴 **Non-Compliant**")
                    current_section = "non_compliant"
                elif line.startswith("> "):
                    # "> description" lines are rendered italicised.
                    description = line[2:]
                    st.markdown(f"*{description}*")
                elif line == "Example Criteria:":
                    st.markdown("**Example Criteria:**")
                elif line.startswith("- "):
                    criteria = line[2:]
                    st.markdown(f"• {criteria}")
                elif line and not line.startswith("Example Criteria:"):
                    st.markdown(line)

            # Close the last open status <div>, if any.
            if current_section:
                st.markdown('</div>', unsafe_allow_html=True)

            st.markdown('</div>', unsafe_allow_html=True)

        except FileNotFoundError:
            st.error("Compliance outline file not found")
        except Exception as e:
            st.error(f"Error reading compliance outline: {e}")

        st.markdown('</div>', unsafe_allow_html=True)

    # Model used for the compliance analysis (fixed for now).
    model_option = "claude-sonnet-4-20250514"

    # --- Analysis pipeline: OCR -> image -> barcodes -> metadata -> LLM ---
    if st.button("Analyze Compliance"):
        if st.session_state.requirements_text and st.session_state.uploaded_packaging_files:
            for packaging_file in st.session_state.uploaded_packaging_files:
                st.markdown(f"## Analyzing: {packaging_file.name}")

                progress_bar = st.progress(0)
                status_text = st.empty()

                # Persist the stream to a temp PDF for path-based tooling.
                packaging_file.seek(0)
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(packaging_file.read())
                    tmp_pdf_path = tmp_file.name

                try:
                    # 1) OCR text + bounding boxes via Google Document AI.
                    status_text.text("Extracting text from packaging PDF...")
                    google_document_api = GoogleDocumentAPI(credentials_path="src/extract_text/photon-services-f0d3ec1417d0.json")
                    document = google_document_api.process_document(tmp_pdf_path)
                    packaging_text = google_document_api.extract_text_with_markdown_table(document)
                    packaging_data = google_document_api.extract_text_with_bounding_boxes(document)
                    progress_bar.progress(25)

                    # 2) Render page 1 to an image for Claude + visualization.
                    status_text.text("Processing packaging image...")
                    try:
                        images = convert_from_path(tmp_pdf_path)
                        if not images:
                            raise ValueError("No pages found in PDF")
                        page_image = images[0]
                    except Exception as e:
                        st.error(f"Error converting PDF to image: {str(e)}")
                        page_image = Image.new('RGB', (800, 600), color='white')
                        draw = ImageDraw.Draw(page_image)
                        draw.text((400, 300), "PDF conversion failed", fill='black', anchor='mm')

                    buffer = BytesIO()
                    page_image.save(buffer, format='PNG')
                    image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

                    # 3) Barcode detection/validation on the rendered page.
                    status_text.text("Scanning for barcodes...")
                    barcode = Barcode()
                    barcode_results = barcode.scan_and_validate(page_image)

                    progress_bar.progress(40)

                    # 4) Typography/color metadata from the PDF itself.
                    status_text.text("Extracting metadata from packaging...")
                    metadata_extractor = PDFArtworkMetadataExtractor()
                    metadata_results = metadata_extractor.extract_metadata(tmp_pdf_path)

                    # Color keys come back as RGB tuples; stringify them so
                    # the results stay JSON/DataFrame friendly.
                    if metadata_results and not metadata_results.get('error'):
                        if 'text_colors' in metadata_results:
                            text_colors_str = {}
                            for color_tuple, count in metadata_results['text_colors'].items():
                                if isinstance(color_tuple, tuple):
                                    color_str = f"RGB{color_tuple}"
                                else:
                                    color_str = str(color_tuple)
                                text_colors_str[color_str] = count
                            metadata_results['text_colors'] = text_colors_str

                    progress_bar.progress(50)

                    # 5) LLM compliance analysis. NOTE: session state holds
                    # only the most recent file's results by design — each
                    # file's report is rendered inline below.
                    status_text.text("Analyzing requirements and compliance...")
                    st.session_state.analysis_results = ComplianceAnalysis().analyze_compliance(
                        st.session_state.requirements_text,
                        packaging_text,
                        packaging_data,
                        image_base64,
                        barcode_results,
                        metadata_results,
                        model=model_option
                    )
                    progress_bar.progress(100)
                    status_text.text("Analysis complete!")

                    # --- Render results ---
                    st.markdown("### Extracted Requirements")
                    if "requirements" in st.session_state.analysis_results:
                        req_df = pd.DataFrame(st.session_state.analysis_results["requirements"])
                        st.dataframe(req_df)

                    st.markdown("### Verification Results")
                    if "verifications" in st.session_state.analysis_results:
                        tabs = st.tabs(["Summary", "Detailed Results"])

                        with tabs[0]:
                            # Status roll-up metrics.
                            if "verifications" in st.session_state.analysis_results:
                                statuses = [v.get("compliance_status", "UNKNOWN") for v in st.session_state.analysis_results["verifications"]]
                                compliant = statuses.count("COMPLIANT")
                                non_compliant = statuses.count("NON-COMPLIANT")
                                partial = statuses.count("PARTIALLY COMPLIANT")
                                error = len(statuses) - compliant - non_compliant - partial

                                col1, col2, col3, col4 = st.columns(4)
                                col1.metric("Compliant", compliant)
                                col2.metric("Non-Compliant", non_compliant)
                                col3.metric("Partially Compliant", partial)
                                col4.metric("Errors", error)

                                if "compliance_report" in st.session_state.analysis_results:
                                    st.markdown(st.session_state.analysis_results["compliance_report"])

                        with tabs[1]:
                            st.markdown("### Barcode Scanning Results")
                            if "barcode_data" in st.session_state.analysis_results and st.session_state.analysis_results["barcode_data"]:
                                barcode_df = pd.DataFrame(st.session_state.analysis_results["barcode_data"])
                                st.dataframe(barcode_df)

                                valid_barcodes = sum(1 for barcode in st.session_state.analysis_results["barcode_data"] if barcode["valid"])
                                total_barcodes = len(st.session_state.analysis_results["barcode_data"])
                                st.markdown(f"**Barcode Summary:** {valid_barcodes}/{total_barcodes} valid barcodes found")
                            else:
                                st.info("No barcodes found in the packaging")

                            st.markdown("### Typography and Design Metadata")
                            if "metadata" in st.session_state.analysis_results and st.session_state.analysis_results["metadata"]:
                                metadata = st.session_state.analysis_results["metadata"]

                                if metadata.get('error'):
                                    st.error(f"Metadata extraction error: {metadata['error']}")
                                else:
                                    col1, col2 = st.columns(2)

                                    with col1:
                                        st.markdown("**Extraction Info:**")
                                        st.write(f"**Method:** {metadata.get('extraction_method', 'Unknown')}")
                                        st.write(f"**Selectable Text:** {'Yes' if metadata.get('has_selectable_text') else 'No'}")
                                        st.write(f"**Pages Processed:** {metadata.get('pages_processed', 0)}")

                                    with col2:
                                        st.markdown("**Dominant Elements:**")
                                        # "Dominant" = highest character count.
                                        if metadata.get('fonts'):
                                            dominant_font = max(metadata['fonts'].items(), key=lambda x: x[1])[0]
                                            st.write(f"**Font:** {dominant_font}")
                                        if metadata.get('font_sizes'):
                                            dominant_size = max(metadata['font_sizes'].items(), key=lambda x: x[1])[0]
                                            st.write(f"**Font Size:** {dominant_size:.1f}pt")
                                        if metadata.get('text_colors'):
                                            dominant_color = max(metadata['text_colors'].items(), key=lambda x: x[1])[0]
                                            st.write(f"**Text Color:** {dominant_color}")

                                    with st.expander("📊 Detailed Font Analysis"):
                                        if metadata.get('fonts'):
                                            font_df = pd.DataFrame([
                                                {'Font': font, 'Character Count': count}
                                                for font, count in list(metadata['fonts'].items())[:10]
                                            ])
                                            st.dataframe(font_df)
                                        else:
                                            st.info("No font data available")

                                    with st.expander("📏 Font Size Distribution"):
                                        if metadata.get('font_sizes'):
                                            size_df = pd.DataFrame([
                                                {'Font Size (pt)': f"{size:.1f}", 'Character Count': count}
                                                for size, count in list(metadata['font_sizes'].items())[:10]
                                            ])
                                            st.dataframe(size_df)
                                        else:
                                            st.info("No font size data available")

                                    with st.expander("🎨 Text Color Analysis"):
                                        if metadata.get('text_colors'):
                                            color_df = pd.DataFrame([
                                                {'Color (RGB)': str(color), 'Character Count': count}
                                                for color, count in list(metadata['text_colors'].items())[:10]
                                            ])
                                            st.dataframe(color_df)
                                        else:
                                            st.info("No color data available")
                            else:
                                st.info("No metadata available")

                            # Per-requirement verification details + evidence.
                            for i, verification in enumerate(st.session_state.analysis_results["verifications"]):
                                req_id = verification.get("requirement_id", f"REQ{i+1}")
                                text_id = verification.get("Text ID", "Unknown")
                                status = verification.get("compliance_status", "UNKNOWN")

                                if status == "COMPLIANT":
                                    status_color = "green"
                                elif status == "NON-COMPLIANT":
                                    status_color = "red"
                                elif status == "PARTIALLY COMPLIANT":
                                    status_color = "orange"
                                else:
                                    status_color = "gray"

                                # Auto-expand anything that is not compliant.
                                with st.expander(f"{req_id}: {status}", expanded=status != "COMPLIANT"):
                                    if "confidence" in verification:
                                        st.progress(verification["confidence"])

                                    if "reasoning" in verification:
                                        st.markdown(f"**Reasoning:** {verification['reasoning']}")

                                    if "criteria" in verification and verification["criteria"]:
                                        st.markdown("**Criteria:**")
                                        for criterion in verification["criteria"]:
                                            st.markdown(f"- {criterion}")

                                    if "evidence_found" in verification and verification["evidence_found"]:
                                        st.markdown("**Evidence Found:**")

                                        # Partition evidence: text (OCR id),
                                        # barcode (barcode id), else visual.
                                        text_evidence = []
                                        visual_evidence = []
                                        barcode_evidence = []

                                        for evidence in verification["evidence_found"]:
                                            if "text_id" in evidence and evidence["text_id"] is not None:
                                                text_evidence.append(evidence)
                                            elif "barcode_id" in evidence and evidence["barcode_id"] is not None:
                                                barcode_evidence.append(evidence)
                                            else:
                                                visual_evidence.append(evidence)

                                        if text_evidence:
                                            st.markdown("**Text Evidence:**")
                                            for evidence in text_evidence:
                                                text_id = evidence.get("text_id", "Unknown")
                                                evidence_text = evidence.get("evidence_text", "No description")
                                                st.markdown(f"- **Text ID {text_id}:** {evidence_text}")

                                        if barcode_evidence:
                                            st.markdown("**Barcode Evidence:**")
                                            for evidence in barcode_evidence:
                                                barcode_id = evidence.get("barcode_id", "Unknown")
                                                evidence_text = evidence.get("evidence_text", "No description")
                                                st.markdown(f"- **Barcode ID {barcode_id}:** {evidence_text}")

                                        if visual_evidence:
                                            st.markdown("**Visual Evidence (from image analysis):**")
                                            # Renamed loop var: avoid shadowing
                                            # the outer enumerate index `i`.
                                            for vis_idx, evidence in enumerate(visual_evidence, 1):
                                                evidence_text = evidence.get("evidence_text", "Visual element referenced by Claude")
                                                st.markdown(f"- **Visual {vis_idx}:** {evidence_text}")

                                        total_evidence = len(verification["evidence_found"])
                                        st.markdown(f"*Total evidence: {total_evidence} ({len(text_evidence)} text, {len(barcode_evidence)} barcode, {len(visual_evidence)} visual)*")

                                    if "evidence_found" in verification and verification["evidence_found"]:
                                        st.markdown(f"### Evidence Visualization for {req_id}")

                                        # Draw bounding boxes on a copy of the
                                        # rendered page, colored by status.
                                        try:
                                            draw_image = page_image.copy()
                                            draw = ImageDraw.Draw(draw_image)
                                            img_width, img_height = draw_image.size

                                            status_colors = {
                                                "COMPLIANT": "green",
                                                "NON-COMPLIANT": "red",
                                                "PARTIALLY COMPLIANT": "orange",
                                                "ERROR": "purple",
                                                "UNKNOWN": "gray"
                                            }

                                            color = status_colors.get(status, "gray")

                                            st.markdown(f"**Status:** <span style='color:{color}'>■</span> {status}", unsafe_allow_html=True)

                                            text_evidence_count = 0
                                            visual_evidence_count = 0
                                            barcode_evidence_count = 0

                                            if "packaging_data" in st.session_state.analysis_results:
                                                for evidence in verification["evidence_found"]:
                                                    if "text_id" in evidence and evidence["text_id"] is not None:
                                                        text_id = evidence["text_id"]
                                                        try:
                                                            # Numeric ids are 1-based indices into packaging_data.
                                                            if isinstance(text_id, (int, float)) or (isinstance(text_id, str) and text_id.isdigit()):
                                                                numeric_id = int(text_id)
                                                                item = st.session_state.analysis_results["packaging_data"][numeric_id - 1]
                                                                box = item["bounding_box"]

                                                                # Vertices are normalized [0,1]; scale to pixels.
                                                                points = [(v['x'] * img_width, v['y'] * img_height) for v in box]

                                                                draw.polygon(points, outline=color, width=3)

                                                                text_evidence_count += 1
                                                                label = f"Text Evidence {text_evidence_count}"
                                                                draw.text(points[0], label, fill="white", stroke_width=2, stroke_fill="black")
                                                            else:
                                                                # Non-numeric id: report without drawing.
                                                                text_evidence_count += 1
                                                                st.info(f"Text Evidence {text_evidence_count}: {evidence.get('evidence_text', 'Text element referenced by Claude')} (ID: {text_id})")

                                                        except (IndexError, KeyError) as e:
                                                            st.warning(f"Could not find bounding box for Text ID {text_id}: {e}")
                                                    elif "barcode_id" in evidence and evidence["barcode_id"] is not None:
                                                        barcode_id = evidence["barcode_id"]
                                                        try:
                                                            barcode_found = None
                                                            for barcode in st.session_state.analysis_results.get("barcode_data", []):
                                                                if barcode["id"] == barcode_id:
                                                                    barcode_found = barcode
                                                                    break

                                                            if barcode_found:
                                                                pos = barcode_found["position"]
                                                                x, y = pos["x"], pos["y"]
                                                                w, h = pos["width"], pos["height"]

                                                                draw.rectangle([x, y, x + w, y + h], outline=color, width=3)

                                                                barcode_evidence_count += 1
                                                                label = f"Barcode Evidence {barcode_evidence_count}"
                                                                draw.text((x, y - 20), label, fill="white", stroke_width=2, stroke_fill="black")

                                                                barcode_info = f"{barcode_found['type']}: {barcode_found['data']}"
                                                                draw.text((x, y - 40), barcode_info, fill="white", stroke_width=2, stroke_fill="black")
                                                            else:
                                                                st.warning(f"Could not find barcode data for Barcode ID {barcode_id}")

                                                        except Exception as e:
                                                            st.warning(f"Could not draw barcode bounding box for Barcode ID {barcode_id}: {e}")
                                                    else:
                                                        visual_evidence_count += 1
                                                        st.info(f"Visual Evidence {visual_evidence_count}: {evidence.get('evidence_text', 'Visual element referenced by Claude')}")

                                                if text_evidence_count > 0 or visual_evidence_count > 0 or barcode_evidence_count > 0:
                                                    evidence_summary = []
                                                    if text_evidence_count > 0:
                                                        evidence_summary.append(f"{text_evidence_count} text")
                                                    if barcode_evidence_count > 0:
                                                        evidence_summary.append(f"{barcode_evidence_count} barcode")
                                                    if visual_evidence_count > 0:
                                                        evidence_summary.append(f"{visual_evidence_count} visual")

                                                    st.markdown(f"**Evidence Count:** {', '.join(evidence_summary)}")

                                                    st.image(ImageUtils.crop_image(draw_image), caption=f"Evidence for {req_id} - {status}", use_container_width=True)
                                                else:
                                                    st.info(f"No visual evidence found for {req_id}")
                                            else:
                                                # No bounding boxes available: just count and show the raw page.
                                                evidence_counts = {
                                                    'text': len([e for e in verification["evidence_found"] if "text_id" in e and e["text_id"] is not None]),
                                                    'barcode': len([e for e in verification["evidence_found"] if "barcode_id" in e and e["barcode_id"] is not None]),
                                                    'visual': len([e for e in verification["evidence_found"] if ("text_id" not in e or e["text_id"] is None) and ("barcode_id" not in e or e["barcode_id"] is None)])
                                                }

                                                total_evidence = sum(evidence_counts.values())
                                                if total_evidence > 0:
                                                    evidence_summary = []
                                                    if evidence_counts['text'] > 0:
                                                        evidence_summary.append(f"{evidence_counts['text']} text")
                                                    if evidence_counts['barcode'] > 0:
                                                        evidence_summary.append(f"{evidence_counts['barcode']} barcode")
                                                    if evidence_counts['visual'] > 0:
                                                        evidence_summary.append(f"{evidence_counts['visual']} visual")

                                                    st.info(f"Evidence Count: {', '.join(evidence_summary)} (no bounding box data available)")

                                                    st.image(ImageUtils.crop_image(page_image), caption=f"Original image for {req_id} - {status}", use_container_width=True)
                                                else:
                                                    st.info("No packaging data available for visualization")

                                        except Exception as e:
                                            st.error(f"Failed to generate visualization for {req_id}: {e}")
                                    else:
                                        st.info(f"No evidence found for {req_id}")

                except Exception as e:
                    st.error(f"Error analyzing {packaging_file.name}: {str(e)}")

                finally:
                    # Always remove the temp PDF, even on failure.
                    if os.path.exists(tmp_pdf_path):
                        os.unlink(tmp_pdf_path)
        else:
            st.warning("Please upload a requirements document and at least one packaging PDF.")

    st.markdown("---")
    st.markdown("""
    ### How It Works
    1. **Upload Requirements**: The system extracts structured requirements from your document
    2. **Upload Packaging**: We extract text from PDFs and analyze them against requirements
    3. **Analysis**: Each requirement is verified using structured reasoning and semantic matching
    """)
|
|
if __name__ == "__main__":
    # pandas is already imported at module scope; the redundant re-import
    # that used to live here has been removed.
    main()