| import streamlit as st |
| import tempfile |
| import os |
| import pandas as pd |
| from src.extract_text.google_document_api import GoogleDocumentAPI |
| from pdf2image import convert_from_path |
| from PIL import Image, ImageDraw, ImageFont |
| from src.utils.image_utils import ImageUtils |
| import base64 |
| from io import BytesIO |
| from src.utils.barcode import Barcode |
| import anthropic |
| import json |
|
|
def load_client_artwork_files():
    """Collect every PDF found under the client requirements directory.

    Walks ``requirements_library/client-requirements`` (resolved against the
    current working directory) recursively and matches the ``.pdf`` extension
    case-insensitively.

    Returns:
        A list of dicts with the keys ``name`` (path relative to the base
        directory), ``path`` (path including the base directory) and ``type``
        (always ``'artwork'``), sorted by ``name``. The list is empty when the
        base directory does not exist.
    """
    base_path = "requirements_library/client-requirements"
    if not os.path.exists(base_path):
        return []

    artwork_files = []
    for root, _dirs, files in os.walk(base_path):
        for file_name in files:
            if not file_name.lower().endswith('.pdf'):
                continue
            file_path = os.path.join(root, file_name)
            artwork_files.append({
                'name': os.path.relpath(file_path, base_path),
                'path': file_path,
                'type': 'artwork',
            })

    # os.walk reports entries in arbitrary directory order; sort so the
    # selection list shown to the user is stable across runs and platforms.
    artwork_files.sort(key=lambda entry: entry['name'])
    return artwork_files
|
|
def load_artwork_content(file_info):
    """Read the artwork file described by ``file_info`` and return its bytes.

    Args:
        file_info: Mapping with a ``path`` entry (the file to read) and a
            ``name`` entry (used only in the error message).

    Returns:
        The raw file contents, or ``None`` after the failure has been reported
        through ``st.error``.
    """
    content = None
    try:
        with open(file_info['path'], 'rb') as source:
            content = source.read()
    except Exception as e:
        # Any failure is surfaced in the UI instead of aborting the app
        st.error(f"Error loading artwork file {file_info['name']}: {str(e)}")
        content = None
    return content
|
|
def extract_pdf_data(pdf_file, file_name,
                     credentials_path="src/extract_text/photon-services-f0d3ec1417d0.json"):
    """Extract text, bounding boxes, a page image and barcodes from a PDF.

    The PDF is spooled to a temporary file because both the document API
    wrapper and ``convert_from_path`` are given a filesystem path. Only the
    first rendered page is used for the image and the barcode scan.

    Args:
        pdf_file: Seekable binary file-like object holding the PDF.
        file_name: Display name; used in error messages and echoed back in the
            result under ``file_name``.
        credentials_path: Credentials file handed to ``GoogleDocumentAPI``.
            Defaults to the path that was previously hard-coded here.

    Returns:
        A dict with the keys ``text_content``, ``bounding_boxes``, ``image``,
        ``original_image``, ``image_base64``, ``barcode_results``,
        ``file_name``, ``image_quality`` and ``image_size_bytes``; or ``None``
        after the failure has been reported through ``st.error``.
    """
    tmp_pdf_path = None
    try:
        # delete=False: the file has to survive closing the handle so the
        # path-based consumers below can open it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_pdf_path = tmp_file.name
            pdf_file.seek(0)
            tmp_file.write(pdf_file.read())

        google_document_api = GoogleDocumentAPI(credentials_path=credentials_path)
        document = google_document_api.process_document(tmp_pdf_path)
        text_content = google_document_api.extract_text_with_markdown_table(document)
        bounding_boxes = google_document_api.extract_text_with_bounding_boxes(document)

        try:
            images = convert_from_path(tmp_pdf_path)
            if not images:
                raise ValueError("No pages found in PDF")
            page_image = images[0]
        except Exception as e:
            st.error(f"Error converting PDF to image: {str(e)}")
            # Fall back to a labelled placeholder so the text and bounding-box
            # results extracted above are still returned.
            page_image = Image.new('RGB', (800, 600), color='white')
            draw = ImageDraw.Draw(page_image)
            draw.text((400, 300), "PDF conversion failed", fill='black', anchor='mm')

        processed_image, quality, file_size = ImageUtils.process_image_for_comparison(
            page_image,
            target_size=(1200, 1600),
            max_size_bytes=1024 * 1024
        )

        image_base64 = ImageUtils.image_to_base64_optimized(
            page_image,
            target_size=(1200, 1600),
            max_size_bytes=1024 * 1024
        )

        barcode = Barcode()
        barcode_results = barcode.scan_and_validate(page_image)

        return {
            'text_content': text_content,
            'bounding_boxes': bounding_boxes,
            'image': processed_image,
            'original_image': page_image,
            'image_base64': image_base64,
            'barcode_results': barcode_results,
            'file_name': file_name,
            'image_quality': quality,
            'image_size_bytes': file_size
        }

    except Exception as e:
        st.error(f"Error processing PDF {file_name}: {str(e)}")
        return None

    finally:
        # Runs on success and on failure, so the temporary PDF no longer
        # leaks when extraction raises part-way through.
        if tmp_pdf_path is not None and os.path.exists(tmp_pdf_path):
            try:
                os.unlink(tmp_pdf_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not discard a
                # successful extraction result.
                pass
|
|
def _fallback_comparison(summary, response_text):
    """Build the placeholder result used when the reply is not usable JSON."""
    return {
        "overall_similarity": 0.5,
        "comparison_summary": summary,
        "raw_response": response_text,
        "text_differences": [],
        "layout_differences": [],
        "barcode_differences": [],
        "visual_differences": [],
        "compliance_impact": [],
        "recommendations": ["Review the raw analysis output for detailed insights"]
    }


def _parse_comparison_response(response_text):
    """Decode the outermost ``{...}`` span of the reply, or return a fallback."""
    start_idx = response_text.find('{')
    end_idx = response_text.rfind('}')

    # The previous check compared ``rfind(...) + 1`` against -1, which can
    # never match; test the raw indices so a reply without a complete object
    # is recognised here.
    if start_idx == -1 or end_idx < start_idx:
        return _fallback_comparison("Analysis completed but JSON parsing failed", response_text)

    try:
        return json.loads(response_text[start_idx:end_idx + 1])
    except json.JSONDecodeError:
        return _fallback_comparison("Analysis completed but structured parsing failed", response_text)


def compare_artworks_with_claude(artwork1_data, artwork2_data, model="claude-sonnet-4-20250514"):
    """Ask Claude to compare two extracted artworks and return the parsed result.

    The request carries one text prompt (extracted text, the first ten
    bounding-box entries and the barcode results of each artwork) followed by
    both page images.

    Args:
        artwork1_data: Dict providing ``file_name``, ``text_content``,
            ``bounding_boxes``, ``barcode_results`` and ``image_base64``.
        artwork2_data: Same structure for the second artwork.
        model: Model identifier passed to ``client.messages.create``.

    Returns:
        The JSON object decoded from the reply; a fallback dict carrying the
        raw reply under ``raw_response`` when no JSON object can be decoded;
        or ``None`` after an API/processing error has been reported through
        ``st.error``.
    """
    prompt = f"""
You are an expert packaging compliance analyzer. Compare these two artwork PDFs and provide a detailed analysis of their differences and similarities.

## Artwork 1: {artwork1_data['file_name']}
**Text Content:**
{artwork1_data['text_content']}

**Bounding Box Data:**
{json.dumps(artwork1_data['bounding_boxes'][:10], indent=2) if artwork1_data['bounding_boxes'] else "No text elements detected"}

**Barcode Data:**
{json.dumps(artwork1_data['barcode_results'], indent=2) if artwork1_data['barcode_results'] else "No barcodes detected"}

## Artwork 2: {artwork2_data['file_name']}
**Text Content:**
{artwork2_data['text_content']}

**Bounding Box Data:**
{json.dumps(artwork2_data['bounding_boxes'][:10], indent=2) if artwork2_data['bounding_boxes'] else "No text elements detected"}

**Barcode Data:**
{json.dumps(artwork2_data['barcode_results'], indent=2) if artwork2_data['barcode_results'] else "No barcodes detected"}

Please provide a comprehensive comparison analysis in the following JSON format:

{{
"overall_similarity": 0.85,
"comparison_summary": "Brief overview of the comparison results",
"text_differences": [
{{
"category": "Missing Text",
"artwork1_content": "Text found only in artwork 1",
"artwork2_content": "Text found only in artwork 2",
"significance": "HIGH/MEDIUM/LOW",
"description": "Detailed explanation of the difference"
}}
],
"layout_differences": [
{{
"category": "Position Changes",
"element": "Element that moved",
"artwork1_position": "Description of position in artwork 1",
"artwork2_position": "Description of position in artwork 2",
"significance": "HIGH/MEDIUM/LOW",
"description": "Impact of this change"
}}
],
"barcode_differences": [
{{
"category": "Barcode Changes",
"artwork1_barcodes": "Description of barcodes in artwork 1",
"artwork2_barcodes": "Description of barcodes in artwork 2",
"significance": "HIGH/MEDIUM/LOW",
"description": "Analysis of barcode differences"
}}
],
"visual_differences": [
{{
"category": "Visual Elements",
"description": "Description of visual differences observed in the images",
"significance": "HIGH/MEDIUM/LOW",
"recommendation": "Suggested action or consideration"
}}
],
"compliance_impact": [
{{
"area": "Regulatory compliance area affected",
"impact": "Description of potential compliance impact",
"risk_level": "HIGH/MEDIUM/LOW",
"recommendation": "Recommended action"
}}
],
"recommendations": [
"List of actionable recommendations based on the comparison"
]
}}

Analyze both the textual content and visual elements. Pay special attention to:
1. Missing or changed text elements
2. Repositioned elements that might affect readability
3. Barcode differences that could impact functionality
4. Visual changes that might affect brand consistency or compliance
5. Any changes that could impact regulatory compliance

Provide specific, actionable insights that would be valuable for quality control and compliance verification.
"""

    try:
        client = anthropic.Anthropic(api_key=os.getenv('CLAUDE_API_KEY'))

        # NOTE(review): both images are declared as image/png; confirm that
        # ImageUtils.image_to_base64_optimized really produces PNG data.
        image_blocks = [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": artwork['image_base64']
                }
            }
            for artwork in (artwork1_data, artwork2_data)
        ]

        message = client.messages.create(
            model=model,
            max_tokens=4000,
            messages=[
                {
                    "role": "user",
                    "content": [{"type": "text", "text": prompt}] + image_blocks
                }
            ]
        )

        # Only text blocks contribute to the reply that gets parsed
        response_text = "".join(
            block.text
            for block in message.content
            if getattr(block, 'type', None) == 'text'
        )

        return _parse_comparison_response(response_text)

    except Exception as e:
        st.error(f"Error calling Claude API: {str(e)}")
        return None
|
|
_SEVERITY_ICONS = {"HIGH": "🔴", "MEDIUM": "🟡", "LOW": "🟢"}


def _severity(entry, key='significance'):
    """Return ``(icon, level)`` for an entry; unknown levels get the MEDIUM icon."""
    level = entry.get(key, 'MEDIUM')
    return _SEVERITY_ICONS.get(str(level).upper(), "🟡"), level


def _dict_entries(results, key):
    """Return ``results[key]`` as a list of dicts, tolerating null or malformed values."""
    value = results.get(key)
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, dict)]


def _format_similarity(value):
    """Format a similarity ratio as a percentage, or ``N/A`` when it is not numeric."""
    try:
        return f"{float(value):.1%}"
    except (TypeError, ValueError):
        return "N/A"


def _render_pair(left_heading, left_body, right_heading, right_body):
    """Render two heading/text pairs in side-by-side columns."""
    left, right = st.columns(2)
    with left:
        st.markdown(left_heading)
        st.text(left_body)
    with right:
        st.markdown(right_heading)
        st.text(right_body)


def display_comparison_results(results, artwork1_data, artwork2_data):
    """Render the comparison result as summary metrics plus one tab per category.

    The result comes from a model reply, so list-valued fields that are
    missing, null or of the wrong type are treated as empty and a non-numeric
    ``overall_similarity`` is shown as ``N/A`` instead of raising.

    Args:
        results: Comparison dict (``overall_similarity``, ``comparison_summary``,
            ``text_differences``, ``layout_differences``, ``barcode_differences``,
            ``visual_differences``, ``compliance_impact``, ``recommendations``
            and optionally ``raw_response``).
        artwork1_data: Extracted data of the first artwork; only ``file_name``
            is read.
        artwork2_data: Extracted data of the second artwork; only ``file_name``
            is read.
    """
    if not results:
        st.error("No comparison results to display")
        return

    name1 = artwork1_data['file_name']
    name2 = artwork2_data['file_name']

    text_diffs = _dict_entries(results, 'text_differences')
    layout_diffs = _dict_entries(results, 'layout_differences')
    barcode_diffs = _dict_entries(results, 'barcode_differences')
    visual_diffs = _dict_entries(results, 'visual_differences')
    compliance_impacts = _dict_entries(results, 'compliance_impact')

    st.markdown("## 📊 Comparison Summary")

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Overall Similarity", _format_similarity(results.get('overall_similarity', 0.5)))
    with col2:
        total_differences = len(text_diffs) + len(layout_diffs) + len(barcode_diffs) + len(visual_diffs)
        st.metric("Total Differences", total_differences)
    with col3:
        st.metric("Compliance Impacts", len(compliance_impacts))

    if 'comparison_summary' in results:
        st.markdown(f"**Summary:** {results['comparison_summary']}")

    tabs = st.tabs(["📝 Text Differences", "📐 Layout Changes", "📱 Barcode Changes", "🎨 Visual Differences", "⚖️ Compliance Impact", "💡 Recommendations"])

    with tabs[0]:
        st.markdown("### Text Content Differences")
        if text_diffs:
            for diff in text_diffs:
                icon, level = _severity(diff)
                with st.expander(f"{icon} {diff.get('category', 'Text Difference')} - {level} Impact"):
                    _render_pair(
                        f"**{name1}:**", diff.get('artwork1_content', 'N/A'),
                        f"**{name2}:**", diff.get('artwork2_content', 'N/A'),
                    )
                    st.markdown(f"**Description:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant text differences found")

    with tabs[1]:
        st.markdown("### Layout and Positioning Changes")
        if layout_diffs:
            for diff in layout_diffs:
                icon, level = _severity(diff)
                with st.expander(f"{icon} {diff.get('category', 'Layout Change')} - {level} Impact"):
                    st.markdown(f"**Element:** {diff.get('element', 'Unknown element')}")
                    _render_pair(
                        f"**Position in {name1}:**", diff.get('artwork1_position', 'N/A'),
                        f"**Position in {name2}:**", diff.get('artwork2_position', 'N/A'),
                    )
                    st.markdown(f"**Impact:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant layout differences found")

    with tabs[2]:
        st.markdown("### Barcode Differences")
        if barcode_diffs:
            for diff in barcode_diffs:
                icon, level = _severity(diff)
                with st.expander(f"{icon} {diff.get('category', 'Barcode Change')} - {level} Impact"):
                    _render_pair(
                        f"**{name1} Barcodes:**", diff.get('artwork1_barcodes', 'N/A'),
                        f"**{name2} Barcodes:**", diff.get('artwork2_barcodes', 'N/A'),
                    )
                    st.markdown(f"**Analysis:** {diff.get('description', 'No description available')}")
        else:
            st.info("No significant barcode differences found")

    with tabs[3]:
        st.markdown("### Visual and Design Differences")
        if visual_diffs:
            for diff in visual_diffs:
                icon, level = _severity(diff)
                with st.expander(f"{icon} {diff.get('category', 'Visual Change')} - {level} Impact"):
                    st.markdown(f"**Description:** {diff.get('description', 'No description available')}")
                    if 'recommendation' in diff:
                        st.markdown(f"**Recommendation:** {diff['recommendation']}")
        else:
            st.info("No significant visual differences found")

    with tabs[4]:
        st.markdown("### Compliance and Regulatory Impact")
        if compliance_impacts:
            for impact in compliance_impacts:
                icon, level = _severity(impact, key='risk_level')
                with st.expander(f"{icon} {impact.get('area', 'Compliance Area')} - {level} Risk"):
                    st.markdown(f"**Impact:** {impact.get('impact', 'No description available')}")
                    st.markdown(f"**Recommendation:** {impact.get('recommendation', 'No recommendation provided')}")
        else:
            st.success("No compliance impacts identified")

    with tabs[5]:
        st.markdown("### Action Items and Recommendations")
        recommendations = results.get('recommendations')
        if isinstance(recommendations, list) and recommendations:
            for i, rec in enumerate(recommendations, 1):
                st.markdown(f"{i}. {rec}")
        else:
            st.info("No specific recommendations provided")

    # Present only on the fallback result built when the reply was not valid JSON
    if 'raw_response' in results:
        with st.expander("🔍 Raw Analysis Output"):
            st.text(results['raw_response'])
|
|
def _render_artwork_column(artwork_data):
    """Render one artwork: heading, cropped image, quality line and data counts."""
    file_name = artwork_data['file_name']
    st.markdown(f"### {file_name}")
    st.image(ImageUtils.crop_image(artwork_data['image']), caption=file_name, use_container_width=True)

    if 'image_quality' in artwork_data and 'image_size_bytes' in artwork_data:
        quality = artwork_data['image_quality']
        size_mb = artwork_data['image_size_bytes'] / (1024 * 1024)
        st.info(f"📊 Image Quality: {quality}% | Size: {size_mb:.2f}MB")

    with st.expander("📋 Extracted Data Summary"):
        bounding_boxes = artwork_data['bounding_boxes']
        barcode_results = artwork_data['barcode_results']
        # Empty or None collections are reported as zero
        st.metric("Text Elements", len(bounding_boxes) if bounding_boxes else 0)
        st.metric("Barcodes", len(barcode_results) if barcode_results else 0)


def display_side_by_side_images(artwork1_data, artwork2_data):
    """Show both artworks next to each other, each with its extraction summary.

    Args:
        artwork1_data: Extracted data of the first artwork; reads ``file_name``,
            ``image``, ``bounding_boxes``, ``barcode_results`` and, when both
            are present, ``image_quality`` and ``image_size_bytes``.
        artwork2_data: Same structure for the second artwork.
    """
    st.markdown("## 🖼️ Side-by-Side Comparison")

    columns = st.columns(2)
    for column, artwork_data in zip(columns, (artwork1_data, artwork2_data)):
        with column:
            _render_artwork_column(artwork_data)
|
|
def _select_artwork(slot, client_artwork_files):
    """Render the pickers for artwork ``slot`` (1 or 2) and keep its data in session state.

    An uploaded file takes precedence over a client-library selection. The
    extracted data is stored under ``artwork{slot}_data`` together with a
    token identifying its source under ``artwork{slot}_source``, so the
    extraction is repeated only when the source changes or the previous
    attempt produced nothing.
    """
    data_key = f"artwork{slot}_data"
    source_key = f"artwork{slot}_source"

    st.markdown(f"### 🎨 Artwork {slot}")
    client_tab, upload_tab = st.tabs(["📁 Client Files", "📤 Upload New"])

    selected_info = None
    with client_tab:
        if client_artwork_files:
            placeholder = f"Select artwork {slot}..."
            options = [placeholder] + [f["name"] for f in client_artwork_files]
            selected_name = st.selectbox(f"Choose artwork {slot}:", options, key=f"art{slot}_select")
            if selected_name != placeholder:
                selected_info = next(
                    (f for f in client_artwork_files if f["name"] == selected_name),
                    None,
                )
        else:
            st.info("No client artwork files found")

    with upload_tab:
        uploaded = st.file_uploader(f"Upload Artwork {slot} (PDF)", type=["pdf"], key=f"art{slot}_upload")

    if uploaded:
        source = ("upload", uploaded.name, getattr(uploaded, "size", None))
        container, verb, label = upload_tab, "Uploaded", uploaded.name
    elif selected_info is not None:
        source = ("client", selected_info["path"])
        container, verb, label = client_tab, "Loaded", selected_info["name"]
    else:
        # Nothing chosen in either tab: leave any previously loaded data as is
        return

    with container:
        needs_extraction = (
            st.session_state.get(source_key) != source
            or st.session_state.get(data_key) is None
        )
        if needs_extraction:
            if uploaded:
                pdf_file = uploaded
            else:
                file_content = load_artwork_content(selected_info)
                if not file_content:
                    return
                pdf_file = BytesIO(file_content)
                pdf_file.name = selected_info["name"]

            with st.spinner(f"Processing artwork {slot}..."):
                st.session_state[data_key] = extract_pdf_data(pdf_file, label)
            st.session_state[source_key] = source
            # Results computed for the previous pair no longer apply
            st.session_state.comparison_results = None

        if st.session_state[data_key]:
            st.success(f"✅ {verb} artwork {slot}: {label}")


def main():
    """Run the Streamlit page: pick two artworks, compare them, show the results.

    Streamlit re-executes this function on every interaction, so everything
    that must survive a rerun (extracted artwork data, the source it came from
    and the comparison result) lives in ``st.session_state``.
    """
    st.set_page_config(layout="wide", page_title="Artwork Comparison Tool")

    client_artwork_files = load_client_artwork_files()

    for state_key in (
        "artwork1_data",
        "artwork2_data",
        "artwork1_source",
        "artwork2_source",
        "comparison_results",
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = None

    st.title("🎨 Artwork Comparison Tool")
    st.write("Compare two packaging artwork PDFs to identify differences in text, layout, barcodes, and visual elements.")

    st.markdown("## 📁 Select Artworks to Compare")

    col1, col2 = st.columns(2)
    with col1:
        _select_artwork(1, client_artwork_files)
    with col2:
        _select_artwork(2, client_artwork_files)

    both_loaded = bool(st.session_state.artwork1_data and st.session_state.artwork2_data)

    if both_loaded:
        display_side_by_side_images(st.session_state.artwork1_data, st.session_state.artwork2_data)

    model_option = "claude-sonnet-4-20250514"

    if st.button("🔍 Compare Artworks", type="primary"):
        if both_loaded:
            with st.spinner("Analyzing artworks with Claude..."):
                st.session_state.comparison_results = compare_artworks_with_claude(
                    st.session_state.artwork1_data,
                    st.session_state.artwork2_data,
                    model=model_option
                )

            if st.session_state.comparison_results:
                st.success("✅ Comparison analysis complete!")
            else:
                st.error("❌ Comparison analysis failed")
        else:
            st.warning("⚠️ Please select or upload both artworks before comparing")

    # The result view reads file names from both artworks, so it is only
    # rendered while both are still loaded.
    if st.session_state.comparison_results and both_loaded:
        display_comparison_results(
            st.session_state.comparison_results,
            st.session_state.artwork1_data,
            st.session_state.artwork2_data
        )

    st.markdown("---")
    st.markdown("""
### 🛠️ How It Works
1. **Extract Content**: The tool extracts text, bounding boxes, images, and barcodes from both PDFs
2. **AI Analysis**: Claude analyzes the extracted data and visual elements to identify differences
3. **Structured Results**: Differences are categorized by type (text, layout, barcode, visual) and significance
4. **Compliance Assessment**: Potential compliance impacts are identified with risk levels and recommendations

### 🎯 Use Cases
- **Quality Control**: Verify artwork changes between versions
- **Brand Consistency**: Ensure visual elements remain consistent
- **Compliance Review**: Identify changes that might affect regulatory compliance
- **Change Documentation**: Track and document artwork modifications
""")
|
|
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()