import gradio as gr
from openai import OpenAI
import base64
from pathlib import Path
import json
from typing import List, Tuple, Optional
import time
from PIL import Image
import io
import sys

# Global client variable
client = None

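# NOTE: assumed runtime dependencies for this Space (a sketch, not a pinned
# requirements.txt from the original repo):
#   gradio, openai, Pillow, and PyMuPDF (recommended) or pdf2image + poppler-utils
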
def initialize_client(api_key: str) -> str:
    """Initialize the OpenAI client for OpenRouter and return a status message.

    Returns a single status string because the Settings tab wires this function
    to one output textbox.
    """
    global client
    if not api_key or not api_key.strip():
        return "⚠️ Please enter a valid API key"
    try:
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key.strip()
        )
        return "✅ API Key configured successfully! You can now start chatting."
    except Exception as e:
        return f"❌ Error initializing client: {str(e)}"

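# Optionally pre-configure the client from an environment variable / Space secret.
# This is an assumption, not part of the original UI flow, which only sets the key
# via the Settings tab; it is a no-op when the variable is absent.
import os
if os.environ.get("OPENROUTER_API_KEY"):
    initialize_client(os.environ["OPENROUTER_API_KEY"])
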
def encode_image(image_path: str) -> str:
    """Encode an image file to base64"""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

def pdf_to_images_pymupdf(pdf_path: str) -> List[Image.Image]:
    """Convert PDF to images using PyMuPDF (primary method)"""
    try:
        import fitz  # PyMuPDF
        doc = fitz.open(pdf_path)
        images = []
        for page_num in range(len(doc)):
            page = doc[page_num]
            # Render at 2x resolution for better quality
            mat = fitz.Matrix(2, 2)
            pix = page.get_pixmap(matrix=mat)
            # Convert to PIL Image
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(img)
        doc.close()
        return images
    except Exception as e:
        raise Exception(f"PyMuPDF error: {str(e)}")

def pdf_to_images_pdf2image(pdf_path: str) -> List[Image.Image]:
    """Convert PDF to images using pdf2image (requires poppler)"""
    try:
        from pdf2image import convert_from_path
        images = convert_from_path(pdf_path, dpi=200)
        return images
    except Exception as e:
        raise Exception(f"pdf2image error: {str(e)}")

def pdf_to_images(pdf_path: str) -> Tuple[List[Image.Image], str]:
    """
    Convert PDF to images with multiple fallback methods
    Returns: (list of images, method used); raises with a detailed message on failure
    """
    # Try PyMuPDF first (doesn't require poppler)
    try:
        images = pdf_to_images_pymupdf(pdf_path)
        return images, "PyMuPDF"
    except Exception as e1:
        pymupdf_error = str(e1)

    # Try pdf2image as a fallback
    try:
        images = pdf_to_images_pdf2image(pdf_path)
        return images, "pdf2image"
    except Exception as e2:
        pdf2image_error = str(e2)

    # Both methods failed
    error_msg = f"""PDF conversion failed. Tried multiple methods:
1. PyMuPDF: {pymupdf_error}
2. pdf2image: {pdf2image_error}

SOLUTION:
Install PyMuPDF (recommended - no external dependencies):
    pip install PyMuPDF

OR install pdf2image + poppler:
    pip install pdf2image
Then install poppler:
    - Ubuntu/Debian: sudo apt-get install poppler-utils
    - macOS: brew install poppler
    - Windows: Download from https://github.com/oschwartz10612/poppler-windows/releases/
"""
    raise Exception(error_msg)

def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
    """Convert a PIL Image to base64"""
    buffered = io.BytesIO()
    # Convert RGBA to RGB if needed
    if image.mode == 'RGBA':
        background = Image.new('RGB', image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3])
        image = background
    elif image.mode != 'RGB':
        image = image.convert('RGB')
    image.save(buffered, format=format, quality=95)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

def process_file(file_path: str) -> Tuple[List[dict], str]:
    """
    Process a file and return content blocks for the API
    Returns: (content_blocks, status_message)
    """
    file_extension = Path(file_path).suffix.lower()
    file_name = Path(file_path).name
    content_blocks = []
    status_message = ""
    try:
        if file_extension == '.pdf':
            # Convert PDF pages to images
            images, method = pdf_to_images(file_path)
            status_message = f"✅ PDF '{file_name}' converted to {len(images)} page(s) using {method}"
            for idx, img in enumerate(images, 1):
                base64_image = image_to_base64(img, format="JPEG")
                content_blocks.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                })
        elif file_extension == '.txt':
            # Read text file
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    text_content = f.read()
            except UnicodeDecodeError:
                # Try with a different encoding
                with open(file_path, 'r', encoding='latin-1') as f:
                    text_content = f.read()
            status_message = f"✅ Text file '{file_name}' loaded ({len(text_content)} characters)"
            content_blocks.append({
                "type": "text",
                "text": f"📄 Content from '{file_name}':\n\n{text_content}"
            })
        else:
            # Handle image files
            # Determine MIME type
            mime_type = "image/jpeg"
            if file_extension == '.png':
                mime_type = "image/png"
            elif file_extension == '.webp':
                mime_type = "image/webp"
            elif file_extension == '.gif':
                mime_type = "image/gif"
            elif file_extension == '.bmp':
                mime_type = "image/bmp"
            elif file_extension in ('.tiff', '.tif'):
                mime_type = "image/tiff"
            # Load and potentially convert the image
            try:
                img = Image.open(file_path)
                # Convert to RGB if necessary
                if img.mode in ('RGBA', 'LA', 'P'):
                    background = Image.new('RGB', img.size, (255, 255, 255))
                    if img.mode == 'P':
                        img = img.convert('RGBA')
                    if img.mode in ('RGBA', 'LA'):
                        background.paste(img, mask=img.split()[-1])
                    img = background
                elif img.mode != 'RGB':
                    img = img.convert('RGB')
                # Convert to base64
                base64_image = image_to_base64(img, format="JPEG")
                status_message = f"✅ Image '{file_name}' loaded ({img.width}x{img.height})"
                content_blocks.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                })
            except Exception:
                # If image processing fails, fall back to direct base64 encoding
                base64_image = encode_image(file_path)
                status_message = f"✅ Image '{file_name}' loaded (direct encoding)"
                content_blocks.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{base64_image}"
                    }
                })
    except Exception as e:
        error_msg = f"❌ Error processing '{file_name}': {str(e)}"
        content_blocks.append({
            "type": "text",
            "text": error_msg
        })
        status_message = error_msg
    return content_blocks, status_message

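# For reference, a rough sketch of what process_file returns for a 2-page PDF
# (illustrative only, with the base64 payloads elided):
#   ([{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}},
#     {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}],
#    "✅ PDF 'report.pdf' converted to 2 page(s) using PyMuPDF")
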
def process_message(
    message: str,
    history: List[Tuple[str, str]],
    files: Optional[List] = None,
    enable_reasoning: bool = True,
    temperature: float = 0.7,
    max_tokens: int = 2000
) -> Tuple[List[Tuple[str, str]], str, str]:
    """
    Process the user message and generate a response
    Returns: (updated_history, reasoning_text, status_message)
    """
    global client
    # Ensure history is a list before it is used in any early return
    if history is None:
        history = []
    if client is None:
        return (
            history + [(message if message else "No message",
                        "❌ Please configure your API key first in the Settings tab.")],
            "",
            "❌ API key not configured",
        )
    if not (message and message.strip()) and not files:
        return history + [("", "⚠️ Please enter a message or upload files.")], "", "⚠️ No input provided"
    status_messages = []
    try:
        # Build the messages array from the conversation history
        messages = []
        for user_msg, assistant_msg in history:
            if user_msg:  # Only add if the user message exists
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:  # Only add if the assistant message exists
                messages.append({"role": "assistant", "content": assistant_msg})
        # Build the current message content
        content = []
        # Process files if provided
        if files:
            file_count = 0
            total_pages = 0
            for file in files:
                if file is None:
                    continue
                # gr.File may return plain path strings or tempfile-like objects
                # depending on the Gradio version
                file_path = file.name if hasattr(file, "name") else file
                try:
                    file_blocks, status = process_file(file_path)
                    if file_blocks:  # Only add if we got valid blocks
                        content.extend(file_blocks)
                        status_messages.append(status)
                        file_count += 1
                        # Count pages for PDFs
                        if status and status.startswith("✅") and "page(s)" in status:
                            try:
                                pages = int(status.split("converted to ")[1].split(" page(s)")[0])
                                total_pages += pages
                            except (IndexError, ValueError):
                                pass
                except Exception as file_error:
                    status_messages.append(f"❌ Error processing file: {str(file_error)}")
            if file_count > 0:
                file_summary = f"📎 {file_count} file(s) uploaded"
                if total_pages > 0:
                    file_summary += f" ({total_pages} PDF pages)"
                content.insert(0, {"type": "text", "text": file_summary})
        # Add the text message
        if message and message.strip():
            content.append({"type": "text", "text": message})
        # If there is no content at all, return an error
        if not content:
            return history + [(message if message else "", "❌ No valid content to process")], "", "❌ No valid content"
        messages.append({"role": "user", "content": content})
        # Prepare API call parameters
        api_params = {
            "model": "nvidia/nemotron-nano-12b-v2-vl:free",
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens
        }
        # Add reasoning if enabled
        if enable_reasoning:
            api_params["extra_body"] = {"reasoning": {"enabled": True}}
        # Make the API call with additional error handling
        try:
            response = client.chat.completions.create(**api_params)
        except Exception as api_error:
            error_msg = f"❌ API Error: {str(api_error)}"
            return history + [(message if message else "", error_msg)], "", error_msg
        # Check that the response is valid
        if not response:
            error_message = "❌ Error: Received no response from the API"
            return history + [(message if message else "", error_message)], "", error_message
        if not getattr(response, 'choices', None):
            error_message = "❌ Error: Received an empty response from the API"
            return history + [(message if message else "", error_message)], "", error_message
        # Get the assistant message safely
        try:
            assistant_message = response.choices[0].message.content
            if not assistant_message:
                assistant_message = "⚠️ Model returned an empty response"
        except (AttributeError, IndexError) as e:
            assistant_message = f"❌ Error extracting response: {str(e)}"
        # Extract reasoning if available
        reasoning_text = ""
        if enable_reasoning:
            try:
                reasoning_details = getattr(response.choices[0].message, 'reasoning_details', None)
                if reasoning_details:
                    # default=str keeps json.dumps from failing on non-serializable SDK objects
                    reasoning_text = f"**🧠 Reasoning Process:**\n{json.dumps(reasoning_details, indent=2, default=str)}"
            except Exception:
                # Reasoning extraction failed, but that's okay
                pass
        # Update history
        display_message = message if message and message.strip() else "[Files uploaded]"
        new_history = history + [(display_message, assistant_message)]
        # Combine status messages
        combined_status = "\n".join(status_messages) if status_messages else "✅ Message processed successfully"
        return new_history, reasoning_text, combined_status
    except Exception as e:
        error_message = f"❌ Error: {str(e)}\n\nType: {type(e).__name__}"
        import traceback
        print(f"Full error: {traceback.format_exc()}")  # For debugging
        display_message = message if message and message.strip() else "[Error occurred]"
        return history + [(display_message, error_message)], "", error_message

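# For reference, a multimodal user turn built above has roughly this shape
# (an illustrative sketch, with the base64 payload elided):
#   {"role": "user", "content": [
#       {"type": "text", "text": "📎 1 file(s) uploaded"},
#       {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}},
#       {"type": "text", "text": "Summarize this page"}]}
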
def clear_conversation():
    """Clear the conversation history"""
    return [], "", ""

def check_dependencies() -> str:
    """Check which PDF processing libraries are available"""
    status = "**📦 PDF Processing Dependencies Status:**\n\n"
    # Check PyMuPDF
    try:
        import fitz
        status += "✅ **PyMuPDF (fitz)**: Installed and ready!\n"
        status += "  - No external dependencies needed\n"
        status += "  - This is the primary PDF processing method\n\n"
    except ImportError:
        status += "❌ **PyMuPDF (fitz)**: Not installed\n"
        status += "  - Install: `pip install PyMuPDF`\n\n"
    # Check pdf2image
    try:
        import pdf2image
        status += "✅ **pdf2image**: Installed\n"
        status += "  - Requires poppler-utils (external)\n"
        # Check whether the poppler binaries are actually on PATH
        import shutil
        if shutil.which("pdftoppm"):
            status += "  - ✅ poppler-utils found on PATH\n"
        else:
            status += "  - ⚠️ poppler-utils may not be installed\n"
        status += "\n"
    except ImportError:
        status += "⚠️ **pdf2image**: Not installed (optional fallback)\n"
        status += "  - Install: `pip install pdf2image`\n\n"
    # Check PIL/Pillow
    try:
        from PIL import Image
        status += "✅ **Pillow (PIL)**: Installed and ready!\n\n"
    except ImportError:
        status += "❌ **Pillow (PIL)**: Not installed\n"
        status += "  - Install: `pip install Pillow`\n\n"
    status += "**💡 Recommendation:**\n"
    status += "Install PyMuPDF for the best PDF support:\n"
    status += "`pip install PyMuPDF Pillow`"
    return status

# Custom CSS for premium design
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

* {
    font-family: 'Inter', sans-serif;
}

.gradio-container {
    background: linear-gradient(135deg, rgb(145 228 242) 0%, rgb(118, 75, 162) 100%) !important;
}

#main-container {
    background: rgba(255, 255, 255, 0.98);
    border-radius: 24px;
    padding: 32px;
    box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
    backdrop-filter: blur(10px);
}

.header-title {
    background: linear-gradient(135deg, rgb(108 58 198) 0%, rgb(18 121 44) 100%) text;
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 3em;
    font-weight: 700;
    text-align: center;
    margin-bottom: 0.3em;
    letter-spacing: -0.02em;
}

.header-subtitle {
    text-align: center;
    color: #666;
    font-size: 1.1em;
    margin-bottom: 1.5em;
    font-weight: 500;
}

.feature-badge {
    display: inline-block;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 6px 16px;
    border-radius: 20px;
    font-size: 0.85em;
    font-weight: 600;
    margin: 4px;
    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3);
}

.capability-card {
    background: linear-gradient(135deg, #f6f8fb 0%, #ffffff 100%);
    border: 2px solid #e0e7ff;
    border-radius: 16px;
    padding: 20px;
    margin: 10px 0;
    transition: all 0.3s ease;
}

.capability-card:hover {
    transform: translateY(-4px);
    box-shadow: 0 12px 24px rgba(102, 126, 234, 0.15);
    border-color: #667eea;
}

.tab-nav button {
    font-weight: 600 !important;
    border-radius: 12px !important;
    transition: all 0.3s ease !important;
}

.tab-nav button.selected {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
}

button.primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    border-radius: 12px !important;
    padding: 12px 32px !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3) !important;
}

button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 20px rgba(102, 126, 234, 0.4) !important;
}

button.secondary {
    background: white !important;
    border: 2px solid #667eea !important;
    color: #667eea !important;
    font-weight: 600 !important;
    border-radius: 12px !important;
    transition: all 0.3s ease !important;
}

button.secondary:hover {
    background: #f0f4ff !important;
}

.info-box {
    background: linear-gradient(135deg, #e0e7ff 0%, #f0f4ff 100%);
    border-left: 4px solid #667eea;
    border-radius: 12px;
    padding: 16px 20px;
    margin: 16px 0;
    font-size: 0.95em;
    line-height: 1.6;
}

.success-box {
    background: linear-gradient(135deg, #d4edda 0%, #e8f5e9 100%);
    border-left: 4px solid #28a745;
    border-radius: 12px;
    padding: 16px 20px;
    margin: 16px 0;
    color: #155724;
    font-weight: 500;
}

.chatbot {
    border-radius: 16px !important;
    border: 2px solid #e0e7ff !important;
    box-shadow: 0 8px 24px rgba(102, 126, 234, 0.1) !important;
}
"""

# Build Gradio Interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
    gr.HTML("""
    <div style='text-align: center; padding: 20px 0;'>
        <h1 class='header-title'>🚀 Nemotron Nano VL Studio</h1>
        <p class='header-subtitle'>Advanced Multimodal AI Assistant powered by NVIDIA Nemotron Nano 12B v2 VL</p>
        <div style='margin: 20px 0;'>
            <span class='feature-badge'>📄 Document Intelligence</span>
            <span class='feature-badge'>🎬 Video Understanding</span>
            <span class='feature-badge'>🧠 Reasoning Engine</span>
            <span class='feature-badge'>📊 Chart Analysis</span>
            <span class='feature-badge'>🔤 OCR Excellence</span>
        </div>
    </div>
    """)
    with gr.Row(elem_id="main-container"):
        with gr.Column():
            with gr.Tabs():
                # Chat Tab
                with gr.Tab("💬 Chat Interface", elem_classes=["tab-nav"]):
                    gr.HTML("""
                    <div class='info-box'>
                        <strong>🎯 What can I do?</strong><br>
                        • Analyze images, documents, and charts<br>
                        • Perform OCR and text extraction from PDFs<br>
                        • Reason through complex problems<br>
                        • Answer questions about visual content<br>
                        • Process multi-image documents and PDFs
                    </div>
                    """)
                    chatbot = gr.Chatbot(
                        label="Conversation",
                        height=500,
                        show_copy_button=True,
                        avatar_images=(None, "https://www.nvidia.com/favicon.ico"),
                        elem_classes=["chatbot"]
                    )
                    file_status = gr.Textbox(
                        label="📋 File Processing Status",
                        lines=2,
                        interactive=False,
                        visible=True
                    )
                    with gr.Row():
                        msg = gr.Textbox(
                            label="Your Message",
                            placeholder="Ask me anything about images, documents, PDFs, or reasoning tasks...",
                            lines=3,
                            scale=4
                        )
                    with gr.Row():
                        files = gr.File(
                            label="📎 Upload Files (Images, PDFs, Documents - Multi-file support)",
                            file_count="multiple",
                            file_types=[".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp", ".tiff", ".pdf", ".txt"],
                            scale=3
                        )
                    with gr.Row():
                        submit_btn = gr.Button("🚀 Send", variant="primary", scale=2, elem_classes=["primary"])
                        clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1, elem_classes=["secondary"])
                    reasoning_display = gr.Textbox(
                        label="🧠 Reasoning Process (when enabled)",
                        lines=6,
                        interactive=False
                    )
                # Settings Tab
                with gr.Tab("⚙️ Settings", elem_classes=["tab-nav"]):
                    gr.HTML("""
                    <div class='info-box'>
                        <strong>🔑 API Configuration</strong><br>
                        Get your free API key from <a href='https://openrouter.ai/keys' target='_blank'>OpenRouter</a>
                    </div>
                    """)
                    api_key_input = gr.Textbox(
                        label="OpenRouter API Key",
                        placeholder="sk-or-v1-...",
                        type="password",
                        lines=1
                    )
                    api_status = gr.Textbox(label="Status", interactive=False)
                    save_key_btn = gr.Button("💾 Save API Key", variant="primary", elem_classes=["primary"])
                    gr.HTML("<hr style='margin: 30px 0; border: none; border-top: 2px solid #e0e7ff;'>")
                    gr.HTML("""
                    <div class='info-box'>
                        <strong>🎛️ Model Parameters</strong><br>
                        Fine-tune the model's behavior
                    </div>
                    """)
                    enable_reasoning = gr.Checkbox(
                        label="🧠 Enable Reasoning Mode",
                        value=True,
                        info="Show the model's step-by-step thinking process"
                    )
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=0.7,
                        step=0.1,
                        label="🌡️ Temperature",
                        info="Higher = more creative, Lower = more focused"
                    )
                    max_tokens = gr.Slider(
                        minimum=256,
                        maximum=4096,
                        value=2000,
                        step=256,
                        label="📏 Max Tokens",
                        info="Maximum length of the response"
                    )
                    gr.HTML("<hr style='margin: 30px 0; border: none; border-top: 2px solid #e0e7ff;'>")
                    gr.HTML("""
                    <div class='info-box'>
                        <strong>📦 Check Dependencies</strong><br>
                        Verify that PDF processing libraries are installed
                    </div>
                    """)
                    check_deps_btn = gr.Button("🔍 Check Dependencies", variant="secondary", elem_classes=["secondary"])
                    deps_status = gr.Markdown(label="Dependency Status")
                    gr.HTML("""
                    <div class='info-box' style='margin-top: 20px;'>
                        <strong>📦 Installation Guide:</strong><br><br>
                        <strong>Recommended (PyMuPDF - No external dependencies):</strong><br>
                        <code>pip install PyMuPDF Pillow openai gradio</code><br><br>
                        <strong>Alternative (pdf2image - Requires poppler):</strong><br>
                        <code>pip install pdf2image Pillow openai gradio</code><br><br>
                        <strong>Poppler installation (for pdf2image):</strong><br>
                        • Ubuntu/Debian: <code>sudo apt-get install poppler-utils</code><br>
                        • macOS: <code>brew install poppler</code><br>
                        • Windows: Download from <a href="https://github.com/oschwartz10612/poppler-windows/releases/" target="_blank">GitHub</a>
                    </div>
                    """)
                # File Support Tab
                with gr.Tab("📁 Supported Files", elem_classes=["tab-nav"]):
                    gr.HTML("""
                    <div style='text-align: center; margin-bottom: 30px;'>
                        <h2 style='color: #667eea; font-size: 2em; margin-bottom: 10px;'>📁 Supported File Types</h2>
                        <p style='color: #666; font-size: 1.1em;'>Nemotron Nano 2 VL accepts a wide variety of file formats</p>
                    </div>
                    <div class='capability-card' style='background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%);'>
                        <h3 style='color: #667eea; display: flex; align-items: center; gap: 10px;'>
                            🖼️ Image Files
                        </h3>
                        <div style='display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-top: 15px;'>
                            <div style='background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
                                <strong style='color: #667eea;'>📷 JPEG/JPG</strong>
                                <p style='margin: 5px 0 0 0; color: #666; font-size: 0.9em;'>Standard photo format</p>
                            </div>
                            <div style='background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
                                <strong style='color: #667eea;'>🎨 PNG</strong>
                                <p style='margin: 5px 0 0 0; color: #666; font-size: 0.9em;'>Lossless with transparency</p>
                            </div>
                            <div style='background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
                                <strong style='color: #667eea;'>🎬 GIF</strong>
                                <p style='margin: 5px 0 0 0; color: #666; font-size: 0.9em;'>Animated images</p>
                            </div>
                            <div style='background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
                                <strong style='color: #667eea;'>🌐 WebP</strong>
                                <p style='margin: 5px 0 0 0; color: #666; font-size: 0.9em;'>Modern web format</p>
                            </div>
                            <div style='background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
                                <strong style='color: #667eea;'>🖼️ BMP</strong>
                                <p style='margin: 5px 0 0 0; color: #666; font-size: 0.9em;'>Bitmap images</p>
                            </div>
                            <div style='background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
                                <strong style='color: #667eea;'>📄 TIFF</strong>
                                <p style='margin: 5px 0 0 0; color: #666; font-size: 0.9em;'>High-quality scans</p>
                            </div>
                        </div>
                    </div>
                    <div class='capability-card' style='background: linear-gradient(135deg, #fff3e0 0%, #fce4ec 100%); margin-top: 20px;'>
                        <h3 style='color: #f57c00; display: flex; align-items: center; gap: 10px;'>
                            📄 Document Files
                        </h3>
                        <div style='display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin-top: 15px;'>
                            <div style='background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
                                <strong style='color: #f57c00; font-size: 1.1em;'>📄 PDF Documents</strong>
                                <p style='margin: 10px 0 0 0; color: #666; line-height: 1.6;'>
                                    • Multi-page support<br>
                                    • Automatic conversion to images<br>
                                    • PyMuPDF (recommended)<br>
                                    • Scanned documents<br>
                                    • Forms and tables
                                </p>
                            </div>
                            <div style='background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'>
                                <strong style='color: #f57c00; font-size: 1.1em;'>📝 Text Files (.txt)</strong>
                                <p style='margin: 10px 0 0 0; color: #666; line-height: 1.6;'>
                                    • Plain text documents<br>
                                    • Code snippets<br>
                                    • Notes and logs<br>
                                    • UTF-8 encoding<br>
                                    • Configuration files
                                </p>
                            </div>
                        </div>
                    </div>
                    <div class='success-box' style='margin-top: 20px;'>
                        <strong>📄 PDF Processing:</strong><br>
                        This app uses <strong>PyMuPDF (fitz)</strong> as the primary method for PDF conversion.<br>
                        • ✅ No external dependencies (no poppler needed)<br>
                        • ✅ Fast and reliable<br>
                        • ✅ Automatic fallback to pdf2image if needed<br>
                        • ✅ Clear error messages with installation instructions
                    </div>
                    """)
                # Examples Tab
                with gr.Tab("📚 Use Cases", elem_classes=["tab-nav"]):
                    gr.HTML("""
                    <div class='capability-card'>
                        <h3>📈 Financial Report Analysis</h3>
                        <p><strong>Example:</strong> "Extract all the key metrics from this financial report"</p>
                        <p><strong>What it extracts:</strong> Revenue, Net Profit, EBITDA, Cash Flow, Assets, Liabilities, Ratios, YoY Growth</p>
                    </div>
                    <div class='capability-card'>
                        <h3>🔤 OCR & Text Extraction</h3>
                        <p><strong>Example:</strong> "What text appears in this scanned document?"</p>
                        <p>State-of-the-art optical character recognition for any text in images or PDFs.</p>
                    </div>
                    <div class='capability-card'>
                        <h3>📊 Chart & Data Visualization</h3>
                        <p><strong>Example:</strong> "Analyze the trends in these charts"</p>
                        <p>Understand bar charts, line graphs, pie charts, scatter plots, and complex visualizations.</p>
                    </div>
                    <div class='capability-card'>
                        <h3>🧠 Advanced Reasoning</h3>
                        <p><strong>Example:</strong> "How many r's are in 'strawberry'? Think step by step."</p>
                        <p>Transparent reasoning process shows how the model arrives at answers.</p>
                    </div>
                    <div class='capability-card'>
                        <h3>📑 Multi-Page Documents</h3>
                        <p><strong>Example:</strong> Upload a PDF and ask "Summarize the key points from all pages"</p>
                        <p>Process entire documents with multiple pages simultaneously.</p>
                    </div>
                    <div class='capability-card'>
                        <h3>🏢 Business Document Processing</h3>
                        <p><strong>Example:</strong> "Extract information from this invoice/receipt/form"</p>
                        <p>Handle invoices, receipts, forms, contracts, and structured business documents.</p>
                    </div>
                    """)
                    gr.HTML("""
                    <div class='success-box' style='margin-top: 30px;'>
                        <strong>💡 Pro Tips:</strong><br>
                        • Upload high-quality scans for best OCR results<br>
                        • Enable reasoning mode for complex financial analysis<br>
                        • Ask specific questions to get targeted information<br>
                        • Upload multiple related documents for comparison<br>
                        • Use clear, descriptive questions for better answers
                    </div>
                    """)
                # About Tab
                with gr.Tab("ℹ️ About", elem_classes=["tab-nav"]):
                    gr.Markdown("""
                    # 🚀 About Nemotron Nano 12B v2 VL

                    ## 🎯 Model Overview
                    **NVIDIA Nemotron Nano 2 VL** is a cutting-edge 12-billion-parameter open multimodal reasoning model
                    designed for video understanding and document intelligence.

                    ## ✨ Key Features
                    - **🏗️ Hybrid Architecture**: Combines Transformer accuracy with Mamba's efficient sequence modeling
                    - **⚡ High Performance**: Superior throughput and lower latency
                    - **🏆 Leading Results**: ~74 average score across major benchmarks
                    - **🎯 Specialized Training**: NVIDIA-curated synthetic datasets
                    - **🎬 Video Support**: Efficient Video Sampling (EVS) for long-form content
                    - **🔓 Open Source**: Released under a permissive NVIDIA open license

                    ## 📊 Benchmark Performance
                    Achieves leading results on:
                    - OCRBench v2
                    - MMMU
                    - MathVista
                    - AI2D
                    - OCR-Reasoning
                    - ChartQA
                    - DocVQA
                    - Video-MME

                    ## 🔧 Deployment
                    Supported across:
                    - NVIDIA NeMo
                    - NVIDIA NIM
                    - Major inference runtimes

                    ## 🔗 Learn More
                    - [OpenRouter API](https://openrouter.ai/)
                    - [NVIDIA NeMo](https://www.nvidia.com/en-us/ai-data-science/products/nemo/)

                    ---

                    <div style='text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 12px; color: white;'>
                        <strong>Built with ❤️ using Gradio and powered by NVIDIA Nemotron</strong>
                    </div>
                    """)
    # Event Handlers
    save_key_btn.click(
        fn=initialize_client,
        inputs=[api_key_input],
        outputs=[api_status]
    )
    check_deps_btn.click(
        fn=check_dependencies,
        outputs=[deps_status]
    )
    submit_btn.click(
        fn=process_message,
        inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
        outputs=[chatbot, reasoning_display, file_status]
    ).then(
        lambda: ("", None),
        outputs=[msg, files]
    )
    msg.submit(
        fn=process_message,
        inputs=[msg, chatbot, files, enable_reasoning, temperature, max_tokens],
        outputs=[chatbot, reasoning_display, file_status]
    ).then(
        lambda: ("", None),
        outputs=[msg, files]
    )
    clear_btn.click(
        fn=clear_conversation,
        outputs=[chatbot, reasoning_display, file_status]
    )

# Launch the app
if __name__ == "__main__":
    app.launch(
        share=True
    )
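    # Note: on Hugging Face Spaces the public share link is unnecessary; for a local or
    # container deployment, a common alternative (an assumption, not from the original) is:
    #   app.launch(server_name="0.0.0.0", server_port=7860)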