Spaces:

PaddlePaddle
/

doc2page

Running

App Files Files Community

root committed 5 days ago

Commit

7928625

1 Parent(s): 1f4004d

add VL

Browse files

Files changed (1) hide show

app.py +120 -604

app.py CHANGED Viewed

@@ -9,13 +9,14 @@ from typing import Tuple
 import markdown
 from dotenv import load_dotenv
 from openai import OpenAI
 # Load environment variables from .env file
 load_dotenv()
-# API Configuration
 API_URL = os.getenv("API_URL", "")
-API_TOKEN = os.getenv("API_TOKEN", "")
 class Doc2PageConverter:
@@ -29,172 +30,91 @@ class Doc2PageConverter:
                 base_url="https://qianfan.baidubce.com/v2",
                 api_key=self.qianfan_token
             )
-    def extract_text_with_api(self, file_path: str) -> str:
-        """Extract text and structure using PP-StructureV3 API"""
         try:
-            if not API_URL or not API_TOKEN:
-                raise ValueError(
-                    "API_URL and API_TOKEN must be configured in .env file")
-            # Determine file type
-            file_extension = Path(file_path).suffix.lower()
-            if file_extension == ".pdf":
-                file_type = 0  # PDF
             else:
-                file_type = 1  # Image
-            # Read file content
-            with open(file_path, "rb") as f:
-                file_bytes = f.read()
-            # Encode file to base64
-            file_data = base64.b64encode(file_bytes).decode("ascii")
-            # Prepare API request
-            headers = {
-                "Authorization": f"token {API_TOKEN}",
-                "Content-Type": "application/json",
-            }
-            # Use default settings for simplicity
-            payload = {
-                "file": file_data,
-                "fileType": file_type,
-                "useFormulaRecognition": True,
-                "useChartRecognition": False,
-                "useDocOrientationClassify": False,
-                "useDocUnwarping": False,
-                "useTextlineOrientation": False,
-                "useSealRecognition": True,
-                "useRegionDetection": True,
-                "useTableRecognition": True,
-                "layoutThreshold": 0.5,
-                "layoutNms": True,
-                "layoutUnclipRatio": 1.0,
-                "textDetLimitType": "min",
-                "textTetLimitSideLen": 736,
-                "textDetThresh": 0.30,
-                "textDetBoxThresh": 0.60,
-                "textDetUnclipRatio": 1.5,
-                "textRecScoreThresh": 0.00,
-                "sealDetLimitType": "min",
-                "sealDetLimitSideLen": 736,
-                "sealDetThresh": 0.20,
-                "sealDetBoxThresh": 0.60,
-                "sealDetUnclipRatio": 0.5,
-                "sealRecScoreThresh": 0.00,
-                "useOcrResultsWithTableCells": True,
-                "useE2eWiredTableRecModel": False,
-                "useE2eWirelessTableRecModel": False,
-                "useWiredTableCellsTransToHtml": False,
-                "useWirelessWableCellsTransToHtml": False,
-                "useTableOrientationClassify": True,
-            }
-            # Call API
-            response = requests.post(
-                API_URL,
-                json=payload,
-                headers=headers,
-                timeout=300,  # 5 minutes timeout
-            )
             response.raise_for_status()
-            result = response.json()
-            # Process API response
-            layout_results = result.get("result", {}).get(
-                "layoutParsingResults", [])
-            markdown_content_list = []
-            markdown_list = []
-            for res in layout_results:
-                markdown_data = res["markdown"]
-                markdown_text = markdown_data["text"]
-                img_path_to_url = markdown_data["images"]
-                # Embed images into markdown
-                markdown_content = self.embed_images_into_markdown_text(
-                    markdown_text, img_path_to_url
-                )
-                markdown_content_list.append(markdown_content)
-                # Prepare for concatenation
-                markdown_with_content = markdown_data.copy()
-                markdown_with_content["text"] = markdown_content
-                markdown_list.append(markdown_with_content)
-            # Concatenate all pages
-            concatenated_markdown = self.concatenate_markdown_pages(markdown_list)
-            return concatenated_markdown
         except requests.exceptions.RequestException as e:
-            raise RuntimeError(f"API request failed: {str(e)}")
-        except Exception as e:
-            print(f"Error in API extraction: {e}")
             return ""
-    def embed_images_into_markdown_text(self, markdown_text, markdown_images):
-        """Embed images into markdown text"""
-        for img_path, img_url in markdown_images.items():
-            markdown_text = markdown_text.replace(
-                f'<img src="{img_path}"', f'<img src="{img_url}"'
-            )
-        return markdown_text
-    def concatenate_markdown_pages(self, markdown_list):
-        """Concatenate markdown pages into single document"""
-        markdown_texts = ""
-        previous_page_last_element_paragraph_end_flag = True
-        for res in markdown_list:
-            page_first_element_paragraph_start_flag: bool = res["isStart"]
-            page_last_element_paragraph_end_flag: bool = res["isEnd"]
-            if (
-                not page_first_element_paragraph_start_flag
-                and not previous_page_last_element_paragraph_end_flag
-            ):
-                last_char_of_markdown = (markdown_texts[-1]
-                                       if markdown_texts else "")
-                first_char_of_handler = res["text"]
-                last_is_chinese_char = (
-                    re.match(r"[\u4e00-\u9fff]", last_char_of_markdown)
-                    if last_char_of_markdown
-                    else False
-                )
-                first_is_chinese_char = (
-                    re.match(r"[\u4e00-\u9fff]", first_char_of_handler)
-                    if first_char_of_handler
-                    else False
-                )
-                if not (last_is_chinese_char or first_is_chinese_char):
-                    markdown_texts += " " + res["text"]
-                else:
-                    markdown_texts += res["text"]
-            else:
-                markdown_texts += "\n\n" + res["text"]
-            previous_page_last_element_paragraph_end_flag = (
-                page_last_element_paragraph_end_flag
-            )
-        return markdown_texts
     def markdown_to_html_with_ernie(self, markdown_text: str) -> str:
-        """Convert markdown to HTML using ERNIE API"""
         if not self.client:
-            # Fallback to basic markdown conversion if no API client
             return self.basic_markdown_to_html(markdown_text)
         try:
             prompt = f"""Please convert the following markdown text into a modern, clean HTML page. Use contemporary typography with the Inter font family and clean design principles. Make it visually appealing with proper CSS styling, responsive design, and excellent readability.
 Design requirements:
 - Use Inter font from Google Fonts
 - Clean, modern spacing and typography
@@ -202,12 +122,9 @@ Design requirements:
 - Good color contrast and hierarchy
 - Responsive design that works on all devices
 - Include proper HTML structure with head, body, and semantic elements
-Important: Add a footer at the bottom with "Powered by PaddleOCR and ERNIE" where PaddleOCR links to https://github.com/PaddlePaddle/PaddleOCR and ERNIE links to https://huggingface.co/BAIDU. Style it with modern, subtle styling.
 Markdown content:
 {markdown_text}
 IMPORTANT: Return ONLY the raw HTML code starting with <!DOCTYPE html> and ending with </html>. Do NOT wrap it in markdown code blocks or add any explanations. I need the pure HTML content that can be directly saved as an .html file."""
             messages = [{"role": "user", "content": prompt}]
@@ -218,31 +135,26 @@ IMPORTANT: Return ONLY the raw HTML code starting with <!DOCTYPE html> and endin
                 max_tokens=64000,
             )
-            html_content = response.choices[0].message.content
-            # Clean up markdown code block markers if present
             if html_content.startswith('```html'):
-                html_content = html_content[7:]  # Remove ```html
             elif html_content.startswith('```'):
-                html_content = html_content[3:]   # Remove ```
             if html_content.endswith('```'):
-                html_content = html_content[:-3]  # Remove ending ```
-            # Strip any extra whitespace
-            html_content = html_content.strip()
-            return html_content
         except Exception as e:
             print(f"Error calling ERNIE API: {e}")
             return self.basic_markdown_to_html(markdown_text)
     def basic_markdown_to_html(self, markdown_text: str) -> str:
-        """Fallback markdown to HTML conversion"""
         html = markdown.markdown(markdown_text)
-        # Wrap in a complete HTML document with styling
         complete_html = f"""
         <!DOCTYPE html>
         <html lang="en">
@@ -251,201 +163,29 @@ IMPORTANT: Return ONLY the raw HTML code starting with <!DOCTYPE html> and endin
             <meta name="viewport" content="width=device-width, initial-scale=1.0">
             <title>Converted Document</title>
             <style>
-                /* Modern, clean typography */
                 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
-                * {{
-                    margin: 0;
-                    padding: 0;
-                    box-sizing: border-box;
-                }}
                 body {{
-                    font-family: 'Inter', system-ui, -apple-system, sans-serif;
-                    font-weight: 400;
-                    line-height: 1.7;
-                    color: #1a1a1a;
-                    max-width: 850px;
-                    margin: 0 auto;
-                    padding: 32px 24px;
-                    background: #fafafa;
-                    font-size: 16px;
                 }}
                 .container {{
-                    background: #ffffff;
-                    padding: 48px;
-                    border-radius: 12px;
                     box-shadow: 0 1px 3px rgba(0,0,0,0.08), 0 4px 24px rgba(0,0,0,0.04);
-                    border: 1px solid rgba(0,0,0,0.06);
-                }}
-                /* Typography hierarchy */
-                h1, h2, h3, h4, h5, h6 {{
-                    font-weight: 600;
-                    color: #0f0f0f;
-                    margin: 32px 0 16px 0;
-                    letter-spacing: -0.02em;
-                }}
-                h1 {{
-                    font-size: 2.25rem;
-                    font-weight: 700;
-                    margin-top: 0;
-                    margin-bottom: 24px;
-                    border-bottom: 2px solid #e5e7eb;
-                    padding-bottom: 16px;
-                }}
-                h2 {{
-                    font-size: 1.75rem;
-                    margin-top: 48px;
-                }}
-                h3 {{
-                    font-size: 1.375rem;
-                    margin-top: 40px;
-                }}
-                h4 {{
-                    font-size: 1.125rem;
-                }}
-                p {{
-                    margin-bottom: 20px;
-                    color: #374151;
-                    line-height: 1.75;
-                }}
-                /* Code styling */
-                code {{
-                    font-family: 'SF Mono', Consolas, 'Liberation Mono', monospace;
-                    background-color: #f3f4f6;
-                    color: #1f2937;
-                    padding: 3px 6px;
-                    border-radius: 4px;
-                    font-size: 0.875rem;
-                    font-weight: 500;
-                }}
-                pre {{
-                    background-color: #f8fafc;
-                    border: 1px solid #e5e7eb;
-                    padding: 20px;
-                    border-radius: 8px;
-                    overflow-x: auto;
-                    margin: 24px 0;
-                    font-size: 0.875rem;
-                    line-height: 1.6;
                 }}
-                pre code {{
-                    background: none;
-                    padding: 0;
-                    border-radius: 0;
-                }}
-                /* Blockquotes */
-                blockquote {{
-                    border-left: 4px solid #6366f1;
-                    padding-left: 20px;
-                    margin: 24px 0;
-                    font-style: normal;
-                    color: #4b5563;
-                    background-color: #f8fafc;
-                    padding: 16px 20px;
-                    border-radius: 0 8px 8px 0;
-                }}
-                /* Images */
-                img {{
-                    max-width: 100%;
-                    height: auto;
-                    border-radius: 8px;
-                    margin: 20px 0;
-                    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
-                }}
-                /* Tables */
-                table {{
-                    border-collapse: collapse;
-                    width: 100%;
-                    margin: 24px 0;
-                    background: #ffffff;
-                    border-radius: 8px;
-                    overflow: hidden;
-                    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
-                }}
-                th, td {{
-                    padding: 16px;
-                    text-align: left;
-                    border-bottom: 1px solid #e5e7eb;
-                }}
-                th {{
-                    background-color: #f9fafb;
-                    font-weight: 600;
-                    color: #374151;
-                    font-size: 0.875rem;
-                    text-transform: uppercase;
-                    letter-spacing: 0.05em;
-                }}
-                tr:last-child td {{
-                    border-bottom: none;
-                }}
-                /* Lists */
-                ul, ol {{
-                    margin: 16px 0 20px 24px;
-                    color: #374151;
-                }}
-                li {{
-                    margin-bottom: 8px;
-                    line-height: 1.6;
-                }}
-                /* Links */
-                a {{
-                    color: #6366f1;
-                    text-decoration: none;
-                    font-weight: 500;
-                }}
-                a:hover {{
-                    color: #4f46e5;
-                    text-decoration: underline;
-                }}
-                /* Footer */
                 .footer {{
-                    margin-top: 64px;
-                    padding-top: 24px;
-                    border-top: 1px solid #e5e7eb;
-                    text-align: center;
-                    font-size: 14px;
-                    color: #6b7280;
-                    font-weight: 400;
-                }}
-                .footer a {{
-                    color: #6366f1;
-                    font-weight: 500;
-                    text-decoration: none;
-                }}
-                .footer a:hover {{
-                    color: #4f46e5;
-                    text-decoration: underline;
                 }}
             </style>
         </head>
         <body>
             <div class="container">
                 {html}
                 <div class="footer">
-                    Powered by <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR</a> and
                     <a href="https://huggingface.co/BAIDU" target="_blank">ERNIE</a>
                 </div>
             </div>
@@ -457,22 +197,11 @@ IMPORTANT: Return ONLY the raw HTML code starting with <!DOCTYPE html> and endin
     def process_document(self, file_path: str) -> Tuple[str, str]:
         """Process uploaded document and convert to HTML"""
         try:
-            file_extension = Path(file_path).suffix.lower()
-            # Check supported formats
-            if file_extension == '.pdf' or file_extension in [
-                '.png', '.jpg', '.jpeg', '.bmp', '.tiff']:
-                # Process with PP-StructureV3 API
-                markdown_content = self.extract_text_with_api(file_path)
-            else:
-                return ("Error: Unsupported file format. "
-                       "Please upload PDF or image files."), ""
             if not markdown_content.strip():
-                return ("Warning: No text content extracted "
-                       "from the document."), ""
-            # Convert markdown to HTML using ERNIE or fallback
             html_content = self.markdown_to_html_with_ernie(markdown_content)
             return markdown_content, html_content
@@ -480,311 +209,98 @@ IMPORTANT: Return ONLY the raw HTML code starting with <!DOCTYPE html> and endin
         except Exception as e:
             return f"Error processing document: {str(e)}", ""
-# Initialize converter
 converter = Doc2PageConverter()
 def process_upload(file):
-    """Process uploaded file and return markdown and HTML"""
     if file is None:
         return "Please upload a file.", "", ""
     try:
-        # Process the document
         markdown_result, html_result = converter.process_document(file.name)
         if html_result:
             return "Document processed successfully!", markdown_result, html_result
         else:
-            return markdown_result, "", ""  # Error message in markdown_result
     except Exception as e:
         return f"Error: {str(e)}", "", ""
 def save_html_file(html_content, filename="converted_page"):
-    """Save HTML content to file for download"""
     if not html_content:
         return None
-    # Create temporary file
     temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False,
                                           prefix=f"{filename}_")
     temp_file.write(html_content)
     temp_file.close()
     return temp_file.name
-# Create custom theme for a clean, modern look
 custom_theme = gr.themes.Default(
-    primary_hue="blue",
-    secondary_hue="gray",
-    neutral_hue="gray",
     font=("Inter", "system-ui", "sans-serif"),
-    font_mono=("SF Mono", "Consolas", "monospace")
 ).set(
-    body_background_fill="#fafafa",
-    background_fill_primary="#ffffff",
-    background_fill_secondary="#f8f9fa",
-    border_color_primary="#e5e7eb",
-    button_primary_background_fill="#6366f1",
-    button_primary_background_fill_hover="#4f46e5",
-    button_primary_text_color="#ffffff",
 )
-# Create Gradio interface
 with gr.Blocks(
-    title="Doc2Page - Simple Document Converter",
     theme=custom_theme,
-    css="""
-    .gradio-container {
-        max-width: 1200px !important;
-        margin: auto;
-        padding: 32px 16px;
-    }
-    /* Enhanced button styling */
-    .gr-button {
-        font-weight: 500;
-        border-radius: 10px;
-        font-size: 14px;
-        transition: all 0.2s ease;
-        box-shadow: 0 2px 4px rgba(99, 102, 241, 0.1);
-    }
-    .gr-button:hover {
-        transform: translateY(-1px);
-        box-shadow: 0 4px 8px rgba(99, 102, 241, 0.2);
-    }
-    /* Input styling */
-    .gr-textbox, .gr-file {
-        border-radius: 10px;
-        font-family: 'Inter', system-ui, sans-serif;
-        border: 1px solid #e5e7eb;
-        transition: border-color 0.2s ease;
-    }
-    .gr-textbox:focus, .gr-file:focus {
-        border-color: #6366f1;
-        box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
-    }
-    /* Typography */
-    h1 {
-        font-weight: 700;
-        color: #1a1a1a;
-        margin-bottom: 8px;
-        font-size: 2.5rem;
-    }
-    .app-description {
-        color: #6b7280;
-        font-size: 18px;
-        margin-bottom: 40px;
-        font-weight: 400;
-    }
-    /* Tab styling */
-    .gr-tab {
-        border-radius: 8px 8px 0 0;
-        font-weight: 500;
-    }
-    /* Card-like sections */
-    .gr-column {
-        background: rgba(255, 255, 255, 0.5);
-        border-radius: 12px;
-        padding: 16px;
-        margin: 8px;
-    }
-    /* Status styling */
-    .gr-textbox[data-testid*="status"] {
-        background-color: #f8fafc;
-        border: 1px solid #e2e8f0;
-    }
-    /* Download section styling */
-    .download-section {
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        border-radius: 12px;
-        padding: 20px;
-        color: white;
-        margin-top: 20px;
-    }
-    """
 ) as app:
-    # Header
-    gr.Markdown(
-        "# Doc2Page",
-        elem_classes="main-title"
-    )
-    gr.Markdown(
-        "🥃 Transform your documents into beautiful webpages!",
-        elem_classes="app-description"
-    )
-    # Main interface
     with gr.Row():
         with gr.Column(scale=1, min_width=350):
-            with gr.Group():
-                gr.Markdown("### 📄 Upload Document")
-                file_input = gr.File(
-                    label="Choose your file",
-                    file_types=[".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"],
-                    file_count="single",
-                    height=140
-                )
-                process_btn = gr.Button(
-                    "✨ Convert to Webpage",
-                    variant="primary",
-                    size="lg",
-                    scale=1
-                )
-                status_output = gr.Textbox(
-                    label="Status",
-                    placeholder="Ready to convert your document...",
-                    interactive=False,
-                    lines=3,
-                    max_lines=3
-                )
         with gr.Column(scale=2):
-            gr.Markdown("### 📋 Results")
             with gr.Tabs():
-                with gr.TabItem("❤️ Preview", id="preview"):
-                    html_preview = gr.HTML(
-                        label="",
-                        value="<div style='padding: 40px; text-align: center; color: #6b7280;'>Your converted webpage will appear here</div>",
-                    )
-                with gr.TabItem("📝 Markdown Source", id="markdown"):
-                    markdown_output = gr.Textbox(
-                        label="",
-                        placeholder="Extracted markdown content will appear here...",
-                        lines=22,
-                        interactive=False,
-                        show_copy_button=True
-                    )
-                with gr.TabItem("🌐 HTML Source", id="html"):
-                    html_output = gr.Code(
-                        label="",
-                        language="html",
-                        lines=22,
-                        interactive=False
-                    )
-    # Success & Download section
     with gr.Row(visible=False) as download_section:
-        with gr.Column():
-            gr.Markdown("""
-            <div style="background: linear-gradient(135deg, #10b981, #059669); border-radius: 12px; padding: 20px; color: white; text-align: center; margin: 20px 0;">
-                <h3 style="margin: 0 0 8px 0; color: white;">✅ Conversion Successful!</h3>
-                <p style="margin: 0; opacity: 0.9;">Your document has been converted to a beautiful webpage</p>
-            </div>
-            """)
-            with gr.Row():
-                with gr.Column(scale=1):
-                    gr.Markdown("### 📥 Download Your Webpage")
-                    download_btn = gr.File(
-                        label="HTML File",
-                        visible=True
-                    )
-                with gr.Column(scale=1):
-                    gr.Markdown("### 🚀 Quick Deploy Guide")
-                    gr.Markdown("""
-                    1. **GitHub Pages**: Upload as `index.html` to your repo
-                    2. **Netlify**: Drag & drop the file to netlify.app
-                    3. **Vercel**: Use their simple file deployment
-                    4. **Local**: Double-click to open in browser
-                    """, elem_classes="deploy-guide")
-    # Event handlers
     def process_and_update(file):
         status, markdown_content, html_content = process_upload(file)
-        # Create download file if HTML was generated
         download_file = None
         show_download = False
         if html_content:
             filename = Path(file.name).stem if file else "converted_page"
             download_file = save_html_file(html_content, filename)
             show_download = True
-        # Preview content with better styling when no content
-        preview_content = html_content if html_content else """
-        <div style='padding: 60px 20px; text-align: center; color: #6b7280;
-                    background: #f9fafb; border-radius: 8px; border: 2px dashed #d1d5db;'>
-            <h3 style='color: #9ca3af; margin: 0;'>No preview available</h3>
-            <p style='margin: 8px 0 0 0;'>Convert a document to see the preview</p>
-        </div>
-        """
         return (
-            status,  # status_output
-            markdown_content,  # markdown_output
-            html_content,  # html_output
-            preview_content,  # html_preview
-            download_file,  # download_btn
-            gr.update(visible=show_download)  # download_section
         )
     process_btn.click(
         fn=process_and_update,
         inputs=[file_input],
-        outputs=[
-            status_output,
-            markdown_output,
-            html_output,
-            html_preview,
-            download_btn,
-            download_section
-        ]
     )
-    # Footer
     gr.Markdown(
-        """
-        <div style="text-align: center; padding: 20px 0; margin-top: 40px; border-top: 1px solid #e5e7eb; color: #6b7280; font-size: 14px;">
-            Powered by <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank" style="color: #6366f1; text-decoration: none;">PaddleOCR</a>
-            for text extraction and <a href="https://huggingface.co/BAIDU" target="_blank" style="color: #6366f1; text-decoration: none;">ERNIE</a>
-            for HTML generation
-        </div>
-        """,
-        elem_id="footer"
     )
-    # Tips section
-    with gr.Accordion("💡 Tips for Best Results", open=False):
-        gr.Markdown("""
-        **File Types:** PDF, PNG, JPG, JPEG, BMP, TIFF
-        **For Best OCR Results:**
-        - Use high-resolution, clear images
-        - Ensure good contrast between text and background
-        - Avoid skewed or rotated documents
-        - PDFs generally produce the best results
-        **🚀 Deploy to GitHub Pages:**
-        1. Create a new GitHub repository or use an existing one
-        2. Download the generated HTML file from above
-        3. Upload it to your repository as `index.html`
-        4. Go to repository Settings → Pages
-        5. Select "Deploy from a branch" → Choose "main" branch
-        6. Your page will be live at `https://yourusername.github.io/yourrepository`
-        **💡 Pro Tips:**
-        - Enable custom domains in GitHub Pages settings
-        - Use GitHub Actions for automated deployments
-        - Consider using Jekyll themes for enhanced styling
-        """)
 if __name__ == "__main__":
     app.launch()

 import markdown
 from dotenv import load_dotenv
 from openai import OpenAI
+from urllib.parse import urlparse
 # Load environment variables from .env file
 load_dotenv()
+# API Configuration for PaddleOCR-VL
 API_URL = os.getenv("API_URL", "")
+TOKEN = os.getenv("TOKEN", "")
 class Doc2PageConverter:
                 base_url="https://qianfan.baidubce.com/v2",
                 api_key=self.qianfan_token
             )
+    def extract_text_with_vl_api(self, file_path: str) -> str:
+        if not API_URL:
+            raise ValueError("API_URL must be configured in .env file")
+        headers = {"Content-Type": "application/json"}
+        if TOKEN:
+            headers["Authorization"] = f"bearer {TOKEN}"
         try:
+            is_url = isinstance(file_path, str) and file_path.startswith(("http://", "https://"))
+            if is_url:
+                path = urlparse(file_path).path
+                ext = os.path.splitext(path)[1].lower()
             else:
+                ext = os.path.splitext(file_path)[1].lower()
+            if ext == '.pdf':
+                file_type = 0  # PDF 文件
+            elif ext in ['.png', '.jpg', '.jpeg', '.bmp', '.gif']:
+                file_type = 1  # 图片文件
+            else:
+                raise ValueError(f"不支持的文件类型: '{ext}'")
+            if is_url:
+                response = requests.get(file_path, timeout=60)
+                response.raise_for_status()
+                content = response.content
+            else:
+                with open(file_path, "rb") as f:
+                    content = f.read()
+            b64_content = base64.b64encode(content).decode("utf-8")
+        except Exception as e:
+            raise RuntimeError(f"读取和编码文件失败: {e}")
+        payload = {
+            "file": b64_content,
+            "fileType": file_type,
+            "useLayoutDetection": True,
+            "useDocUnwarping": False,
+            "useDocOrientationClassify": False,
+            "useChartRecognition": False,
+        }
+        try:
+            print(f"Sending PaddleOCR-VL API request to {API_URL}...")
+            response = requests.post(API_URL, json=payload, headers=headers, timeout=300)
             response.raise_for_status()
+            result_data = response.json()
         except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"PaddleOCR-VL API request failed: {e}")
+        except json.JSONDecodeError:
+            raise RuntimeError(f"Invalid JSON response from VL API: {response.text}")
+        if result_data.get("errorCode", -1) != 0:
+            error_msg = result_data.get("errorMessage", "Unknown API error")
+            raise RuntimeError(f"PaddleOCR-VL API returned an error: {error_msg}")
+        layout_results = result_data.get("result", {}).get("layoutParsingResults", [])
+        if not layout_results:
             return ""
+        first_page_result = layout_results[0]
+    #    print(first_page_result.get("prunedResult"))
+        markdown_data = first_page_result.get("markdown", {})
+        full_markdown_text = markdown_data.get("text", "")
+        image_map = markdown_data.get("images", {})
+        if image_map:
+            for placeholder, real_url in image_map.items():
+                full_markdown_text = full_markdown_text.replace(f'src="{placeholder}"', f'src="{real_url}"')
+        return full_markdown_text
     def markdown_to_html_with_ernie(self, markdown_text: str) -> str:
+        """Convert markdown to a full HTML page using the ERNIE chat API.
+
+        Falls back to basic_markdown_to_html() when no client is configured
+        or when the request or the response handling raises.
+        """
         # Without a client there is nothing to call; use the local conversion.
         if not self.client:
             return self.basic_markdown_to_html(markdown_text)
         try:
             prompt = f"""Please convert the following markdown text into a modern, clean HTML page. Use contemporary typography with the Inter font family and clean design principles. Make it visually appealing with proper CSS styling, responsive design, and excellent readability.
 Design requirements:
 - Use Inter font from Google Fonts
 - Clean, modern spacing and typography
 - Good color contrast and hierarchy
 - Responsive design that works on all devices
 - Include proper HTML structure with head, body, and semantic elements
+Important: Add a footer at the bottom with "Powered by PaddleOCR-VL and ERNIE" where PaddleOCR-VL links to https://github.com/PaddlePaddle/PaddleOCR and ERNIE links to https://huggingface.co/BAIDU. Style it with modern, subtle styling.
 Markdown content:
 {markdown_text}
 IMPORTANT: Return ONLY the raw HTML code starting with <!DOCTYPE html> and ending with </html>. Do NOT wrap it in markdown code blocks or add any explanations. I need the pure HTML content that can be directly saved as an .html file."""
             messages = [{"role": "user", "content": prompt}]
                 max_tokens=64000,
             )
+            html_content = response.choices[0].message.content.strip()
             # The prompt forbids code fences, but the reply may still carry
             # them: drop a leading ```html or ``` and a trailing ``` marker.
             if html_content.startswith('```html'):
+                html_content = html_content[7:]
             elif html_content.startswith('```'):
+                html_content = html_content[3:]
             if html_content.endswith('```'):
+                html_content = html_content[:-3]
+            return html_content.strip()
         # Best effort: any failure is printed and the local conversion is
         # returned instead of propagating the error.
         except Exception as e:
             print(f"Error calling ERNIE API: {e}")
             return self.basic_markdown_to_html(markdown_text)
     def basic_markdown_to_html(self, markdown_text: str) -> str:
+        """Fallback markdown to HTML conversion that needs no remote API.
+
+        Renders the text with the markdown package and interpolates it into
+        a static page template (Inter font, card container, footer links).
+        """
         # Rendered body; inserted into the template through the {html} field.
         html = markdown.markdown(markdown_text)
         complete_html = f"""
         <!DOCTYPE html>
         <html lang="en">
             <meta name="viewport" content="width=device-width, initial-scale=1.0">
             <title>Converted Document</title>
             <style>
                 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
                 body {{
+                    font-family: 'Inter', system-ui, sans-serif; line-height: 1.7; color: #1a1a1a;
+                    max-width: 850px; margin: 0 auto; padding: 32px 24px; background: #fafafa;
                 }}
                 .container {{
+                    background: #ffffff; padding: 48px; border-radius: 12px;
                     box-shadow: 0 1px 3px rgba(0,0,0,0.08), 0 4px 24px rgba(0,0,0,0.04);
                 }}
+                img {{ max-width: 100%; height: auto; border-radius: 8px; margin: 20px 0; }}
                 .footer {{
+                    margin-top: 64px; padding-top: 24px; border-top: 1px solid #e5e7eb;
+                    text-align: center; font-size: 14px; color: #6b7280;
                 }}
+                .footer a {{ color: #6366f1; text-decoration: none; }}
+                .footer a:hover {{ text-decoration: underline; }}
             </style>
         </head>
         <body>
             <div class="container">
                 {html}
                 <div class="footer">
+                    Powered by <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR-VL</a> and
                     <a href="https://huggingface.co/BAIDU" target="_blank">ERNIE</a>
                 </div>
             </div>
     def process_document(self, file_path: str) -> Tuple[str, str]:
         """Extract markdown from a document and render it as an HTML page.

         Returns a ``(markdown, html)`` pair. When extraction yields only
         whitespace, or any step raises, the first element is a
         human-readable message and the second is an empty string.
         """
         try:
             extracted = self.extract_text_with_vl_api(file_path)
             if extracted.strip():
                 rendered = self.markdown_to_html_with_ernie(extracted)
                 return extracted, rendered
             return "Warning: No text content extracted from the document.", ""
         except Exception as exc:
             # Report failures through the return value so callers always
             # receive a two-element tuple.
             return f"Error processing document: {exc!s}", ""
+# --- Gradio UI and event handling logic ---
 # Module-level converter instance used by process_upload below.
 converter = Doc2PageConverter()
 def process_upload(file):
     """Convert an uploaded file and return ``(status, markdown, html)``.

     A missing upload, an empty HTML result, or a raised exception all
     produce a status message with empty markdown and HTML fields.
     """
     if file is None:
         return "Please upload a file.", "", ""
     try:
         md_text, page_html = converter.process_document(file.name)
         if not page_html:
             # With no HTML, the first element is the converter's own
             # warning or error message; show it as the status.
             return md_text, "", ""
         return "Document processed successfully!", md_text, page_html
     except Exception as exc:
         return f"Error: {exc!s}", "", ""
 def save_html_file(html_content, filename="converted_page"):
     """Write *html_content* to a persistent temporary ``.html`` file.

     Args:
         html_content: HTML text to store. Falsy values produce no file.
         filename: Prefix used for the temporary file name.

     Returns:
         Path of the written file, or ``None`` when there is nothing to write.
     """
     if not html_content:
         return None
     # Encode explicitly as UTF-8: the platform default text encoding may be
     # unable to represent non-ASCII characters in the converted page.
     # delete=False keeps the file on disk after the handle is closed; the
     # ``with`` block closes the handle even if the write fails.
     with tempfile.NamedTemporaryFile(
         mode="w",
         suffix=".html",
         prefix=f"{filename}_",
         delete=False,
         encoding="utf-8",
     ) as temp_file:
         temp_file.write(html_content)
     return temp_file.name
 # Default Gradio theme with the app's palette and font stack, then
 # per-variable overrides for backgrounds, borders and the primary button.
 custom_theme = gr.themes.Default(
     primary_hue="blue",
     secondary_hue="gray",
     neutral_hue="gray",
     font=("Inter", "system-ui", "sans-serif"),
 ).set(
     body_background_fill="#fafafa",
     background_fill_primary="#ffffff",
     border_color_primary="#e5e7eb",
     button_primary_background_fill="#6366f1",
     button_primary_background_fill_hover="#4f46e5",
     button_primary_text_color="#ffffff",
 )
 # Build the UI: an upload/controls column, a tabbed results column, a
 # download row that starts hidden, and the click wiring between them.
 with gr.Blocks(
+    title="Doc2Page - Document to Webpage Converter",
     theme=custom_theme,
+    css=".gradio-container { max-width: 1200px !important; margin: auto; }"
 ) as app:
+    gr.Markdown("# Doc2Page\n🥃 Transform your documents into beautiful webpages!")
     with gr.Row():
         # Left column: file upload, convert button and status line.
         with gr.Column(scale=1, min_width=350):
+            file_input = gr.File(
+                label="📄 Upload Document",
+                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"],
+            )
+            process_btn = gr.Button("✨ Convert to Webpage", variant="primary")
+            status_output = gr.Textbox(label="Status", interactive=False)
         # Right column: rendered preview plus markdown and HTML sources.
         with gr.Column(scale=2):
             with gr.Tabs():
+                with gr.TabItem("❤️ Preview"):
+                    html_preview = gr.HTML(label="", value="<div style='text-align: center; color: #6b7280;'>Your converted webpage will appear here</div>")
+                with gr.TabItem("📝 Markdown Source"):
+                    markdown_output = gr.Textbox(label="", interactive=False, show_copy_button=True)
+                with gr.TabItem("🌐 HTML Source"):
+                    html_output = gr.Code(label="", language="html", interactive=False)
     # Created hidden; process_and_update toggles its visibility per result.
     with gr.Row(visible=False) as download_section:
+        gr.Markdown("### 📥 Download Your Webpage")
+        download_btn = gr.File(label="HTML File", visible=True)
     # Click handler: returns one value per entry of the `outputs` list
     # passed to process_btn.click below, in the same order.
     def process_and_update(file):
         status, markdown_content, html_content = process_upload(file)
         download_file = None
         show_download = False
         # Only a non-empty HTML result gets a download file and reveals the row.
         if html_content:
             filename = Path(file.name).stem if file else "converted_page"
             download_file = save_html_file(html_content, filename)
             show_download = True
+        preview_content = html_content or "<div style='text-align: center; color: #9ca3af;'>No preview available</div>"
         return (
+            status, markdown_content, html_content, preview_content,
+            download_file, gr.update(visible=show_download)
         )
     process_btn.click(
         fn=process_and_update,
         inputs=[file_input],
+        outputs=[status_output, markdown_output, html_output, html_preview, download_btn, download_section]
     )
     # Attribution footer shown below the main layout.
     gr.Markdown(
+        """<div style="text-align: center; padding: 20px 0; color: #6b7280;">
+        Powered by <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR-VL</a> &
+        <a href="https://huggingface.co/BAIDU" target="_blank">ERNIE</a>
+        </div>"""
     )
 # Start the Gradio server only when this file is executed as a script.
 if __name__ == "__main__":
     app.launch()