Spaces:

awacke1
/

Pillow-PyMuPDF-ReportLab

Sleeping

App Files Files Community

awacke1 committed on Mar 31

Commit

cd13cab

verified ·

1 Parent(s): 089f999

Update app.py

Browse files

Files changed (1) hide show

app.py +477 -50

app.py CHANGED Viewed

@@ -1,4 +1,130 @@
-import streamlit as st
 import base64
 from reportlab.lib.pagesizes import A4
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
@@ -13,7 +139,75 @@ from PIL import Image
 import io
 import os
-# Define the 12-point ML outline with emojis
 ml_outline = [
     "🌟 1. Mixture of Experts (MoE)",
     "🔥 2. Supervised Fine-Tuning (SFT) using PyTorch",
@@ -29,24 +223,93 @@ ml_outline = [
     "💻 12. ML Code Generation with Streamlit/Gradio/HTML5+JS"
 ]
 # Demo functions for PDF libraries
 def demo_pikepdf():
-    """Create a simple PDF with pikepdf"""
-    pdf = pikepdf.Pdf.new()
-    # Create a proper pikepdf Page object
-    page_dict = pikepdf.Dictionary(
-        Type=pikepdf.Name.Page,
-        MediaBox=[0, 0, 595, 842],
-        Contents=pdf.make_stream(b"BT /F1 12 Tf 100 700 Td (PikePDF Demo) Tj ET")
-    )
-    # Create a proper Pages dictionary
-    pages_dict = pikepdf.Dictionary(
-        Type=pikepdf.Name.Pages,
-        Count=1,
-        Kids=[pdf.make_indirect(page_dict)]
-    )
-    # Set the root
-    pdf.Root.Pages = pdf.make_indirect(pages_dict)
     # Save to buffer
     buffer = io.BytesIO()
     pdf.save(buffer)
@@ -54,21 +317,127 @@ def demo_pikepdf():
     return buffer.getvalue()
 def demo_fpdf():
-    """Create a simple PDF with fpdf"""
-    pdf = fpdf.FPDF()
     pdf.add_page()
-    pdf.set_font("Arial", size=12)
-    pdf.cell(200, 10, txt="FPDF Demo", ln=True)
     buffer = io.BytesIO()
     pdf.output(buffer)
     buffer.seek(0)
     return buffer.getvalue()
 def demo_pymupdf():
-    """Create a simple PDF with pymupdf"""
     doc = fitz.open()
-    page = doc.new_page()
-    page.insert_text((100, 100), "PyMuPDF Demo")
     buffer = io.BytesIO()
     doc.save(buffer)
     buffer.seek(0)
@@ -110,38 +479,96 @@ def demo_image_capture():
     return buffer.getvalue()
 # Main PDF creation using ReportLab
-def create_main_pdf(outline_items):
-    """Create a two-page landscape PDF with the outline split between pages"""
     buffer = io.BytesIO()
-    doc = SimpleDocTemplate(buffer, pagesize=(A4[1], A4[0]))  # Landscape
     styles = getSampleStyleSheet()
     story = []
-    # Title style
     title_style = styles['Heading1']
     title_style.textColor = colors.darkblue
-    # Normal style
-    normal_style = styles['Normal']
-    normal_style.fontSize = 12
-    normal_style.leading = 14
-    # Page 1: Items 1-6
-    story.append(Paragraph("Cutting-Edge ML Areas (1-6)", title_style))
-    story.append(Spacer(1, 12))
-    for item in outline_items[:6]:
-        story.append(Paragraph(item, normal_style))
-        story.append(Spacer(1, 6))
-    # Page break
-    story.append(Spacer(1, 500))  # Force new page
-    # Page 2: Items 7-12
-    story.append(Paragraph("Cutting-Edge ML Areas (7-12)", title_style))
-    story.append(Spacer(1, 12))
-    for item in outline_items[6:]:
-        story.append(Paragraph(item, normal_style))
-        story.append(Spacer(1, 6))
     doc.build(story)
     buffer.seek(0)

+# Streamlit UI
+st.title("🚀 Cutting-Edge ML Outline Generator")
+col1, col2 = st.columns(2)
+with col1:
+    st.header("📝 Markdown Outline")
+    # Display the markdown content
+    st.markdown(ml_markdown)
+    # Create a download button for the markdown file
+    st.download_button(
+        label="Download Markdown",
+        data=ml_markdown,
+        file_name="ml_outline.md",
+        mime="text/markdown"
+    )
+    # Show the markdown source code in an expandable section
+    with st.expander("View Markdown Source"):
+        st.code(ml_markdown, language="markdown")
+with col2:
+    st.header("📑 PDF Preview & Demos")
+    # Library Demos
+    st.subheader("Library Demos")
+    if st.button("Run PDF Library Demos"):
+        with st.spinner("Running demos..."):
+            # Create tabs for each demo
+            demo_tabs = st.tabs(["PikePDF", "FPDF", "PyMuPDF", "Image Demo"])
+            with demo_tabs[0]:
+                # pikepdf demo
+                pike_pdf = demo_pikepdf()
+                st.download_button("Download pikepdf Demo", pike_pdf, "pikepdf_demo.pdf")
+                st.write("PikePDF demo created successfully!")
+                st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
+            with demo_tabs[1]:
+                # fpdf demo
+                fpdf_pdf = demo_fpdf()
+                st.download_button("Download fpdf Demo", fpdf_pdf, "fpdf_demo.pdf")
+                st.write("FPDF demo created successfully!")
+                st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
+            with demo_tabs[2]:
+                # pymupdf demo
+                pymupdf_pdf = demo_pymupdf()
+                st.download_button("Download pymupdf Demo", pymupdf_pdf, "pymupdf_demo.pdf")
+                st.write("PyMuPDF demo created successfully!")
+                st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
+            with demo_tabs[3]:
+                # Image demo
+                img_data = demo_image_capture()
+                st.image(img_data, caption="Demo Image (Camera simulation)")
+    # Main PDF Generation
+    st.subheader("Main Outline PDF")
+    if st.button("Generate Main PDF"):
+        with st.spinner("Generating PDF..."):
+            try:
+                pdf_bytes = create_main_pdf(ml_markdown)
+                st.download_button(
+                    label="Download Main PDF",
+                    data=pdf_bytes,
+                    file_name="ml_outline.pdf",
+                    mime="application/pdf"
+                )
+                # Display the PDF in the app
+                base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
+                pdf_display = f'''
+                    <embed
+                        src="data:application/pdf;base64,{base64_pdf}"
+                        width="100%"
+                        height="400px"
+                        type="application/pdf">
+                '''
+                st.markdown(pdf_display, unsafe_allow_html=True)
+                st.success("PDF generated successfully! The PDF displays the multilevel markdown outline in a two-column layout.")
+            except Exception as e:
+                st.error(f"Error generating PDF: {str(e)}")
+    # Show the PDF rendering code in an expandable section
+    with st.expander("View PDF Rendering Code"):
+        st.code("""
+# Process multilevel markdown for PDF output
+def markdown_to_pdf_content(markdown_text):
+    # Convert markdown headers to styled text for PDF
+    lines = markdown_text.strip().split('\\n')
+    pdf_content = []
+    for line in lines:
+        if line.startswith('# '):
+            # Main header - will be handled separately
+            pass
+        elif line.startswith('## '):
+            # Section header - add as a bold item
+            section = line.replace('## ', '').strip()
+            pdf_content.append(f"<b>{section}</b>")
+        elif line.startswith('- '):
+            # List item - add as a normal item
+            item = line.replace('- ', '').strip()
+            pdf_content.append(item)
+    # Split the content for two columns
+    mid_point = len(pdf_content) // 2
+    left_column = pdf_content[:mid_point]
+    right_column = pdf_content[mid_point:]
+    return left_column, right_column
+        """, language="python")
+# Add custom CSS for better appearance
+st.markdown("""
+<style>
+    .stButton>button {
+        background-color: #4CAF50;
+        color: white;
+        font-weight: bold;
+    }
+    .stTabsimport streamlit as st
 import base64
 from reportlab.lib.pagesizes import A4
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 import io
 import os
+# Define the ML outline as a markdown string for multilevel content
+ml_markdown = """# Cutting-Edge ML Outline
+## Core ML Techniques
+- 🌟 **1. Mixture of Experts (MoE)**
+  - Conditional computation techniques
+  - Sparse gating mechanisms
+  - Training specialized sub-models
+- 🔥 **2. Supervised Fine-Tuning (SFT) using PyTorch**
+  - Loss function customization
+  - Gradient accumulation strategies
+  - Learning rate schedulers
+- 🤖 **3. Large Language Models (LLM) using Transformers**
+  - Attention mechanisms
+  - Tokenization strategies
+  - Position encodings
+## Training Methods
+- 📊 **4. Self-Rewarding Learning using NPS 0-10 and Verbatims**
+  - Custom reward functions
+  - Feedback categorization
+  - Signal extraction from text
+- 👍 **5. Reinforcement Learning from Human Feedback (RLHF)**
+  - Preference datasets
+  - PPO implementation
+  - KL divergence constraints
+- 🔗 **6. MergeKit: Merging Models to Same Embedding Space**
+  - TIES merging
+  - Task arithmetic
+  - SLERP interpolation
+## Optimization & Deployment
+- 📏 **7. DistillKit: Model Size Reduction with Spectrum Analysis**
+  - Knowledge distillation
+  - Quantization techniques
+  - Model pruning strategies
+- 🧠 **8. Agentic RAG Agents using Document Inputs**
+  - Vector database integration
+  - Query planning
+  - Self-reflection mechanisms
+- ⏳ **9. Longitudinal Data Summarization from Multiple Docs**
+  - Multi-document compression
+  - Timeline extraction
+  - Entity tracking
+## Knowledge Representation
+- 📑 **10. Knowledge Extraction using Markdown Knowledge Graphs**
+  - Entity recognition
+  - Relationship mapping
+  - Hierarchical structuring
+- 🗺️ **11. Knowledge Mapping with Mermaid Diagrams**
+  - Flowchart generation
+  - Sequence diagram creation
+  - State diagrams
+- 💻 **12. ML Code Generation with Streamlit/Gradio/HTML5+JS**
+  - Code completion
+  - Unit test generation
+  - Documentation synthesis
+"""
+# For compatibility with previous code, also maintain the list version
 ml_outline = [
     "🌟 1. Mixture of Experts (MoE)",
     "🔥 2. Supervised Fine-Tuning (SFT) using PyTorch",
     "💻 12. ML Code Generation with Streamlit/Gradio/HTML5+JS"
 ]
+# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    """Convert a markdown outline into two columns of PDF-ready strings.

    Only three kinds of line are recognised, matched against the start of the
    line exactly as written (leading whitespace is not skipped):

    * ``# title``    -- ignored; callers draw their own title.
    * ``## section`` -- emitted as ``<b>section</b>``.
    * ``- item``     -- emitted as ``item``.

    Every other line, including blank lines and indented sub-bullets such as
    ``  - detail``, is dropped.

    Args:
        markdown_text: The markdown source to convert.

    Returns:
        A ``(left_column, right_column)`` tuple of string lists. The kept
        entries are split at ``len // 2``, so the right column receives the
        extra entry when the count is odd.
    """
    pdf_content = []
    for line in markdown_text.strip().split('\n'):
        if line.startswith('## '):
            # Slice the marker off rather than using str.replace(): replace()
            # would also delete any later "## " inside the heading text.
            entry = f"<b>{line[3:].strip()}</b>"
        elif line.startswith('- '):
            # Same reasoning: "- one - two" must stay "one - two".
            entry = line[2:].strip()
        else:
            continue
        # Skip bullets that carry no text (e.g. a bare "- ").
        if entry.strip():
            pdf_content.append(entry)

    # Split the content for two columns.
    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
 # Demo functions for PDF libraries
 def demo_pikepdf():
+    """Create a two-column PDF with the markdown outline using pikepdf"""
+    # Process markdown content
+    left_column, right_column = markdown_to_pdf_content(ml_markdown)
+    # We'll use pymupdf (fitz) to create the content, then save with pikepdf
+    doc = fitz.open()
+    page = doc.new_page(width=842, height=595)  # A4 Landscape
+    # Set up fonts and colors
+    title_font = "helv-b"
+    section_font = "helv-b"
+    normal_font = "helv"
+    blue_color = (0, 0, 0.8)
+    black_color = (0, 0, 0)
+    # Add title
+    page.insert_text((50, 40), "Cutting-Edge ML Outline (PikePDF Demo)", fontname=title_font, fontsize=16, color=blue_color)
+    # First column
+    x1, y1 = 50, 80
+    for i, item in enumerate(left_column):
+        if item.startswith('<b>'):
+            # Section header
+            text = item.replace('<b>', '').replace('</b>', '')
+            page.insert_text((x1, y1 + i*25), text, fontname=section_font, fontsize=14, color=blue_color)
+        else:
+            # Normal item
+            page.insert_text((x1, y1 + i*25), item, fontname=normal_font, fontsize=11, color=black_color)
+    # Second column
+    x2, y2 = 450, 80
+    for i, item in enumerate(right_column):
+        if item.startswith('<b>'):
+            # Section header
+            text = item.replace('<b>', '').replace('</b>', '')
+            page.insert_text((x2, y2 + i*25), text, fontname=section_font, fontsize=14, color=blue_color)
+        else:
+            # Normal item
+            page.insert_text((x2, y2 + i*25), item, fontname=normal_font, fontsize=11, color=black_color)
+    # Draw a dividing line
+    page.draw_line((421, 70), (421, 550))
+    # Convert to pikepdf
+    temp_buffer = io.BytesIO()
+    doc.save(temp_buffer)
+    temp_buffer.seek(0)
+    pdf = pikepdf.Pdf.open(temp_buffer)
     # Save to buffer
     buffer = io.BytesIO()
     pdf.save(buffer)
     return buffer.getvalue()
 def demo_fpdf():
+    """Create a two-column PDF with the markdown outline using FPDF"""
+    # Process markdown content
+    left_column, right_column = markdown_to_pdf_content(ml_markdown)
+    pdf = fpdf.FPDF(orientation='L')  # Landscape
     pdf.add_page()
+    # Set title
+    pdf.set_font("Arial", 'B', size=16)
+    pdf.set_text_color(0, 0, 128)  # Dark blue
+    pdf.cell(0, 10, txt="Cutting-Edge ML Outline (FPDF Demo)", ln=True, align='C')
+    pdf.ln(10)
+    # First column
+    x_col1 = 20
+    y_start = pdf.get_y()
+    for item in left_column:
+        if item.startswith('<b>'):
+            # Section header
+            text = item.replace('<b>', '').replace('</b>', '')
+            pdf.set_font("Arial", 'B', size=14)
+            pdf.set_text_color(0, 0, 128)  # Dark blue
+        else:
+            # Normal item
+            text = item
+            pdf.set_font("Arial", size=11)
+            pdf.set_text_color(0, 0, 0)  # Black
+        pdf.set_x(x_col1)
+        pdf.multi_cell(180, 10, txt=text, align='L')
+        pdf.ln(2)
+    # Second column
+    x_col2 = pdf.w / 2 + 10
+    pdf.set_y(y_start)
+    for item in right_column:
+        if item.startswith('<b>'):
+            # Section header
+            text = item.replace('<b>', '').replace('</b>', '')
+            pdf.set_font("Arial", 'B', size=14)
+            pdf.set_text_color(0, 0, 128)  # Dark blue
+        else:
+            # Normal item
+            text = item
+            pdf.set_font("Arial", size=11)
+            pdf.set_text_color(0, 0, 0)  # Black
+        pdf.set_x(x_col2)
+        pdf.multi_cell(180, 10, txt=text, align='L')
+        pdf.ln(2)
+    # Draw a dividing line
+    pdf.line(pdf.w/2, 30, pdf.w/2, 280)
     buffer = io.BytesIO()
     pdf.output(buffer)
     buffer.seek(0)
     return buffer.getvalue()
 def demo_pymupdf():
+    """Create a two-column PDF with the markdown outline using PyMuPDF"""
+    # Process markdown content
+    left_column, right_column = markdown_to_pdf_content(ml_markdown)
     doc = fitz.open()
+    page = doc.new_page(width=842, height=595)  # A4 Landscape
+    # Set up fonts and colors
+    title_font = "helv-b"
+    section_font = "helv-b"
+    normal_font = "helv"
+    blue_color = (0, 0, 0.8)
+    black_color = (0, 0, 0)
+    # Add title
+    page.insert_text((300, 40), "Cutting-Edge ML Outline (PyMuPDF Demo)", fontname=title_font, fontsize=16, color=blue_color)
+    # First column
+    x1, y1 = 50, 80
+    line_height = 25
+    current_y = y1
+    for item in left_column:
+        if item.startswith('<b>'):
+            # Add extra space before sections (except the first one)
+            if current_y > y1:
+                current_y += 10
+            # Section header
+            text = item.replace('<b>', '').replace('</b>', '')
+            page.insert_text((x1, current_y), text, fontname=section_font, fontsize=14, color=blue_color)
+        else:
+            # Normal item
+            page.insert_text((x1 + 10, current_y), item, fontname=normal_font, fontsize=11, color=black_color)
+        current_y += line_height
+    # Second column
+    x2, y2 = 450, 80
+    current_y = y2
+    for item in right_column:
+        if item.startswith('<b>'):
+            # Add extra space before sections (except the first one)
+            if current_y > y2:
+                current_y += 10
+            # Section header
+            text = item.replace('<b>', '').replace('</b>', '')
+            page.insert_text((x2, current_y), text, fontname=section_font, fontsize=14, color=blue_color)
+        else:
+            # Normal item
+            page.insert_text((x2 + 10, current_y), item, fontname=normal_font, fontsize=11, color=black_color)
+        current_y += line_height
+    # Draw a dividing line
+    page.draw_line((421, 70), (421, 550))
     buffer = io.BytesIO()
     doc.save(buffer)
     buffer.seek(0)
     return buffer.getvalue()
 # Main PDF creation using ReportLab
+def create_main_pdf(markdown_text):
+    """Create a single-page landscape PDF with the outline in two columns"""
+    from reportlab.platypus import Table, TableStyle, Paragraph, Spacer
+    from reportlab.lib import pagesizes
+    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+    # Process markdown content
+    left_column, right_column = markdown_to_pdf_content(markdown_text)
     buffer = io.BytesIO()
+    doc = SimpleDocTemplate(
+        buffer,
+        pagesize=(A4[1], A4[0]),  # Landscape
+        leftMargin=50,
+        rightMargin=50,
+        topMargin=50,
+        bottomMargin=50
+    )
     styles = getSampleStyleSheet()
     story = []
+    # Create custom styles
     title_style = styles['Heading1']
     title_style.textColor = colors.darkblue
+    title_style.alignment = 1  # Center alignment
+    section_style = ParagraphStyle(
+        'SectionStyle',
+        parent=styles['Heading2'],
+        textColor=colors.darkblue,
+        spaceAfter=6
+    )
+    item_style = ParagraphStyle(
+        'ItemStyle',
+        parent=styles['Normal'],
+        fontSize=11,
+        leading=14,
+        leftIndent=10
+    )
+    # Add title
+    story.append(Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style))
+    story.append(Spacer(1, 20))
+    # Prepare data for table
+    table_data = []
+    max_rows = max(len(left_column), len(right_column))
+    # Process left and right columns into paragraphs
+    left_paragraphs = []
+    for item in left_column:
+        if item.startswith('<b>'):
+            text = item.replace('<b>', '').replace('</b>', '')
+            left_paragraphs.append(Paragraph(text, section_style))
+        else:
+            left_paragraphs.append(Paragraph(item, item_style))
+    right_paragraphs = []
+    for item in right_column:
+        if item.startswith('<b>'):
+            text = item.replace('<b>', '').replace('</b>', '')
+            right_paragraphs.append(Paragraph(text, section_style))
+        else:
+            right_paragraphs.append(Paragraph(item, item_style))
+    # Create one row per paragraph
+    for i in range(max_rows):
+        left_p = left_paragraphs[i] if i < len(left_paragraphs) else ""
+        right_p = right_paragraphs[i] if i < len(right_paragraphs) else ""
+        table_data.append([left_p, right_p])
+    # Calculate column widths
+    col_width = (A4[1] - 120) / 2.0  # Page width minus margins divided by 2
+    # Create the table with the data
+    table = Table(table_data, colWidths=[col_width, col_width])
+    # Style the table
+    table.setStyle(TableStyle([
+        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
+        ('ALIGN', (1, 0), (1, -1), 'LEFT'),
+        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
+        ('GRID', (0, 0), (-1, -1), 0.5, colors.white),
+        ('LINEAFTER', (0, 0), (0, -1), 1, colors.grey),
+    ]))
+    story.append(table)
     doc.build(story)
     buffer.seek(0)