Spaces:

lexicalspace
/

Blogger-Toolkit

Paused

App Files Community

lexicalspace committed 21 days ago

Commit

569e7a5

verified ·

1 Parent(s): df96de6

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -142

app.py CHANGED Viewed

@@ -32,19 +32,24 @@ from io import BytesIO
 def run_ultimate_pdf_converter():
     """
-    The Ultimate Text-to-PDF Converter.
-    Contains ~55 features grouped into:
-    1. Smart Typography (Symbols, Quotes)
-    2. Markdown Engine (Headers, Tables, Code Blocks, Lists)
-    3. Media Handler (Images, Links)
-    4. Layout Engine (Margins, Orientation, Fonts)
-    5. LMS Sanitizer (Cleaning junk text)
     """
-    # --- CONSTANTS & CONFIG ---
-    # Feature Group 1: Smart Symbol Map (20+ symbols)
     SMART_SYMBOLS = {
         r'<->': '↔', r'->': '→', r'<-': '←', r'=>': '⇒', r'<=': '≤', r'>=': '≥', r'!=': '≠',
         r'\.\.\.': '…', r'\(c\)': '©', r'\(r\)': '®', r'\(tm\)': '™',
@@ -54,124 +59,131 @@ def run_ultimate_pdf_converter():
         r'deg': '°', r'infinity': '∞', r'sqrt': '√'
     }
-    # --- INTERNAL CLASS: PDF GENERATOR ---
     class UltimatePDF(FPDF):
-        def __init__(self, orientation='P', unit='mm', format='A4', font_cache_dir="."):
             super().__init__(orientation=orientation, unit=unit, format=format)
-            self.font_cache_dir = font_cache_dir
-            self.ensure_fonts()
             self.set_auto_page_break(auto=True, margin=15)
         def ensure_fonts(self):
-            # Feature: Auto-download Unicode Font
-            font_path = os.path.join(self.font_cache_dir, "DejaVuSans.ttf")
             font_url = "https://github.com/dejavu-fonts/dejavu-fonts/raw/master/ttf/DejaVuSans.ttf"
-            if not os.path.exists(font_path):
-                try:
-                    r = requests.get(font_url, timeout=10)
-                    with open(font_path, "wb") as f:
-                        f.write(r.content)
-                except:
-                    pass # Fallback handled later
-            if os.path.exists(font_path):
-                self.add_font('DejaVu', '', font_path, uni=True)
-                self.add_font('DejaVu', 'B', font_path, uni=True) # Bold attempt
-                self.main_font = 'DejaVu'
-            else:
-                self.main_font = 'Arial'
         def header(self):
-            # Feature: Automatic Header with Date/Page
             if getattr(self, 'show_header', False):
                 self.set_font(self.main_font, '', 8)
                 self.set_text_color(128)
-                self.cell(0, 10, f'Generated by Ultimate PDF | {self.title_meta}', 0, 0, 'R')
                 self.ln(10)
         def footer(self):
-            # Feature: Page Numbering
             self.set_y(-15)
             self.set_font(self.main_font, '', 8)
             self.set_text_color(128)
             self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
         def add_markdown_header(self, text, level):
-            # Feature: Dynamic Header Sizes (H1, H2, H3)
-            sizes = {1: 24, 2: 18, 3: 14}
             self.set_font(self.main_font, '', sizes.get(level, 12))
-            self.set_text_color(0, 50, 100) # Navy Blue
             self.cell(0, 10, text, ln=True)
-            self.set_text_color(0) # Reset
             self.set_font(self.main_font, '', 12)
         def add_code_block(self, code_lines):
-            # Feature: Code Block Formatting (Gray background, Monospace)
             self.set_font("Courier", size=10)
-            self.set_fill_color(240, 240, 240) # Light Gray
             for line in code_lines:
-                self.cell(0, 6, line, ln=True, fill=True, border=0)
-            self.set_font(self.main_font, '', 12) # Reset
-            self.ln(2)
         def add_table(self, table_lines):
-            # Feature: ASCII Table Parsing (Lines with |)
             self.set_font(self.main_font, '', 10)
-            cell_height = 8
             for row in table_lines:
                 cols = [c.strip() for c in row.split('|') if c.strip()]
                 if not cols: continue
-                # Dynamic width calculation
-                col_width = (self.w - 30) // len(cols)
                 for col in cols:
-                    self.cell(col_width, cell_height, col, border=1)
                 self.ln()
-            self.set_font(self.main_font, '', 12) # Reset
             self.ln(5)
         def add_blockquote(self, text):
-            # Feature: Blockquotes (Indented, Italic)
-            self.set_font(self.main_font, '', 12)
-            self.set_text_color(100)
-            self.set_x(self.l_margin + 10) # Indent
-            self.multi_cell(0, 8, f"“ {text} ”")
-            self.set_x(self.l_margin) # Reset
             self.set_text_color(0)
             self.ln(2)
         def add_image_from_url(self, url):
-            # Feature: Image Embedding
             try:
                 r = requests.get(url, timeout=5)
                 if r.status_code == 200:
                     img_data = BytesIO(r.content)
-                    self.image(img_data, w=100) # Width 100mm
                     self.ln(5)
-                else:
-                    self.set_text_color(255, 0, 0)
-                    self.cell(0, 10, f"[Image Error: {url}]", ln=True)
             except:
-                self.set_text_color(255, 0, 0)
-                self.cell(0, 10, f"[Invalid URL]", ln=True)
-            self.set_text_color(0)
-    # --- LOGIC: TEXT PROCESSOR ---
     def clean_and_parse(raw_text, use_smart_symbols=True, clean_lms=True):
         processed_lines = []
-        # 1. LMS CLEANING
         if clean_lms:
-            # Feature: Remove ID tags [ID:123]
-            raw_text = re.sub(r'\[ID:?\s*\w+\]', '', raw_text, flags=re.IGNORECASE)
-            # Feature: Remove Point values (1 pts)
-            raw_text = re.sub(r'\(\d+\s*pts?\)', '', raw_text, flags=re.IGNORECASE)
-            # Feature: Remove "Select one:" instructions
-            raw_text = raw_text.replace("Select one:", "")
-            # Feature: Remove excessive newlines
-            raw_text = re.sub(r'\n{3,}', '\n\n', raw_text)
-        # 2. SMART SYMBOLS
         if use_smart_symbols:
             for pattern, symbol in SMART_SYMBOLS.items():
                 if pattern.isalpha():
@@ -181,114 +193,109 @@ def run_ultimate_pdf_converter():
         lines = raw_text.split('\n')
-        # 3. STRUCTURE PARSING (Block detection)
-        buffer_type = None # 'code', 'table'
         buffer_content = []
         for line in lines:
             line_stripped = line.strip()
-            # A. CODE BLOCKS
             if line_stripped.startswith('```'):
-                if buffer_type == 'code': # End of code block
                     processed_lines.append({'type': 'code', 'content': buffer_content})
                     buffer_content = []
                     buffer_type = None
-                else: # Start of code block
-                    if buffer_type == 'table': # Flush table if open
                         processed_lines.append({'type': 'table', 'content': buffer_content})
                         buffer_content = []
                     buffer_type = 'code'
                 continue
             if buffer_type == 'code':
-                buffer_content.append(line)
                 continue
-            # B. TABLES (Lines containing |)
             if '|' in line_stripped and len(line_stripped) > 3:
                 if buffer_type != 'table':
                     buffer_type = 'table'
                 buffer_content.append(line_stripped)
                 continue
-            elif buffer_type == 'table': # End of table
                 processed_lines.append({'type': 'table', 'content': buffer_content})
                 buffer_content = []
                 buffer_type = None
-            # C. HEADERS
             if line_stripped.startswith('#'):
                 level = line_stripped.count('#')
                 text = line_stripped.replace('#', '').strip()
                 processed_lines.append({'type': 'header', 'level': min(level, 3), 'content': text})
                 continue
-            # D. BLOCKQUOTES
             if line_stripped.startswith('> '):
                 processed_lines.append({'type': 'quote', 'content': line_stripped[2:]})
                 continue
-            # E. IMAGES
             if line_stripped.startswith('http') and line_stripped.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
                 processed_lines.append({'type': 'image', 'url': line_stripped})
                 continue
-            # F. LISTS
-            if line_stripped.startswith('* ') or line_stripped.startswith('- '):
                  processed_lines.append({'type': 'list', 'content': line_stripped[2:]})
                  continue
-            # G. HORIZONTAL RULE
             if line_stripped == '---':
                  processed_lines.append({'type': 'hr'})
                  continue
-            # H. STANDARD TEXT
             if line_stripped:
                 processed_lines.append({'type': 'text', 'content': line_stripped})
             else:
                  processed_lines.append({'type': 'empty'})
-        # Flush buffers
-        if buffer_type == 'table': processed_lines.append({'type': 'table', 'content': buffer_content})
         return processed_lines
-    # --- UI: STREAMLIT APP ---
     st.title("⚡ Ultimate PDF Engine")
-    st.markdown("""
-    <style>
-        .reportview-container { background: #f0f2f6; }
-    </style>
-    """, unsafe_allow_html=True)
     with st.sidebar:
-        st.header("⚙️ 55+ Features Control")
-        # Group 1: Output Settings
-        filename = st.text_input("Filename", "Ultimate_Notes.pdf")
-        orientation = st.radio("Orientation", ["Portrait", "Landscape"], index=0)
-        # Group 2: Features Toggle
-        st.subheader("Processing")
-        enable_lms = st.checkbox("LMS Cleaner (Regex)", True)
-        enable_smart = st.checkbox("Smart Symbols (α, →)", True)
-        enable_header = st.checkbox("Add Header/Footer", True)
-        # Group 3: Style
-        st.subheader("Styling")
-        font_size = st.slider("Base Font Size", 8, 20, 12)
-    # Main Input
-    raw_input = st.text_area("Paste Content (Supports Markdown, Tables, Links, Images):", height=400)
     if st.button("🚀 Generate PDF", type="primary"):
-        if not raw_input:
-            st.error("Input is empty!")
             return
-        with st.spinner("Engaging 55 features... Parsing blocks... Rendering..."):
-            # 1. Init PDF
             orient_code = 'P' if orientation == "Portrait" else 'L'
             pdf = UltimatePDF(orientation=orient_code)
             pdf.title_meta = filename.replace('.pdf', '')
@@ -297,60 +304,49 @@ def run_ultimate_pdf_converter():
             pdf.add_page()
             pdf.set_font(pdf.main_font, '', font_size)
-            # 2. Parse Content
             blocks = clean_and_parse(raw_input, use_smart_symbols=enable_smart, clean_lms=enable_lms)
-            # 3. Render Blocks
             for block in blocks:
                 if block['type'] == 'header':
                     pdf.add_markdown_header(block['content'], block['level'])
                 elif block['type'] == 'code':
                     pdf.add_code_block(block['content'])
                 elif block['type'] == 'table':
                     pdf.add_table(block['content'])
                 elif block['type'] == 'quote':
                     pdf.add_blockquote(block['content'])
                 elif block['type'] == 'image':
                     pdf.add_image_from_url(block['url'])
                 elif block['type'] == 'list':
                     pdf.set_x(pdf.l_margin + 5)
                     pdf.write(8, f"• {block['content']}")
                     pdf.ln()
                     pdf.set_x(pdf.l_margin)
                 elif block['type'] == 'hr':
-                    pdf.ln(5)
                     pdf.line(pdf.l_margin, pdf.get_y(), pdf.w - pdf.r_margin, pdf.get_y())
                     pdf.ln(5)
                 elif block['type'] == 'text':
                     pdf.write(8, block['content'])
                     pdf.ln()
                 elif block['type'] == 'empty':
                     pdf.ln(4)
-            # 4. Output
-            # encode to latin-1 with 'ignore' is a fallback for st.download,
-            # but FPDF2 'S' output is actually a string that needs encoding.
-            # Better to use output(dest='S').encode('latin-1')
-            pdf_bytes = pdf.output(dest='S').encode('latin-1', 'replace')
-            col1, col2 = st.columns([3,1])
-            with col1:
-                st.success(f"Processed {len(blocks)} blocks successfully.")
-            with col2:
                 st.download_button(
-                    "⬇️ Download",
                     data=pdf_bytes,
                     file_name=filename if filename.endswith('.pdf') else f"{filename}.pdf",
                     mime="application/pdf"
                 )

+import streamlit as st
+from fpdf import FPDF
+import requests
+import re
+import os
+from io import BytesIO
 def run_ultimate_pdf_converter():
     """
+    The Ultimate Text-to-PDF Converter (Stable Version).
+    Features:
+    - Auto-Healing Font Loader (Fixes TTLibError)
+    - Smart Symbols & Typography
+    - Markdown Engine (Headers, Tables, Code Blocks)
+    - LMS Junk Cleaner
     """
+    # --- CONSTANTS ---
     SMART_SYMBOLS = {
         r'<->': '↔', r'->': '→', r'<-': '←', r'=>': '⇒', r'<=': '≤', r'>=': '≥', r'!=': '≠',
         r'\.\.\.': '…', r'\(c\)': '©', r'\(r\)': '®', r'\(tm\)': '™',
         r'deg': '°', r'infinity': '∞', r'sqrt': '√'
     }
+    # --- INTERNAL PDF CLASS ---
     class UltimatePDF(FPDF):
+        def __init__(self, orientation='P', unit='mm', format='A4'):
             super().__init__(orientation=orientation, unit=unit, format=format)
             self.set_auto_page_break(auto=True, margin=15)
+            self.main_font = 'Arial' # Default fallback
+            self.ensure_fonts()
         def ensure_fonts(self):
+            font_filename = "DejaVuSans.ttf"
             font_url = "https://github.com/dejavu-fonts/dejavu-fonts/raw/master/ttf/DejaVuSans.ttf"
+            # 1. Check if file exists and is valid size (HTML error pages are small)
+            if os.path.exists(font_filename):
+                if os.path.getsize(font_filename) < 1000: # Less than 1KB is definitely garbage
+                    os.remove(font_filename)
+            # 2. Download if missing
+            if not os.path.exists(font_filename):
+                try:
+                    # Fake user-agent to avoid GitHub blocking scripts
+                    headers = {'User-Agent': 'Mozilla/5.0'}
+                    r = requests.get(font_url, headers=headers, timeout=10)
+                    if r.status_code == 200:
+                        with open(font_filename, "wb") as f:
+                            f.write(r.content)
+                except Exception as e:
+                    print(f"Font download failed: {e}")
+            # 3. Try Loading the Font
+            try:
+                if os.path.exists(font_filename):
+                    self.add_font('DejaVu', '', font_filename, uni=True)
+                    self.main_font = 'DejaVu'
+            except Exception:
+                # If loading fails (corrupt file), delete it to retry next time
+                try: os.remove(font_filename)
+                except: pass
+                self.main_font = 'Arial' # Fallback to standard
+                st.toast("⚠️ Font failed to load. Using standard font (some symbols may be missing).", icon="⚠️")
         def header(self):
             if getattr(self, 'show_header', False):
                 self.set_font(self.main_font, '', 8)
                 self.set_text_color(128)
+                self.cell(0, 10, f'Generated by Ultimate PDF | {getattr(self, "title_meta", "Doc")}', 0, 0, 'R')
                 self.ln(10)
         def footer(self):
             self.set_y(-15)
             self.set_font(self.main_font, '', 8)
             self.set_text_color(128)
             self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
+        # --- MARKDOWN RENDERING HELPERS ---
         def add_markdown_header(self, text, level):
+            sizes = {1: 20, 2: 16, 3: 14}
             self.set_font(self.main_font, '', sizes.get(level, 12))
+            self.set_text_color(0, 50, 100)
             self.cell(0, 10, text, ln=True)
+            self.set_text_color(0)
             self.set_font(self.main_font, '', 12)
         def add_code_block(self, code_lines):
             self.set_font("Courier", size=10)
+            self.set_fill_color(245, 245, 245)
             for line in code_lines:
+                # Replace tabs with spaces to prevent alignment issues
+                safe_line = line.replace('\t', '    ')
+                self.cell(0, 5, safe_line, ln=True, fill=True, border=0)
+            self.set_font(self.main_font, '', 12)
+            self.ln(3)
         def add_table(self, table_lines):
             self.set_font(self.main_font, '', 10)
+            cell_h = 7
             for row in table_lines:
                 cols = [c.strip() for c in row.split('|') if c.strip()]
                 if not cols: continue
+                col_w = (self.w - 30) // len(cols)
                 for col in cols:
+                    self.cell(col_w, cell_h, col, border=1)
                 self.ln()
+            self.set_font(self.main_font, '', 12)
             self.ln(5)
         def add_blockquote(self, text):
+            self.set_text_color(80)
+            self.set_x(self.l_margin + 8)
+            self.multi_cell(0, 6, f"“ {text}")
+            self.set_x(self.l_margin)
             self.set_text_color(0)
             self.ln(2)
         def add_image_from_url(self, url):
             try:
                 r = requests.get(url, timeout=5)
                 if r.status_code == 200:
                     img_data = BytesIO(r.content)
+                    self.image(img_data, w=100)
                     self.ln(5)
             except:
+                self.set_text_color(200, 0, 0)
+                self.cell(0, 10, f"[Image load failed: {url}]", ln=True)
+                self.set_text_color(0)
+    # --- TEXT PROCESSOR ---
     def clean_and_parse(raw_text, use_smart_symbols=True, clean_lms=True):
         processed_lines = []
+        # 1. LMS Regex Cleaning
         if clean_lms:
+            # Common LMS patterns
+            patterns = [
+                r'\[ID:?\s*\w+\]',             # [ID: 123]
+                r'Question\s+ID\s*[:\-]\s*\w+', # Question ID: 123
+                r'\(\d+\s*pts?\)',              # (1 pts)
+                r'Select one:',                 # Moodle/Blackboard prompt
+                r'\[\d{1,2}:\d{2}\s*(AM|PM)?\]' # Timestamps
+            ]
+            for p in patterns:
+                raw_text = re.sub(p, '', raw_text, flags=re.IGNORECASE)
+            raw_text = re.sub(r'\n{3,}', '\n\n', raw_text) # Fix spacing
+        # 2. Smart Symbols
         if use_smart_symbols:
             for pattern, symbol in SMART_SYMBOLS.items():
                 if pattern.isalpha():
         lines = raw_text.split('\n')
+        # 3. Block Parser
+        buffer_type = None
         buffer_content = []
         for line in lines:
             line_stripped = line.strip()
+            # Detect Code Block
             if line_stripped.startswith('```'):
+                if buffer_type == 'code': # Close code
                     processed_lines.append({'type': 'code', 'content': buffer_content})
                     buffer_content = []
                     buffer_type = None
+                else: # Open code
+                    if buffer_type == 'table': # Close table if open
                         processed_lines.append({'type': 'table', 'content': buffer_content})
                         buffer_content = []
                     buffer_type = 'code'
                 continue
             if buffer_type == 'code':
+                buffer_content.append(line) # Preserve whitespace in code
                 continue
+            # Detect Table
             if '|' in line_stripped and len(line_stripped) > 3:
                 if buffer_type != 'table':
                     buffer_type = 'table'
                 buffer_content.append(line_stripped)
                 continue
+            elif buffer_type == 'table': # Close table
                 processed_lines.append({'type': 'table', 'content': buffer_content})
                 buffer_content = []
                 buffer_type = None
+            # Detect Headers
             if line_stripped.startswith('#'):
                 level = line_stripped.count('#')
                 text = line_stripped.replace('#', '').strip()
                 processed_lines.append({'type': 'header', 'level': min(level, 3), 'content': text})
                 continue
+            # Detect Quotes
             if line_stripped.startswith('> '):
                 processed_lines.append({'type': 'quote', 'content': line_stripped[2:]})
                 continue
+            # Detect Images
             if line_stripped.startswith('http') and line_stripped.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
                 processed_lines.append({'type': 'image', 'url': line_stripped})
                 continue
+            # Detect Lists
+            if line_stripped.startswith(('* ', '- ')):
                  processed_lines.append({'type': 'list', 'content': line_stripped[2:]})
                  continue
+            # Detect HR
             if line_stripped == '---':
                  processed_lines.append({'type': 'hr'})
                  continue
             if line_stripped:
                 processed_lines.append({'type': 'text', 'content': line_stripped})
             else:
                  processed_lines.append({'type': 'empty'})
+        if buffer_type == 'table':
+            processed_lines.append({'type': 'table', 'content': buffer_content})
         return processed_lines
+    # --- UI RENDER ---
     st.title("⚡ Ultimate PDF Engine")
+    with st.expander("ℹ️ Help & Features", expanded=False):
+        st.write("- **Smart Symbols:** Writes 'alpha' as α, '->' as →")
+        st.write("- **Tables:** Use `| Name | Score |` format")
+        st.write("- **Code:** Use ` ``` ` for code blocks")
+        st.write("- **Images:** Paste URL on new line")
+    # Settings Sidebar
     with st.sidebar:
+        st.header("⚙️ PDF Config")
+        filename = st.text_input("Filename", "My_Notes.pdf")
+        orientation = st.radio("Orientation", ["Portrait", "Landscape"])
+        st.subheader("Filters")
+        enable_lms = st.checkbox("Clean LMS Junk", True)
+        enable_smart = st.checkbox("Smart Symbols", True)
+        enable_header = st.checkbox("Show Header", True)
+        font_size = st.slider("Font Size", 8, 24, 12)
+    # Input
+    raw_input = st.text_area("Paste text here...", height=350)
+    # Action
     if st.button("🚀 Generate PDF", type="primary"):
+        if not raw_input.strip():
+            st.warning("Input is empty.")
             return
+        with st.spinner("Processing..."):
+            # Setup PDF
             orient_code = 'P' if orientation == "Portrait" else 'L'
             pdf = UltimatePDF(orientation=orient_code)
             pdf.title_meta = filename.replace('.pdf', '')
             pdf.add_page()
             pdf.set_font(pdf.main_font, '', font_size)
+            # Process
             blocks = clean_and_parse(raw_input, use_smart_symbols=enable_smart, clean_lms=enable_lms)
+            # Render
             for block in blocks:
                 if block['type'] == 'header':
                     pdf.add_markdown_header(block['content'], block['level'])
                 elif block['type'] == 'code':
                     pdf.add_code_block(block['content'])
                 elif block['type'] == 'table':
                     pdf.add_table(block['content'])
                 elif block['type'] == 'quote':
                     pdf.add_blockquote(block['content'])
                 elif block['type'] == 'image':
                     pdf.add_image_from_url(block['url'])
                 elif block['type'] == 'list':
                     pdf.set_x(pdf.l_margin + 5)
                     pdf.write(8, f"• {block['content']}")
                     pdf.ln()
                     pdf.set_x(pdf.l_margin)
                 elif block['type'] == 'hr':
+                    pdf.ln(2)
                     pdf.line(pdf.l_margin, pdf.get_y(), pdf.w - pdf.r_margin, pdf.get_y())
                     pdf.ln(5)
                 elif block['type'] == 'text':
                     pdf.write(8, block['content'])
                     pdf.ln()
                 elif block['type'] == 'empty':
                     pdf.ln(4)
+            # Download
+            try:
+                pdf_bytes = pdf.output(dest='S').encode('latin-1', 'replace')
+                st.success("PDF Generated Successfully!")
                 st.download_button(
+                    "⬇️ Download PDF",
                     data=pdf_bytes,
                     file_name=filename if filename.endswith('.pdf') else f"{filename}.pdf",
                     mime="application/pdf"
                 )
+            except Exception as e:
+                st.error(f"Error creating PDF file: {e}")