Spaces:

awacke1
/

SeleniumPDF

Build error

App Files Files Community

awacke1 committed on Jan 14

Commit

25d433d

verified ·

1 Parent(s): a3ea8b1

Create app.py

Browse files

Files changed (1) hide show

app.py +110 -0

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import streamlit as st
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import pdfkit
+import time
+import os
+from pathlib import Path
+class StreamlitPDFCapture:
+    def __init__(self):
+        self.options = {
+            'page-size': 'A4',
+            'margin-top': '0mm',
+            'margin-right': '0mm',
+            'margin-bottom': '0mm',
+            'margin-left': '0mm',
+            'encoding': 'UTF-8',
+            'custom-header': [('Accept-Encoding', 'gzip')],
+            'no-outline': None,
+            'enable-local-file-access': None
+        }
+    def setup_chrome_driver(self):
+        """Setup Chrome driver with appropriate options."""
+        chrome_options = Options()
+        chrome_options.add_argument("--headless")  # Run in headless mode
+        chrome_options.add_argument("--window-size=1920,1080")
+        chrome_options.add_argument("--disable-gpu")
+        chrome_options.add_argument("--no-sandbox")
+        return webdriver.Chrome(options=chrome_options)
+    def capture_page(self, url, output_path, wait_time=5):
+        """Capture a single Streamlit page as PDF."""
+        driver = self.setup_chrome_driver()
+        try:
+            # Navigate to page
+            driver.get(url)
+            time.sleep(wait_time)  # Wait for page to fully load
+            # Get page height and set window size
+            height = driver.execute_script("return document.body.scrollHeight")
+            driver.set_window_size(1920, height + 100)
+            # Save as PDF using pdfkit
+            html_content = driver.page_source
+            pdfkit.from_string(html_content, output_path, options=self.options)
+        finally:
+            driver.quit()
+    def capture_multiple_pages(self, urls, output_dir, prefix="page"):
+        """Capture multiple Streamlit pages as separate PDFs."""
+        Path(output_dir).mkdir(parents=True, exist_ok=True)
+        pdfs = []
+        for i, url in enumerate(urls):
+            output_path = os.path.join(output_dir, f"{prefix}_{i+1}.pdf")
+            self.capture_page(url, output_path)
+            pdfs.append(output_path)
+        return pdfs
+def add_pdf_download_button():
+    """Add a PDF download button to your Streamlit app."""
+    if st.button("📑 Download as PDF"):
+        with st.spinner("Generating PDF..."):
+            # Get current page URL
+            ctx = st.runtime.get_instance()
+            url = ctx.serverAddress if hasattr(ctx, 'serverAddress') else "http://localhost:8501"
+            # Initialize capture utility
+            pdf_capture = StreamlitPDFCapture()
+            # Generate PDF
+            output_path = "streamlit_page.pdf"
+            pdf_capture.capture_page(url, output_path)
+            # Provide download link
+            with open(output_path, "rb") as pdf_file:
+                pdfd = pdf_file.read()
+                st.download_button(
+                    label="⬇️ Download PDF",
+                    data=pdfd,
+                    file_name="streamlit_page.pdf",
+                    mime="application/pdf"
+                )
+# Example usage in your Streamlit app:
+if __name__ == "__main__":
+    st.title("Streamlit PDF Capture Demo")
+    # Add the PDF download button
+    add_pdf_download_button()
+    # Your regular Streamlit content here
+    st.write("This is a demo of PDF capture functionality")
+    # For multiple pages
+    if st.checkbox("Capture multiple pages"):
+        urls = [
+            "http://localhost:8501/page1",
+            "http://localhost:8501/page2"
+        ]
+        if st.button("Generate PDFs for all pages"):
+            pdf_capture = StreamlitPDFCapture()
+            pdfs = pdf_capture.capture_multiple_pages(urls, "output_pdfs")
+            st.success(f"Generated {len(pdfs)} PDFs in output_pdfs directory")