Spaces:

awacke1
/

Pillow-PyMuPDF-ReportLab

Sleeping

App Files Files Community

Pillow-PyMuPDF-ReportLab / app.py

awacke1

Update app.py

cd13cab verified about 1 month ago

raw

history blame

22.9 kB

	# Streamlit UI
	st.title("🚀 Cutting-Edge ML Outline Generator")

	col1, col2 = st.columns(2)

	with col1:
	st.header("📝 Markdown Outline")

	# Display the markdown content
	st.markdown(ml_markdown)

	# Create a download button for the markdown file
	st.download_button(
	label="Download Markdown",
	data=ml_markdown,
	file_name="ml_outline.md",
	mime="text/markdown"
	)

	# Show the markdown source code in an expandable section
	with st.expander("View Markdown Source"):
	st.code(ml_markdown, language="markdown")

	with col2:
	st.header("📑 PDF Preview & Demos")

	# Library Demos
	st.subheader("Library Demos")
	if st.button("Run PDF Library Demos"):
	with st.spinner("Running demos..."):
	# Create tabs for each demo
	demo_tabs = st.tabs(["PikePDF", "FPDF", "PyMuPDF", "Image Demo"])

	with demo_tabs[0]:
	# pikepdf demo
	pike_pdf = demo_pikepdf()
	st.download_button("Download pikepdf Demo", pike_pdf, "pikepdf_demo.pdf")
	st.write("PikePDF demo created successfully!")
	st.info("This PDF contains the multilevel markdown outline in a two-column layout.")

	with demo_tabs[1]:
	# fpdf demo
	fpdf_pdf = demo_fpdf()
	st.download_button("Download fpdf Demo", fpdf_pdf, "fpdf_demo.pdf")
	st.write("FPDF demo created successfully!")
	st.info("This PDF contains the multilevel markdown outline in a two-column layout.")

	with demo_tabs[2]:
	# pymupdf demo
	pymupdf_pdf = demo_pymupdf()
	st.download_button("Download pymupdf Demo", pymupdf_pdf, "pymupdf_demo.pdf")
	st.write("PyMuPDF demo created successfully!")
	st.info("This PDF contains the multilevel markdown outline in a two-column layout.")

	with demo_tabs[3]:
	# Image demo
	img_data = demo_image_capture()
	st.image(img_data, caption="Demo Image (Camera simulation)")

	# Main PDF Generation
	st.subheader("Main Outline PDF")
	if st.button("Generate Main PDF"):
	with st.spinner("Generating PDF..."):
	try:
	pdf_bytes = create_main_pdf(ml_markdown)

	st.download_button(
	label="Download Main PDF",
	data=pdf_bytes,
	file_name="ml_outline.pdf",
	mime="application/pdf"
	)

	# Display the PDF in the app
	base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
	pdf_display = f'''
	<embed
	src="data:application/pdf;base64,{base64_pdf}"
	width="100%"
	height="400px"
	type="application/pdf">
	'''
	st.markdown(pdf_display, unsafe_allow_html=True)

	st.success("PDF generated successfully! The PDF displays the multilevel markdown outline in a two-column layout.")
	except Exception as e:
	st.error(f"Error generating PDF: {str(e)}")

	# Show the PDF rendering code in an expandable section
	with st.expander("View PDF Rendering Code"):
	st.code("""
	# Process multilevel markdown for PDF output
	def markdown_to_pdf_content(markdown_text):
	# Convert markdown headers to styled text for PDF
	lines = markdown_text.strip().split('\\n')
	pdf_content = []

	for line in lines:
	if line.startswith('# '):
	# Main header - will be handled separately
	pass
	elif line.startswith('## '):
	# Section header - add as a bold item
	section = line.replace('## ', '').strip()
	pdf_content.append(f"<b>{section}</b>")
	elif line.startswith('- '):
	# List item - add as a normal item
	item = line.replace('- ', '').strip()
	pdf_content.append(item)

	# Split the content for two columns
	mid_point = len(pdf_content) // 2
	left_column = pdf_content[:mid_point]
	right_column = pdf_content[mid_point:]

	return left_column, right_column
	""", language="python")

	# Add custom CSS for better appearance
	st.markdown("""
	<style>
	.stButton>button {
	background-color: #4CAF50;
	color: white;
	font-weight: bold;
	}
	.stTabsimport streamlit as st
	import base64
	from reportlab.lib.pagesizes import A4
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
	from reportlab.lib.styles import getSampleStyleSheet
	from reportlab.lib import colors
	import pikepdf
	import fpdf
	import fitz # pymupdf
	import cv2
	import numpy as np
	from PIL import Image
	import io
	import os

	# The ML outline as one markdown string: a "# " title, "## " section headers,
	# and "- " bullets. markdown_to_pdf_content() classifies lines purely by those
	# literal prefixes at the very start of a line, so a bullet is only picked up
	# when "- " is the first thing on its line.
	# NOTE(review): the sub-bullets under each numbered topic are presumably
	# indented in the real file (which is what keeps them out of the PDF columns);
	# that whitespace is not visible in this copy - confirm before editing.
	ml_markdown = """# Cutting-Edge ML Outline

	## Core ML Techniques
	- 🌟 1. Mixture of Experts (MoE)
	- Conditional computation techniques
	- Sparse gating mechanisms
	- Training specialized sub-models

	- 🔥 2. Supervised Fine-Tuning (SFT) using PyTorch
	- Loss function customization
	- Gradient accumulation strategies
	- Learning rate schedulers

	- 🤖 3. Large Language Models (LLM) using Transformers
	- Attention mechanisms
	- Tokenization strategies
	- Position encodings

	## Training Methods
	- 📊 4. Self-Rewarding Learning using NPS 0-10 and Verbatims
	- Custom reward functions
	- Feedback categorization
	- Signal extraction from text

	- 👍 5. Reinforcement Learning from Human Feedback (RLHF)
	- Preference datasets
	- PPO implementation
	- KL divergence constraints

	- 🔗 6. MergeKit: Merging Models to Same Embedding Space
	- TIES merging
	- Task arithmetic
	- SLERP interpolation

	## Optimization & Deployment
	- 📏 7. DistillKit: Model Size Reduction with Spectrum Analysis
	- Knowledge distillation
	- Quantization techniques
	- Model pruning strategies

	- 🧠 8. Agentic RAG Agents using Document Inputs
	- Vector database integration
	- Query planning
	- Self-reflection mechanisms

	- ⏳ 9. Longitudinal Data Summarization from Multiple Docs
	- Multi-document compression
	- Timeline extraction
	- Entity tracking

	## Knowledge Representation
	- 📑 10. Knowledge Extraction using Markdown Knowledge Graphs
	- Entity recognition
	- Relationship mapping
	- Hierarchical structuring

	- 🗺️ 11. Knowledge Mapping with Mermaid Diagrams
	- Flowchart generation
	- Sequence diagram creation
	- State diagrams

	- 💻 12. ML Code Generation with Streamlit/Gradio/HTML5+JS
	- Code completion
	- Unit test generation
	- Documentation synthesis
	"""

	# Flat list of the twelve numbered topic titles (emoji included), without any
	# markdown markers, section headers, or sub-bullets. Kept alongside
	# ml_markdown for older code that expects a list rather than a markdown string.
	ml_outline = [
	"🌟 1. Mixture of Experts (MoE)",
	"🔥 2. Supervised Fine-Tuning (SFT) using PyTorch",
	"🤖 3. Large Language Models (LLM) using Transformers",
	"📊 4. Self-Rewarding Learning using NPS 0-10 and Verbatims",
	"👍 5. Reinforcement Learning from Human Feedback (RLHF)",
	"🔗 6. MergeKit: Merging Models to Same Embedding Space",
	"📏 7. DistillKit: Model Size Reduction with Spectrum Analysis",
	"🧠 8. Agentic RAG Agents using Document Inputs",
	"⏳ 9. Longitudinal Data Summarization from Multiple Docs",
	"📑 10. Knowledge Extraction using Markdown Knowledge Graphs",
	"🗺️ 11. Knowledge Mapping with Mermaid Diagrams",
	"💻 12. ML Code Generation with Streamlit/Gradio/HTML5+JS"
	]

	# Process multilevel markdown for PDF output
	def markdown_to_pdf_content(markdown_text):
	    """Split a markdown outline into two lists of PDF-ready strings.

	    Each line is classified by the literal prefix it starts with:

	    * ``"# "``  - document title; skipped, callers draw their own title.
	    * ``"## "`` - section header; emitted as ``"<b>Section</b>"`` so callers
	      can recognise it by the leading ``<b>`` marker.
	    * ``"- "``  - list item; emitted as plain text.

	    Every other line (blank lines, indented bullets, prose) is ignored.

	    Args:
	        markdown_text: The outline as a single markdown string.

	    Returns:
	        A ``(left_column, right_column)`` tuple of lists. The collected
	        entries are cut at ``len(entries) // 2``, so the right column gets
	        the extra entry when the count is odd.
	    """
	    section_marker = '## '
	    item_marker = '- '
	    pdf_content = []

	    for line in markdown_text.strip().split('\n'):
	        if line.startswith(section_marker):
	            # Drop only the leading marker: a later "## " inside the heading
	            # is part of the text and must survive.
	            section = line[len(section_marker):].strip()
	            pdf_content.append(f"<b>{section}</b>")
	        elif line.startswith(item_marker):
	            # Same here - "a - b" must not lose its inner dash.
	            item = line[len(item_marker):].strip()
	            # A bullet with no text would only produce an empty row.
	            if item:
	                pdf_content.append(item)

	    # Split the content for two columns
	    mid_point = len(pdf_content) // 2
	    left_column = pdf_content[:mid_point]
	    right_column = pdf_content[mid_point:]

	    return left_column, right_column

	# Demo functions for PDF libraries
	def demo_pikepdf():
	    """Create a two-column PDF of the markdown outline and re-save it with pikepdf.

	    The page itself is drawn with PyMuPDF (``fitz``); the resulting bytes are
	    then opened with pikepdf and written out again, so the returned document
	    is the one pikepdf produced.

	    Returns:
	        bytes: The PDF file content.
	    """
	    # Process markdown content
	    left_column, right_column = markdown_to_pdf_content(ml_markdown)

	    doc = fitz.open()
	    page = doc.new_page(width=842, height=595)  # A4 landscape, in points

	    # PyMuPDF's built-in Base-14 aliases: "hebo" is Helvetica-Bold, "helv" is
	    # Helvetica. "helv-b" is not a known alias and cannot be resolved
	    # without a font file.
	    bold_font = "hebo"
	    normal_font = "helv"
	    blue_color = (0, 0, 0.8)
	    black_color = (0, 0, 0)

	    # Add title
	    page.insert_text((50, 40), "Cutting-Edge ML Outline (PikePDF Demo)", fontname=bold_font, fontsize=16, color=blue_color)

	    # Both columns share the same top edge and row pitch; only x differs.
	    top_y = 80
	    row_height = 25
	    for column_x, column in ((50, left_column), (450, right_column)):
	        for row, item in enumerate(column):
	            position = (column_x, top_y + row * row_height)
	            if item.startswith('<b>'):
	                # Section header: strip the marker tags added by
	                # markdown_to_pdf_content and draw it bold and blue.
	                text = item.replace('<b>', '').replace('</b>', '')
	                page.insert_text(position, text, fontname=bold_font, fontsize=14, color=blue_color)
	            else:
	                # Normal item
	                page.insert_text(position, item, fontname=normal_font, fontsize=11, color=black_color)

	    # Draw a dividing line down the middle of the page
	    page.draw_line((421, 70), (421, 550))

	    # Hand the PyMuPDF output over to pikepdf through an in-memory buffer.
	    temp_buffer = io.BytesIO()
	    doc.save(temp_buffer)
	    doc.close()
	    temp_buffer.seek(0)

	    buffer = io.BytesIO()
	    with pikepdf.Pdf.open(temp_buffer) as pdf:
	        pdf.save(buffer)
	    return buffer.getvalue()

	def demo_fpdf():
	    """Create a two-column landscape PDF of the markdown outline using FPDF.

	    Section headers are drawn bold in dark blue, list items in black. Text is
	    reduced to Latin-1 before rendering because the built-in "Arial" core
	    font cannot encode anything else (the outline contains emoji).

	    Returns:
	        bytes: The PDF file content.

	    NOTE(review): ``pdf.output(buffer)`` relies on the installed ``fpdf``
	    accepting a file-like object there - confirm the package is fpdf2.
	    """
	    # Process markdown content
	    left_column, right_column = markdown_to_pdf_content(ml_markdown)

	    def latin1_only(text):
	        # Drop characters the core font cannot encode (e.g. emoji) instead
	        # of letting the renderer raise, then trim the gap they leave.
	        return text.encode('latin-1', 'ignore').decode('latin-1').strip()

	    pdf = fpdf.FPDF(orientation='L')  # Landscape
	    pdf.add_page()

	    # Set title
	    pdf.set_font("Arial", 'B', size=16)
	    pdf.set_text_color(0, 0, 128)  # Dark blue
	    pdf.cell(0, 10, txt="Cutting-Edge ML Outline (FPDF Demo)", ln=True, align='C')
	    pdf.ln(10)

	    # Both columns start at the same height; the second begins just right of
	    # the page centre.
	    y_start = pdf.get_y()
	    columns = ((20, left_column), (pdf.w / 2 + 10, right_column))

	    for x_col, column in columns:
	        pdf.set_y(y_start)
	        for item in column:
	            if item.startswith('<b>'):
	                # Section header
	                text = item.replace('<b>', '').replace('</b>', '')
	                pdf.set_font("Arial", 'B', size=14)
	                pdf.set_text_color(0, 0, 128)  # Dark blue
	            else:
	                # Normal item
	                text = item
	                pdf.set_font("Arial", size=11)
	                pdf.set_text_color(0, 0, 0)  # Black

	            # Re-apply the column's x before every cell so each row is
	            # drawn at the column's left edge.
	            pdf.set_x(x_col)
	            pdf.multi_cell(180, 10, txt=latin1_only(text), align='L')
	            pdf.ln(2)

	    # Draw a dividing line. Its lower end is derived from the page height so
	    # it stays on the page in landscape orientation.
	    pdf.line(pdf.w / 2, 30, pdf.w / 2, pdf.h - 20)

	    buffer = io.BytesIO()
	    pdf.output(buffer)
	    buffer.seek(0)
	    return buffer.getvalue()

	def demo_pymupdf():
	    """Create a two-column landscape PDF of the markdown outline using PyMuPDF.

	    Section headers are drawn bold in blue with 10 extra points of space above
	    them (except at the top of a column); list items are drawn in black,
	    indented 10 points from the column edge.

	    Returns:
	        bytes: The PDF file content.
	    """
	    # Process markdown content
	    left_column, right_column = markdown_to_pdf_content(ml_markdown)

	    doc = fitz.open()
	    page = doc.new_page(width=842, height=595)  # A4 landscape, in points

	    # PyMuPDF's built-in Base-14 aliases: "hebo" is Helvetica-Bold, "helv" is
	    # Helvetica. "helv-b" is not a known alias and cannot be resolved
	    # without a font file.
	    bold_font = "hebo"
	    normal_font = "helv"
	    blue_color = (0, 0, 0.8)
	    black_color = (0, 0, 0)

	    # Add title
	    page.insert_text((300, 40), "Cutting-Edge ML Outline (PyMuPDF Demo)", fontname=bold_font, fontsize=16, color=blue_color)

	    top_y = 80
	    line_height = 25

	    for column_x, column in ((50, left_column), (450, right_column)):
	        current_y = top_y
	        for item in column:
	            if item.startswith('<b>'):
	                # Add extra space before sections (except the first row)
	                if current_y > top_y:
	                    current_y += 10

	                # Section header: strip the marker tags before drawing.
	                text = item.replace('<b>', '').replace('</b>', '')
	                page.insert_text((column_x, current_y), text, fontname=bold_font, fontsize=14, color=blue_color)
	            else:
	                # Normal item, indented under its section header
	                page.insert_text((column_x + 10, current_y), item, fontname=normal_font, fontsize=11, color=black_color)

	            current_y += line_height

	    # Draw a dividing line down the middle of the page
	    page.draw_line((421, 70), (421, 550))

	    buffer = io.BytesIO()
	    doc.save(buffer)
	    doc.close()
	    return buffer.getvalue()

	# Demo function for image capture
	def demo_image_capture():
	    """Generate a stand-in "camera" image, since no real camera is available here.

	    Builds a 640x480 diagonal gradient with numpy, turns it into an RGB
	    picture, writes a caption onto it and encodes the result as JPEG.

	    Returns:
	        bytes: The JPEG-encoded image.
	    """
	    from PIL import ImageDraw, ImageFont

	    width, height = 640, 480

	    # Diagonal gradient in [0, 1]: the mean of the x and y ramps.
	    x = np.linspace(0, 1, width)
	    y = np.linspace(0, 1, height)
	    xx, yy = np.meshgrid(x, y)
	    gradient = (xx + yy) / 2

	    # Convert to RGB image
	    img_array = (gradient * 255).astype(np.uint8)
	    # Doubling directly in uint8 wraps around past 255 and breaks the
	    # gradient; widen first, then clamp back into the 8-bit range.
	    blue = np.clip(img_array.astype(np.uint16) * 2, 0, 255).astype(np.uint8)
	    rgb_array = np.stack([img_array, img_array // 2, blue], axis=2)

	    # Create PIL Image
	    img = Image.fromarray(rgb_array)

	    # Add text to the image
	    draw = ImageDraw.Draw(img)
	    try:
	        font = ImageFont.truetype("arial.ttf", 30)
	    except (OSError, ImportError):
	        # Font file missing or FreeType support unavailable: use Pillow's
	        # built-in bitmap font instead.
	        font = ImageFont.load_default()

	    draw.text((width//4, height//2), "OpenCV Demo Image", fill=(255, 255, 255), font=font)

	    # Save to buffer
	    buffer = io.BytesIO()
	    img.save(buffer, format="JPEG")
	    return buffer.getvalue()

	# Main PDF creation using ReportLab
	def create_main_pdf(markdown_text):
	    """Create a landscape A4 PDF with the outline laid out in two columns.

	    The markdown is split by ``markdown_to_pdf_content``; each entry becomes a
	    ReportLab paragraph, and the two columns are placed side by side in a
	    two-column table with a grey rule between them.

	    Args:
	        markdown_text: The outline as a markdown string.

	    Returns:
	        bytes: The PDF file content. When the markdown yields no entries the
	        document contains only the title.
	    """
	    from itertools import zip_longest

	    from reportlab.platypus import Table, TableStyle, Paragraph, Spacer
	    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle

	    # Process markdown content
	    left_column, right_column = markdown_to_pdf_content(markdown_text)

	    buffer = io.BytesIO()
	    doc = SimpleDocTemplate(
	        buffer,
	        pagesize=(A4[1], A4[0]),  # Landscape: swap A4's width and height
	        leftMargin=50,
	        rightMargin=50,
	        topMargin=50,
	        bottomMargin=50
	    )

	    styles = getSampleStyleSheet()

	    # Create custom styles
	    title_style = styles['Heading1']
	    title_style.textColor = colors.darkblue
	    title_style.alignment = 1  # Center alignment

	    section_style = ParagraphStyle(
	        'SectionStyle',
	        parent=styles['Heading2'],
	        textColor=colors.darkblue,
	        spaceAfter=6
	    )

	    item_style = ParagraphStyle(
	        'ItemStyle',
	        parent=styles['Normal'],
	        fontSize=11,
	        leading=14,
	        leftIndent=10
	    )

	    def to_paragraphs(entries):
	        # Entries tagged with <b> are section headers; everything else is a
	        # plain list item.
	        paragraphs = []
	        for entry in entries:
	            if entry.startswith('<b>'):
	                text = entry.replace('<b>', '').replace('</b>', '')
	                paragraphs.append(Paragraph(text, section_style))
	            else:
	                paragraphs.append(Paragraph(entry, item_style))
	        return paragraphs

	    # Add title
	    story = [
	        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
	        Spacer(1, 20),
	    ]

	    # One table row per pair of entries; the shorter column is padded with
	    # empty cells.
	    table_data = [
	        [left_p, right_p]
	        for left_p, right_p in zip_longest(
	            to_paragraphs(left_column), to_paragraphs(right_column), fillvalue=""
	        )
	    ]

	    # ReportLab rejects a table without rows by default, so an outline with
	    # no entries simply gets no table.
	    if table_data:
	        # 120 leaves 20 points to spare beyond the two 50-point margins.
	        col_width = (A4[1] - 120) / 2.0

	        table = Table(table_data, colWidths=[col_width, col_width])
	        table.setStyle(TableStyle([
	            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
	            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
	            ('ALIGN', (1, 0), (1, -1), 'LEFT'),
	            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
	            ('GRID', (0, 0), (-1, -1), 0.5, colors.white),
	            # Grey rule on the right edge of the first column acts as the
	            # divider between the two columns.
	            ('LINEAFTER', (0, 0), (0, -1), 1, colors.grey),
	        ]))
	        story.append(table)

	    doc.build(story)
	    return buffer.getvalue()

	def get_binary_file_downloader_html(bin_data, file_label='File'):
	    """Create an HTML download link that embeds binary data as a data URI.

	    Args:
	        bin_data: The bytes to embed; they are base64-encoded into the link.
	        file_label: Used both as the suggested download file name and in the
	            visible link text.

	    Returns:
	        str: An ``<a>`` element whose ``href`` is a base64 data URI.
	    """
	    from html import escape

	    bin_str = base64.b64encode(bin_data).decode()
	    # The label lands inside an attribute and inside element text, so quotes
	    # and angle brackets in it must not be able to break out of the markup.
	    safe_label = escape(str(file_label), quote=True)
	    href = f'<a href="data:application/octet-stream;base64,{bin_str}" download="{safe_label}">Download {safe_label}</a>'
	    return href

	# Streamlit UI: the outline on the left, PDF demos and the main PDF on the right.
	st.title("🚀 Cutting-Edge ML Outline Generator")

	col1, col2 = st.columns(2)

	with col1:
	    st.header("📝 Markdown Outline")

	    # Show and export the actual markdown (title, sections, bullets) so the
	    # page renders a structured outline and the .md download is real markdown.
	    st.markdown(ml_markdown)

	    # Create a download button for the markdown file
	    st.download_button(
	        label="Download Markdown",
	        data=ml_markdown,
	        file_name="ml_outline.md",
	        mime="text/markdown"
	    )

	with col2:
	    st.header("📑 PDF Preview & Demos")

	    # Library Demos
	    st.subheader("Library Demos")
	    if st.button("Run PDF Library Demos"):
	        with st.spinner("Running demos..."):
	            # Create tabs for each demo
	            demo_tabs = st.tabs(["PikePDF", "FPDF", "PyMuPDF", "Image Demo"])

	            with demo_tabs[0]:
	                # pikepdf demo
	                pike_pdf = demo_pikepdf()
	                st.download_button("Download pikepdf Demo", pike_pdf, "pikepdf_demo.pdf")
	                st.write("PikePDF demo created successfully!")

	            with demo_tabs[1]:
	                # fpdf demo
	                fpdf_pdf = demo_fpdf()
	                st.download_button("Download fpdf Demo", fpdf_pdf, "fpdf_demo.pdf")
	                st.write("FPDF demo created successfully!")

	            with demo_tabs[2]:
	                # pymupdf demo
	                pymupdf_pdf = demo_pymupdf()
	                st.download_button("Download pymupdf Demo", pymupdf_pdf, "pymupdf_demo.pdf")
	                st.write("PyMuPDF demo created successfully!")

	            with demo_tabs[3]:
	                # Image demo
	                img_data = demo_image_capture()
	                st.image(img_data, caption="Demo Image (Camera simulation)")

	    # Main PDF Generation
	    st.subheader("Main Outline PDF")
	    if st.button("Generate Main PDF"):
	        with st.spinner("Generating PDF..."):
	            try:
	                # create_main_pdf expects the markdown string; the list
	                # version has no .strip() and fails inside
	                # markdown_to_pdf_content.
	                pdf_bytes = create_main_pdf(ml_markdown)

	                st.download_button(
	                    label="Download Main PDF",
	                    data=pdf_bytes,
	                    file_name="ml_outline.pdf",
	                    mime="application/pdf"
	                )

	                # Display the PDF in the app by embedding it as a base64 data URI
	                base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
	                pdf_display = f'''
	<embed
	src="data:application/pdf;base64,{base64_pdf}"
	width="100%"
	height="400px"
	type="application/pdf">
	'''
	                st.markdown(pdf_display, unsafe_allow_html=True)

	                st.success("PDF generated successfully! You can view it above and download it using the button.")
	            except Exception as e:
	                # Top-level UI boundary: report the failure in the page
	                # instead of crashing the app.
	                st.error(f"Error generating PDF: {str(e)}")

	# Add custom CSS for better appearance
	st.markdown("""
	<style>
	.stButton>button {
	background-color: #4CAF50;
	color: white;
	font-weight: bold;
	}
	.stTabs [data-baseweb="tab-list"] {
	gap: 2px;
	}
	.stTabs [data-baseweb="tab"] {
	height: 50px;
	white-space: pre-wrap;
	background-color: #f0f2f6;
	border-radius: 4px 4px 0px 0px;
	gap: 1px;
	padding-top: 10px;
	padding-bottom: 10px;
	}
	</style>
	""", unsafe_allow_html=True)