awacke1's picture
Update app.py
cd13cab verified
raw
history blame
22.9 kB
# Streamlit UI
st.title("πŸš€ Cutting-Edge ML Outline Generator")
col1, col2 = st.columns(2)
with col1:
st.header("πŸ“ Markdown Outline")
# Display the markdown content
st.markdown(ml_markdown)
# Create a download button for the markdown file
st.download_button(
label="Download Markdown",
data=ml_markdown,
file_name="ml_outline.md",
mime="text/markdown"
)
# Show the markdown source code in an expandable section
with st.expander("View Markdown Source"):
st.code(ml_markdown, language="markdown")
with col2:
st.header("πŸ“‘ PDF Preview & Demos")
# Library Demos
st.subheader("Library Demos")
if st.button("Run PDF Library Demos"):
with st.spinner("Running demos..."):
# Create tabs for each demo
demo_tabs = st.tabs(["PikePDF", "FPDF", "PyMuPDF", "Image Demo"])
with demo_tabs[0]:
# pikepdf demo
pike_pdf = demo_pikepdf()
st.download_button("Download pikepdf Demo", pike_pdf, "pikepdf_demo.pdf")
st.write("PikePDF demo created successfully!")
st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
with demo_tabs[1]:
# fpdf demo
fpdf_pdf = demo_fpdf()
st.download_button("Download fpdf Demo", fpdf_pdf, "fpdf_demo.pdf")
st.write("FPDF demo created successfully!")
st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
with demo_tabs[2]:
# pymupdf demo
pymupdf_pdf = demo_pymupdf()
st.download_button("Download pymupdf Demo", pymupdf_pdf, "pymupdf_demo.pdf")
st.write("PyMuPDF demo created successfully!")
st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
with demo_tabs[3]:
# Image demo
img_data = demo_image_capture()
st.image(img_data, caption="Demo Image (Camera simulation)")
# Main PDF Generation
st.subheader("Main Outline PDF")
if st.button("Generate Main PDF"):
with st.spinner("Generating PDF..."):
try:
pdf_bytes = create_main_pdf(ml_markdown)
st.download_button(
label="Download Main PDF",
data=pdf_bytes,
file_name="ml_outline.pdf",
mime="application/pdf"
)
# Display the PDF in the app
base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
pdf_display = f'''
<embed
src="data:application/pdf;base64,{base64_pdf}"
width="100%"
height="400px"
type="application/pdf">
'''
st.markdown(pdf_display, unsafe_allow_html=True)
st.success("PDF generated successfully! The PDF displays the multilevel markdown outline in a two-column layout.")
except Exception as e:
st.error(f"Error generating PDF: {str(e)}")
# Show the PDF rendering code in an expandable section
with st.expander("View PDF Rendering Code"):
st.code("""
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
# Convert markdown headers to styled text for PDF
lines = markdown_text.strip().split('\\n')
pdf_content = []
for line in lines:
if line.startswith('# '):
# Main header - will be handled separately
pass
elif line.startswith('## '):
# Section header - add as a bold item
section = line.replace('## ', '').strip()
pdf_content.append(f"<b>{section}</b>")
elif line.startswith('- '):
# List item - add as a normal item
item = line.replace('- ', '').strip()
pdf_content.append(item)
# Split the content for two columns
mid_point = len(pdf_content) // 2
left_column = pdf_content[:mid_point]
right_column = pdf_content[mid_point:]
return left_column, right_column
""", language="python")
# Add custom CSS for better appearance
st.markdown("""
<style>
.stButton>button {
background-color: #4CAF50;
color: white;
font-weight: bold;
}
.stTabsimport streamlit as st
import base64
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors
import pikepdf
import fpdf
import fitz # pymupdf
import cv2
import numpy as np
from PIL import Image
import io
import os
# Define the ML outline as a markdown string for multilevel content
ml_markdown = """# Cutting-Edge ML Outline
## Core ML Techniques
- 🌟 **1. Mixture of Experts (MoE)**
- Conditional computation techniques
- Sparse gating mechanisms
- Training specialized sub-models
- πŸ”₯ **2. Supervised Fine-Tuning (SFT) using PyTorch**
- Loss function customization
- Gradient accumulation strategies
- Learning rate schedulers
- πŸ€– **3. Large Language Models (LLM) using Transformers**
- Attention mechanisms
- Tokenization strategies
- Position encodings
## Training Methods
- πŸ“Š **4. Self-Rewarding Learning using NPS 0-10 and Verbatims**
- Custom reward functions
- Feedback categorization
- Signal extraction from text
- πŸ‘ **5. Reinforcement Learning from Human Feedback (RLHF)**
- Preference datasets
- PPO implementation
- KL divergence constraints
- πŸ”— **6. MergeKit: Merging Models to Same Embedding Space**
- TIES merging
- Task arithmetic
- SLERP interpolation
## Optimization & Deployment
- πŸ“ **7. DistillKit: Model Size Reduction with Spectrum Analysis**
- Knowledge distillation
- Quantization techniques
- Model pruning strategies
- 🧠 **8. Agentic RAG Agents using Document Inputs**
- Vector database integration
- Query planning
- Self-reflection mechanisms
- ⏳ **9. Longitudinal Data Summarization from Multiple Docs**
- Multi-document compression
- Timeline extraction
- Entity tracking
## Knowledge Representation
- πŸ“‘ **10. Knowledge Extraction using Markdown Knowledge Graphs**
- Entity recognition
- Relationship mapping
- Hierarchical structuring
- πŸ—ΊοΈ **11. Knowledge Mapping with Mermaid Diagrams**
- Flowchart generation
- Sequence diagram creation
- State diagrams
- πŸ’» **12. ML Code Generation with Streamlit/Gradio/HTML5+JS**
- Code completion
- Unit test generation
- Documentation synthesis
"""
# For compatibility with previous code, also maintain the list version
ml_outline = [
"🌟 1. Mixture of Experts (MoE)",
"πŸ”₯ 2. Supervised Fine-Tuning (SFT) using PyTorch",
"πŸ€– 3. Large Language Models (LLM) using Transformers",
"πŸ“Š 4. Self-Rewarding Learning using NPS 0-10 and Verbatims",
"πŸ‘ 5. Reinforcement Learning from Human Feedback (RLHF)",
"πŸ”— 6. MergeKit: Merging Models to Same Embedding Space",
"πŸ“ 7. DistillKit: Model Size Reduction with Spectrum Analysis",
"🧠 8. Agentic RAG Agents using Document Inputs",
"⏳ 9. Longitudinal Data Summarization from Multiple Docs",
"πŸ“‘ 10. Knowledge Extraction using Markdown Knowledge Graphs",
"πŸ—ΊοΈ 11. Knowledge Mapping with Mermaid Diagrams",
"πŸ’» 12. ML Code Generation with Streamlit/Gradio/HTML5+JS"
]
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    """Split a markdown outline into two balanced columns of PDF entries.

    Only three kinds of lines are recognised:

    * ``# `` lines (the document title) are skipped, because every PDF
      generator in this file draws its own title.
    * ``## `` lines become section headers, emitted as ``<b>text</b>``.
    * ``- `` lines become plain items.

    Anything else (blank lines, free text) is ignored, as are headers and
    items whose text is empty.

    Args:
        markdown_text: The outline as one markdown string, or an iterable of
            line strings. Entries of an iterable that carry none of the
            markers above are treated as plain items, so a bare list of
            outline entries (such as ``ml_outline``) can be passed directly.

    Returns:
        A ``(left_column, right_column)`` tuple of string lists. Entries are
        split at the midpoint; with an odd count the right column receives
        the extra entry.
    """
    markers = ('# ', '## ', '- ')
    if isinstance(markdown_text, str):
        lines = markdown_text.strip().split('\n')
    else:
        # Legacy callers hand over a plain list of entries without markers.
        lines = [
            entry if entry.startswith(markers) else '- ' + entry
            for entry in markdown_text
        ]

    pdf_content = []
    for line in lines:
        if line.startswith('## '):
            # Slice the marker off; str.replace() would also delete any
            # later "## " inside the header text.
            section = line[3:].strip()
            if section:
                pdf_content.append(f"<b>{section}</b>")
        elif line.startswith('- '):
            item = line[2:].strip()
            if item:
                pdf_content.append(item)

    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
# Demo functions for PDF libraries
def demo_pikepdf():
    """Create a two-column landscape PDF of the outline and re-save it with pikepdf.

    The page content is drawn with PyMuPDF (``fitz``); the resulting bytes
    are then opened and written out again through pikepdf.

    Returns:
        The PDF file content as ``bytes``.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    # PyMuPDF's built-in Base-14 short names: "helv" is Helvetica and "hebo"
    # is Helvetica-Bold. "helv-b" is not a known name and makes insert_text fail.
    bold_font = "hebo"
    normal_font = "helv"
    blue_color = (0, 0, 0.8)
    black_color = (0, 0, 0)

    # Rows are normally 25pt apart; tighten the spacing when a column would
    # otherwise run past the bottom of the 595pt-high page.
    top, bottom = 80, 550
    rows = max(len(left_column), len(right_column))
    line_height = min(25, (bottom - top) / max(rows - 1, 1))

    doc = fitz.open()
    try:
        page = doc.new_page(width=842, height=595)  # A4 Landscape
        page.insert_text(
            (50, 40),
            "Cutting-Edge ML Outline (PikePDF Demo)",
            fontname=bold_font,
            fontsize=16,
            color=blue_color,
        )

        for x, column in ((50, left_column), (450, right_column)):
            for i, item in enumerate(column):
                y = top + i * line_height
                if item.startswith('<b>'):
                    # Section header: drop the marker tags, draw bold and blue
                    text = item.replace('<b>', '').replace('</b>', '')
                    page.insert_text((x, y), text, fontname=bold_font, fontsize=14, color=blue_color)
                else:
                    page.insert_text((x, y), item, fontname=normal_font, fontsize=11, color=black_color)

        # Draw a dividing line between the columns
        page.draw_line((421, 70), (421, 550))

        temp_buffer = io.BytesIO()
        doc.save(temp_buffer)
    finally:
        doc.close()

    # Round-trip the document through pikepdf
    temp_buffer.seek(0)
    with pikepdf.Pdf.open(temp_buffer) as pdf:
        buffer = io.BytesIO()
        pdf.save(buffer)
    return buffer.getvalue()
def demo_fpdf():
    """Create a two-column landscape PDF of the outline using FPDF.

    Text is drawn with the core "Arial" font, which only covers Latin-1, so
    any other character (for example emoji) is replaced by ``?`` instead of
    raising an encoding error.

    Returns:
        The PDF file content as ``bytes``.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    pdf = fpdf.FPDF(orientation='L')  # Landscape
    # Both columns are positioned by hand on a single page; an automatic page
    # break in the left column would leave the right column on the wrong page.
    pdf.set_auto_page_break(False)
    pdf.add_page()

    # Set title
    pdf.set_font("Arial", 'B', size=16)
    pdf.set_text_color(0, 0, 128)  # Dark blue
    pdf.cell(0, 10, txt="Cutting-Edge ML Outline (FPDF Demo)", ln=True, align='C')
    pdf.ln(10)

    y_start = pdf.get_y()
    side_margin = 20
    bottom_margin = 15
    gutter = 10  # gap between the centre divider and each column
    # Each column must fit between its page edge and the divider.
    col_width = pdf.w / 2 - gutter - side_margin

    # A row is normally a 10-unit cell plus a 2-unit gap; shrink both evenly
    # when the longer column would not fit on the page. Wrapped entries still
    # take extra lines, so this is a best-effort fit.
    rows = max(len(left_column), len(right_column), 1)
    scale = min(1.0, (pdf.h - bottom_margin - y_start) / (rows * 12))
    cell_height = 10 * scale
    row_gap = 2 * scale

    columns = ((side_margin, left_column), (pdf.w / 2 + gutter, right_column))
    for x_col, column in columns:
        pdf.set_y(y_start)
        for item in column:
            if item.startswith('<b>'):
                # Section header
                text = item.replace('<b>', '').replace('</b>', '')
                pdf.set_font("Arial", 'B', size=14)
                pdf.set_text_color(0, 0, 128)  # Dark blue
            else:
                # Normal item
                text = item
                pdf.set_font("Arial", size=11)
                pdf.set_text_color(0, 0, 0)  # Black
            # Core fonts cannot encode characters outside Latin-1
            text = text.encode("latin-1", "replace").decode("latin-1")
            pdf.set_x(x_col)
            pdf.multi_cell(col_width, cell_height, txt=text, align='L')
            pdf.ln(row_gap)

    # Draw a dividing line that stays inside the page
    pdf.line(pdf.w / 2, y_start, pdf.w / 2, pdf.h - bottom_margin)

    buffer = io.BytesIO()
    pdf.output(buffer)
    buffer.seek(0)
    return buffer.getvalue()
def demo_pymupdf():
    """Create a two-column landscape PDF of the outline using PyMuPDF.

    Section headers are drawn bold and blue with extra space above them
    (except at the top of a column); items are indented by 10pt.

    Returns:
        The PDF file content as ``bytes``.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    # PyMuPDF's built-in Base-14 short names: "helv" is Helvetica and "hebo"
    # is Helvetica-Bold. "helv-b" is not a known name and makes insert_text fail.
    bold_font = "hebo"
    normal_font = "helv"
    blue_color = (0, 0, 0.8)
    black_color = (0, 0, 0)

    top, bottom = 80, 550
    line_height = 25
    section_gap = 10

    def natural_height(column):
        """Distance from the first to the last baseline at full spacing."""
        gaps = sum(1 for entry in column[1:] if entry.startswith('<b>'))
        return line_height * max(len(column) - 1, 0) + section_gap * gaps

    # Compress the vertical spacing when the taller column would otherwise
    # run past the bottom of the 595pt-high page.
    tallest = max(natural_height(left_column), natural_height(right_column))
    scale = min(1.0, (bottom - top) / tallest) if tallest else 1.0

    doc = fitz.open()
    try:
        page = doc.new_page(width=842, height=595)  # A4 Landscape
        page.insert_text(
            (300, 40),
            "Cutting-Edge ML Outline (PyMuPDF Demo)",
            fontname=bold_font,
            fontsize=16,
            color=blue_color,
        )

        for x, column in ((50, left_column), (450, right_column)):
            current_y = top
            for index, item in enumerate(column):
                if item.startswith('<b>'):
                    # Add extra space before sections (except the first entry)
                    if index:
                        current_y += section_gap * scale
                    text = item.replace('<b>', '').replace('</b>', '')
                    page.insert_text((x, current_y), text, fontname=bold_font, fontsize=14, color=blue_color)
                else:
                    page.insert_text((x + 10, current_y), item, fontname=normal_font, fontsize=11, color=black_color)
                current_y += line_height * scale

        # Draw a dividing line between the columns
        page.draw_line((421, 70), (421, 550))

        buffer = io.BytesIO()
        doc.save(buffer)
    finally:
        doc.close()
    return buffer.getvalue()
# Demo function for image capture
def demo_image_capture():
    """Generate a stand-in "camera" image: a 640x480 colour gradient with a caption.

    No camera is accessed; the picture is synthesised with NumPy and PIL.

    Returns:
        The image encoded as JPEG ``bytes``.
    """
    from PIL import ImageDraw, ImageFont

    width, height = 640, 480

    # Diagonal gradient in [0, 1], then scaled to 8-bit
    xx, yy = np.meshgrid(np.linspace(0, 1, width), np.linspace(0, 1, height))
    gradient = (xx + yy) / 2
    img_array = (gradient * 255).astype(np.uint8)

    # Doubling a uint8 array wraps around past 255, so compute the blue
    # channel in a wider type and clip it back into range.
    blue = np.clip(img_array.astype(np.uint16) * 2, 0, 255).astype(np.uint8)
    rgb_array = np.stack([img_array, img_array // 2, blue], axis=2)

    img = Image.fromarray(rgb_array)

    # Add text to the image
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("arial.ttf", 30)
    except OSError:
        # Font file not available on this system; use PIL's built-in font
        font = ImageFont.load_default()
    draw.text((width // 4, height // 2), "OpenCV Demo Image", fill=(255, 255, 255), font=font)

    # Save to buffer
    buffer = io.BytesIO()
    img.save(buffer, format="JPEG")
    return buffer.getvalue()
# Main PDF creation using ReportLab
def _outline_markup(text):
    """Convert one outline entry into ReportLab paragraph markup.

    XML-special characters are escaped first so they cannot be mistaken for
    tags; markdown ``**bold**`` spans are then turned into ``<b>`` tags.
    """
    import html
    import re

    return re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', html.escape(text, quote=False))


def create_main_pdf(markdown_text):
    """Create a landscape A4 PDF with the outline laid out in two columns.

    The outline is split by ``markdown_to_pdf_content``; each entry becomes
    a paragraph and the left/right entries are paired row by row in a
    two-column table with a grey divider between the columns.

    Args:
        markdown_text: Outline markdown, passed to ``markdown_to_pdf_content``.

    Returns:
        The PDF file content as ``bytes``. If the outline yields no entries,
        the PDF contains only the title.
    """
    from itertools import zip_longest

    from reportlab.lib.styles import ParagraphStyle
    from reportlab.platypus import Table, TableStyle

    left_column, right_column = markdown_to_pdf_content(markdown_text)

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # Landscape
        leftMargin=50,
        rightMargin=50,
        topMargin=50,
        bottomMargin=50
    )

    styles = getSampleStyleSheet()
    title_style = styles['Heading1']
    title_style.textColor = colors.darkblue
    title_style.alignment = 1  # Center alignment
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        spaceAfter=6
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=11,
        leading=14,
        leftIndent=10
    )

    def to_paragraphs(column):
        """Map column entries to styled paragraphs (headers vs. items)."""
        paragraphs = []
        for entry in column:
            if entry.startswith('<b>'):
                heading = entry.replace('<b>', '').replace('</b>', '')
                paragraphs.append(Paragraph(_outline_markup(heading), section_style))
            else:
                paragraphs.append(Paragraph(_outline_markup(entry), item_style))
        return paragraphs

    story = [
        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
        Spacer(1, 20),
    ]

    # One table row per pair of entries; the shorter column is padded with "".
    table_data = [
        list(row)
        for row in zip_longest(
            to_paragraphs(left_column), to_paragraphs(right_column), fillvalue=""
        )
    ]

    # ReportLab refuses to build a table without rows, so skip it when empty.
    if table_data:
        # Page width minus 120pt (the two 50pt margins plus 20pt of slack), halved
        col_width = (A4[1] - 120) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width])
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.white),
            ('LINEAFTER', (0, 0), (0, -1), 1, colors.grey),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
def get_binary_file_downloader_html(bin_data, file_label='File'):
    """Create an HTML download link that embeds binary data as a base64 data URI.

    Args:
        bin_data: The bytes to offer for download.
        file_label: File name used for the ``download`` attribute and the
            link text. It is HTML-escaped so quotes or angle brackets in the
            label cannot break out of the attribute or inject markup.

    Returns:
        An ``<a>`` element as a string.
    """
    import html

    bin_str = base64.b64encode(bin_data).decode()
    safe_label = html.escape(file_label, quote=True)
    href = f'<a href="data:application/octet-stream;base64,{bin_str}" download="{safe_label}">Download {safe_label}</a>'
    return href
# Streamlit UI
# Two-column page: the outline with a markdown download on the left, the PDF
# library demos and the main ReportLab PDF on the right.
st.title("πŸš€ Cutting-Edge ML Outline Generator")
col1, col2 = st.columns(2)

with col1:
    st.header("πŸ“ Markdown Outline")
    # Emit every entry as a "- " bullet so the text is a real markdown list.
    # The same string is later handed to create_main_pdf, whose parser only
    # picks up "- " items and cannot take the raw ml_outline list.
    outline_text = "\n".join(f"- {entry}" for entry in ml_outline)
    st.markdown(outline_text)
    # Create a download button for the markdown file
    st.download_button(
        label="Download Markdown",
        data=outline_text,
        file_name="ml_outline.md",
        mime="text/markdown"
    )

with col2:
    st.header("πŸ“‘ PDF Preview & Demos")

    # Library Demos
    st.subheader("Library Demos")
    if st.button("Run PDF Library Demos"):
        with st.spinner("Running demos..."):
            # Create tabs for each demo
            demo_tabs = st.tabs(["PikePDF", "FPDF", "PyMuPDF", "Image Demo"])
            with demo_tabs[0]:
                # pikepdf demo
                pike_pdf = demo_pikepdf()
                st.download_button("Download pikepdf Demo", pike_pdf, "pikepdf_demo.pdf")
                st.write("PikePDF demo created successfully!")
            with demo_tabs[1]:
                # fpdf demo
                fpdf_pdf = demo_fpdf()
                st.download_button("Download fpdf Demo", fpdf_pdf, "fpdf_demo.pdf")
                st.write("FPDF demo created successfully!")
            with demo_tabs[2]:
                # pymupdf demo
                pymupdf_pdf = demo_pymupdf()
                st.download_button("Download pymupdf Demo", pymupdf_pdf, "pymupdf_demo.pdf")
                st.write("PyMuPDF demo created successfully!")
            with demo_tabs[3]:
                # Image demo
                img_data = demo_image_capture()
                st.image(img_data, caption="Demo Image (Camera simulation)")

    # Main PDF Generation
    st.subheader("Main Outline PDF")
    if st.button("Generate Main PDF"):
        with st.spinner("Generating PDF..."):
            try:
                # Pass the markdown string, not the ml_outline list
                pdf_bytes = create_main_pdf(outline_text)
                st.download_button(
                    label="Download Main PDF",
                    data=pdf_bytes,
                    file_name="ml_outline.pdf",
                    mime="application/pdf"
                )
                # Display the PDF in the app as an embedded base64 data URI
                base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
                pdf_display = f'''
<embed
src="data:application/pdf;base64,{base64_pdf}"
width="100%"
height="400px"
type="application/pdf">
'''
                st.markdown(pdf_display, unsafe_allow_html=True)
                st.success("PDF generated successfully! You can view it above and download it using the button.")
            except Exception as e:
                # UI boundary: report the failure to the user instead of crashing the page
                st.error(f"Error generating PDF: {str(e)}")

# Add custom CSS for better appearance
st.markdown("""
<style>
.stButton>button {
background-color: #4CAF50;
color: white;
font-weight: bold;
}
.stTabs [data-baseweb="tab-list"] {
gap: 2px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
white-space: pre-wrap;
background-color: #f0f2f6;
border-radius: 4px 4px 0px 0px;
gap: 1px;
padding-top: 10px;
padding-bottom: 10px;
}
</style>
""", unsafe_allow_html=True)