Spaces:
Sleeping
Sleeping
import streamlit as st | |
import base64 | |
from reportlab.lib.pagesizes import A4 | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib import colors | |
import io | |
import re | |
import fitz # PyMuPDF | |
from PIL import Image | |
# Use the full browser width for the PDF preview and keep the settings
# sidebar tucked away until the user opens it.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
)
# Starter outline shown in the editor on first load.  One entry per markdown
# line: "# " is the title, "## " a section, "N." a numbered item and "- " a
# sub-bullet of the preceding numbered item.  The text ends with a newline.
default_markdown = "\n".join((
    "# Cutting-Edge ML Outline",
    "## Core ML Techniques",
    "1. π **Mixture of Experts (MoE)**",
    "- Conditional computation techniques",
    "- Sparse gating mechanisms",
    "- Training specialized sub-models",
    "2. π₯ **Supervised Fine-Tuning (SFT) using PyTorch**",
    "- Loss function customization",
    "- Gradient accumulation strategies",
    "- Learning rate schedulers",
    "3. π€ **Large Language Models (LLM) using Transformers**",
    "- Attention mechanisms",
    "- Tokenization strategies",
    "- Position encodings",
    "## Training Methods",
    "4. π **Self-Rewarding Learning using NPS 0-10 and Verbatims**",
    "- Custom reward functions",
    "- Feedback categorization",
    "- Signal extraction from text",
    "5. π **Reinforcement Learning from Human Feedback (RLHF)**",
    "- Preference datasets",
    "- PPO implementation",
    "- KL divergence constraints",
    "6. π **MergeKit: Merging Models to Same Embedding Space**",
    "- TIES merging",
    "- Task arithmetic",
    "- SLERP interpolation",
    "## Optimization & Deployment",
    "7. π **DistillKit: Model Size Reduction with Spectrum Analysis**",
    "- Knowledge distillation",
    "- Quantization techniques",
    "- Model pruning strategies",
    "8. π§ **Agentic RAG Agents using Document Inputs**",
    "- Vector database integration",
    "- Query planning",
    "- Self-reflection mechanisms",
    "9. β³ **Longitudinal Data Summarization from Multiple Docs**",
    "- Multi-document compression",
    "- Timeline extraction",
    "- Entity tracking",
    "## Knowledge Representation",
    "10. π **Knowledge Extraction using Markdown Knowledge Graphs**",
    "- Entity recognition",
    "- Relationship mapping",
    "- Hierarchical structuring",
    "11. πΊοΈ **Knowledge Mapping with Mermaid Diagrams**",
    "- Flowchart generation",
    "- Sequence diagram creation",
    "- State diagrams",
    "12. π» **ML Code Generation with Streamlit/Gradio/HTML5+JS**",
    "- Code completion",
    "- Unit test generation",
    "- Documentation synthesis",
    "",
))
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    """Parse outline-style markdown into two columns of PDF entries.

    Recognised line kinds (after stripping surrounding whitespace):

    * ``# Title``   -- ignored here.
    * ``## Section`` -- emitted as the string ``"<b>Section</b>"``.
    * ``N. Item``   -- starts a numbered item, emitted as
      ``[item_text, [sub_bullet, ...]]``.
    * ``- bullet``  -- attached to the current numbered item, if any.
    * anything else -- emitted as a plain string when no numbered item is
      open; ignored while one is open.

    A numbered item is emitted even when it has no sub-bullets (its bullet
    list is then empty), so items are never silently dropped.

    Args:
        markdown_text: The markdown source to parse.

    Returns:
        A ``(left_column, right_column)`` tuple of lists.  The parsed entries
        are split at the midpoint of the entry count; when the count is odd
        the right column receives the extra entry.
    """
    pdf_content = []
    current_item = None  # text of the numbered item being collected, if any
    sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('# '):
            # Blank lines and the document title produce no entry.
            continue

        if line.startswith('## '):
            # A section heading closes whatever numbered item was open.
            if current_item is not None:
                pdf_content.append([current_item, sub_items])
            current_item = None
            sub_items = []
            # Slice off only the leading marker so a later "## " inside the
            # heading text is preserved.
            section = line[3:].strip()
            pdf_content.append(f"<b>{section}</b>")
        elif re.match(r'^\d+\.', line):
            # A new numbered item closes the previous one, bullets or not.
            if current_item is not None:
                pdf_content.append([current_item, sub_items])
            current_item = line
            sub_items = []
        elif current_item is not None:
            # Inside a numbered item only "- " bullets are kept.
            if line.startswith('- '):
                sub_items.append(line)
        else:
            pdf_content.append(line)

    if current_item is not None:
        pdf_content.append([current_item, sub_items])

    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
def _build_column_cells(column, section_style, item_style, subitem_style):
    """Turn one column of parsed outline entries into a list of Paragraphs."""
    cells = []
    for entry in column:
        if isinstance(entry, list):
            # Numbered item followed by its sub-bullets.
            main_item, sub_items = entry
            cells.append(Paragraph(main_item, item_style))
            cells.extend(Paragraph(sub, subitem_style) for sub in sub_items)
        elif entry.startswith('<b>'):
            # Section headings arrive wrapped in <b>...</b>; the section
            # style supplies the emphasis, so the tags are removed.
            heading = entry.replace('<b>', '').replace('</b>', '')
            cells.append(Paragraph(heading, section_style))
        else:
            cells.append(Paragraph(entry, item_style))
    return cells


# Main PDF creation with parameterized text sizes
def create_main_pdf(markdown_text, base_font_size=10, auto_size=False):
    """Render the markdown outline as a two-column landscape A4 PDF.

    Args:
        markdown_text: Outline markdown, parsed by ``markdown_to_pdf_content``.
        base_font_size: Point size for numbered items; section, sub-item and
            title sizes are derived from it by fixed ratios.
        auto_size: When true, ``base_font_size`` is replaced by
            ``200 / <number of rendered lines>`` clamped to 6-12 points.  If
            there is nothing to render, the passed ``base_font_size`` is kept.

    Returns:
        The generated PDF as ``bytes``.  With no renderable content the PDF
        contains only the title.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # swap width/height for landscape
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
    )
    styles = getSampleStyleSheet()
    spacer_height = 10

    left_column, right_column = markdown_to_pdf_content(markdown_text)

    # Each numbered item counts once plus once per sub-bullet; every other
    # entry counts once.
    total_items = sum(
        1 + len(entry[1]) if isinstance(entry, list) else 1
        for column in (left_column, right_column)
        for entry in column
    )

    # Adjust the 200 multiplier to control autosizing sensitivity.  The
    # total_items guard avoids dividing by zero on empty content.
    if auto_size and total_items:
        base_font_size = max(6, min(12, 200 / total_items))

    # Font size parameters - tweak these ratios as needed.
    item_font_size = base_font_size
    subitem_font_size = base_font_size * 0.9
    section_font_size = base_font_size * 1.2
    title_font_size = min(16, base_font_size * 1.5)

    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Heading1'],
        textColor=colors.darkblue,
        alignment=1,  # centered
        fontSize=title_font_size,
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        fontSize=section_font_size,
        leading=section_font_size * 1.2,
        spaceAfter=2,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=item_font_size,
        leading=item_font_size * 1.2,
        fontName='Helvetica-Bold',
        spaceAfter=1,
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontSize=subitem_font_size,
        leading=subitem_font_size * 1.2,
        leftIndent=10,
        spaceAfter=1,
    )

    story = [
        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
        Spacer(1, spacer_height),
    ]

    left_cells = _build_column_cells(
        left_column, section_style, item_style, subitem_style
    )
    right_cells = _build_column_cells(
        right_column, section_style, item_style, subitem_style
    )

    # Pad the shorter column with empty cells so the rows pair up.
    max_cells = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (max_cells - len(left_cells)))
    right_cells.extend([""] * (max_cells - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # Skip the table when there are no rows rather than handing ReportLab
    # an empty table.
    if table_data:
        col_width = (A4[1] - 72) / 2.0
        table = Table(
            table_data, colWidths=[col_width, col_width], hAlign='CENTER'
        )
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            # Thin grey rule after the first column separates the two halves.
            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
# Function to convert PDF bytes to image using fitz
def pdf_to_image(pdf_bytes):
    """Render the first page of a PDF as a PIL image for on-screen preview.

    Args:
        pdf_bytes: The PDF document as raw bytes.

    Returns:
        An RGB ``PIL.Image`` of page 0 rendered at 2x zoom, or ``None`` if
        the document could not be opened or rendered (the error is reported
        to the user via ``st.error``).
    """
    try:
        # The context manager closes the document even when rendering
        # fails part-way, which the explicit close() call did not.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            page = doc[0]
            pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))  # 2x zoom
            return Image.frombytes(
                "RGB", [pix.width, pix.height], pix.samples
            )
    except Exception as e:
        # Preview is best-effort: surface the problem in the UI and let the
        # caller fall back to offering the download only.
        st.error(f"Failed to render PDF preview: {e}")
        return None
# --- Settings sidebar (collapsed by default) -------------------------------
with st.sidebar:
    auto_size = st.checkbox("Auto-size text", value=True)
    if auto_size:
        # Placeholder value; create_main_pdf derives the real size itself.
        base_font_size = 10
        st.info("Font size will auto-adjust between 6-12 points based on content length.")
    else:
        base_font_size = st.slider(
            "Base Font Size (points)",
            min_value=6,
            max_value=16,
            value=10,
            step=1,
        )

# --- Persist the markdown across reruns ------------------------------------
if 'markdown_content' not in st.session_state:
    st.session_state['markdown_content'] = default_markdown

# --- Build the PDF from the stored markdown --------------------------------
with st.spinner("Generating PDF..."):
    pdf_bytes = create_main_pdf(
        st.session_state['markdown_content'],
        base_font_size,
        auto_size,
    )

# --- Full-width preview, with a hint when rendering failed -----------------
with st.container():
    pdf_image = pdf_to_image(pdf_bytes)
    if not pdf_image:
        st.info("Download the PDF to view it locally.")
    else:
        st.image(pdf_image, use_container_width=True)

# --- PDF download ----------------------------------------------------------
st.download_button(
    label="Download PDF",
    data=pdf_bytes,
    file_name="ml_outline.pdf",
    mime="application/pdf",
)

# --- Markdown editor -------------------------------------------------------
edited_markdown = st.text_area(
    "Modify the markdown content below:",
    value=st.session_state['markdown_content'],
    height=300,
)

# Edits only take effect once the user confirms them; the rerun then
# regenerates the PDF from the stored text.
if st.button("Update PDF"):
    st.session_state['markdown_content'] = edited_markdown
    st.rerun()

# --- Markdown download (the stored text, i.e. what the PDF was built from) -
st.download_button(
    label="Save Markdown",
    data=st.session_state['markdown_content'],
    file_name="ml_outline.md",
    mime="text/markdown",
)