# Repository file: math-validator / compile_all_pdfs.py
# Last commit: "Update validator app" by igriv (1ea9c72, verified)
#!/usr/bin/env python
"""
Batch compile all LaTeX reconciliation documents to PDFs
Can be run after validation to generate all PDFs at once
"""
import os
import sys
import argparse
from pathlib import Path
from latex_compiler import compile_latex_batch, check_latex_available
import time
def find_tex_files(base_dir="validation_results"):
    """Recursively collect the LaTeX source files below ``base_dir``.

    Args:
        base_dir: Directory to search (searched recursively).

    Returns:
        A sorted list of path strings, one per regular file whose name ends
        in ``.tex``. Files carrying an auxiliary extension component before
        the final ``.tex`` (for example ``doc.aux.tex``) are skipped. If
        ``base_dir`` does not exist, a message is printed and an empty list
        is returned.
    """
    base_path = Path(base_dir)
    if not base_path.exists():
        print(f"Directory not found: {base_dir}")
        return []

    aux_suffixes = {".aux", ".log", ".out"}
    tex_files = []
    for tex_file in base_path.rglob("*.tex"):
        # The glob matches names only, so a directory called "x.tex" would
        # otherwise be reported as a document.
        if not tex_file.is_file():
            continue
        # Compare whole extension components rather than substrings, so that
        # a name like "notes.outline.tex" is not mistaken for a ".out" file.
        if aux_suffixes.intersection(tex_file.suffixes[:-1]):
            continue
        tex_files.append(str(tex_file))

    # Sort so the result does not depend on filesystem enumeration order.
    return sorted(tex_files)
def compile_validation_pdfs(run_dir=None, max_workers=4, timeout=30):
    """
    Compile all LaTeX files from a validation run

    The ``.tex`` files are read from ``<run>/latex_documents`` and the PDFs
    are written to that same directory. Documents that already have a PDF
    with the same stem are skipped; when every document already has one, the
    user is asked whether to recompile everything.

    Args:
        run_dir: Specific run directory, or None for latest
        max_workers: Number of parallel workers
        timeout: Forwarded as ``timeout`` to ``compile_latex_batch``
            (presumably seconds per document -- confirm in latex_compiler)

    Returns:
        None. Progress and the final summary are printed to stdout.
    """
    if not check_latex_available():
        print("Error: pdflatex not installed")
        print("Install with:")
        print(" Linux: apt-get install texlive-latex-base")
        print(" Windows: Install MiKTeX")
        print(" macOS: brew install --cask mactex")
        return

    base_dir = _resolve_run_dir(run_dir)
    if base_dir is None:
        return

    # Find LaTeX documents directory
    latex_dir = Path(base_dir) / "latex_documents"
    if not latex_dir.exists():
        print(f"No latex_documents directory in {base_dir}")
        return

    # Only the top level of latex_documents is scanned (non-recursive glob).
    tex_files = list(latex_dir.glob("*.tex"))
    if not tex_files:
        print(f"No .tex files found in {latex_dir}")
        return
    print(f"Found {len(tex_files)} LaTeX files to compile")

    # Check for already compiled PDFs
    existing_pdfs = list(latex_dir.glob("*.pdf"))
    if existing_pdfs:
        print(f" ({len(existing_pdfs)} PDFs already exist)")
        # A document counts as compiled when a PDF with the same stem exists;
        # file timestamps are not compared.
        pdf_names = {f.stem for f in existing_pdfs}
        new_tex = [f for f in tex_files if f.stem not in pdf_names]
        if new_tex:
            print(f" Compiling {len(new_tex)} new PDFs...")
            tex_files = new_tex
        else:
            print(" All PDFs already compiled")
            try:
                recompile = input("Recompile all? (y/N): ").strip().lower()
            except EOFError:
                # No interactive stdin (cron, CI, closed pipe): fall back to
                # the advertised default answer "N" instead of crashing.
                recompile = ""
            if recompile != 'y':
                return

    # Compile in parallel
    print(f"\nCompiling with {max_workers} parallel workers...")
    start_time = time.time()
    results = compile_latex_batch(
        [str(f) for f in tex_files],
        output_dir=str(latex_dir),
        max_workers=max_workers,
        timeout=timeout,
    )

    # Summary: each result value is a 3-tuple whose first item is the success
    # flag and whose last item is the error text.
    elapsed = time.time() - start_time
    successful = sum(1 for r in results.values() if r[0])
    failed = len(results) - successful
    print(f"\n{'='*60}")
    print(f"Compilation complete in {elapsed:.1f} seconds")
    print(f" Successful: {successful}")
    print(f" Failed: {failed}")
    if failed > 0:
        print("\nFailed files:")
        for tex_file, (success, _, error) in results.items():
            if not success:
                # Guard in case a failure is reported without a message
                # (None), which would make the slice raise TypeError.
                print(f" - {Path(tex_file).name}: {str(error or '')[:50]}...")
    print(f"\nPDFs saved to: {latex_dir}")


def _resolve_run_dir(run_dir=None):
    """Return the run directory to compile, or None when none can be found.

    A non-empty ``run_dir`` is returned unchanged (its existence is not
    checked here). Otherwise the most recently modified ``run_*`` directory
    under ``validation_results`` is chosen; if there is none, a message is
    printed and None is returned.
    """
    if run_dir:
        return run_dir

    base_path = Path("validation_results")
    if not base_path.exists():
        print("No validation_results directory found")
        return None

    runs = [d for d in base_path.iterdir() if d.is_dir() and d.name.startswith("run_")]
    if not runs:
        print("No validation runs found")
        return None

    # "Latest" is decided by modification time, not by the directory name.
    latest_run = max(runs, key=lambda d: d.stat().st_mtime)
    print(f"Using latest run: {latest_run.name}")
    return str(latest_run)
def main():
    """Parse the command line and compile PDFs for one run or for every run.

    Without ``--all``, ``--run-dir`` (or the latest run when it is omitted)
    is compiled. With ``--all``, every ``run_*`` directory under
    ``validation_results`` is compiled in name order and ``--run-dir`` is
    not consulted.
    """
    parser = argparse.ArgumentParser(description='Compile LaTeX reconciliation documents to PDFs')
    parser.add_argument('--run-dir', help='Specific run directory (default: latest)')
    parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers')
    parser.add_argument('--all', action='store_true', help='Compile all runs, not just latest')
    args = parser.parse_args()

    if not args.all:
        compile_validation_pdfs(args.run_dir, args.workers)
        return

    # Compile all runs
    base_path = Path("validation_results")
    if not base_path.exists():
        # Report the problem instead of exiting silently, using the same
        # message the single-run path prints for this situation.
        print("No validation_results directory found")
        return

    # Sort by name so runs are processed in a reproducible order rather than
    # in whatever order the filesystem enumerates them.
    runs = sorted(
        (d for d in base_path.iterdir() if d.is_dir() and d.name.startswith("run_")),
        key=lambda d: d.name,
    )
    print(f"Found {len(runs)} validation runs")
    for run in runs:
        print(f"\n{'='*60}")
        print(f"Processing: {run.name}")
        print('='*60)
        compile_validation_pdfs(str(run), args.workers)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()