#!/usr/bin/env python
"""
Async LaTeX compilation handler
Works efficiently on Linux/HF Spaces with forking
Falls back to threads on Windows
"""

import os
import sys
import subprocess
import platform
import tempfile
import threading
import traceback
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from pathlib import Path
import time

# PIDs of children forked by compile_latex_async() that have not been reaped yet.
_background_pids = set()


def is_linux():
    """Return True when running on Linux or macOS (Darwin), False otherwise."""
    return platform.system() in ('Linux', 'Darwin')


def _extract_latex_error(stdout):
    """Return the first error-looking line of pdflatex output plus the 4 lines after it."""
    if stdout:
        lines = stdout.split('\n')
        for i, line in enumerate(lines):
            if 'Error' in line or '!' in line[:2]:
                return '\n'.join(lines[i:i + 5])
    return "Compilation failed"


def compile_latex_file(tex_path, output_dir=None, timeout=30):
    """
    Compile a single LaTeX file to PDF

    Never raises for compilation problems; every failure is reported through
    the returned tuple.

    Args:
        tex_path: Path to .tex file (str or Path, absolute or relative)
        output_dir: Output directory, str or Path (default: same as tex file).
            Created if it does not exist.
        timeout: Compilation timeout in seconds

    Returns:
        tuple: (success: bool, pdf_path: str or None, error_msg: str or None)
    """
    tex_path = Path(tex_path)

    if not tex_path.exists():
        return False, None, f"File not found: {tex_path}"

    # Accept plain strings as well as Path objects for the output directory.
    output_dir = Path(output_dir) if output_dir else tex_path.parent
    pdf_path = output_dir / tex_path.with_suffix('.pdf').name

    # Success is detected by the PDF's existence, so a stale PDF that cannot
    # be removed would be mistaken for a fresh one; report that up front.
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
        if pdf_path.exists():
            pdf_path.unlink()
    except OSError as exc:
        return False, None, f"Cannot prepare output location {output_dir}: {exc}"

    # pdflatex runs with cwd set to the .tex file's directory, so every path
    # handed to it must be absolute; relative ones would be resolved twice.
    abs_tex = Path(os.path.abspath(tex_path))
    abs_out = Path(os.path.abspath(output_dir))

    cmd = [
        'pdflatex',
        '-interaction=nonstopmode',
        '-halt-on-error',
        f'-output-directory={abs_out}',
        str(abs_tex)
    ]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # pdflatex output is not guaranteed to be valid UTF-8; a decode
            # error must not turn a successful compile into a failure.
            errors='replace',
            timeout=timeout,
            cwd=str(abs_tex.parent)
        )

        if pdf_path.exists():
            return True, str(pdf_path), None
        return False, None, _extract_latex_error(result.stdout)

    except subprocess.TimeoutExpired:
        return False, None, f"Timeout after {timeout} seconds"
    except FileNotFoundError:
        return False, None, "pdflatex not found - install texlive"
    except Exception as e:
        return False, None, str(e)


def compile_latex_batch(tex_files, output_dir=None, max_workers=4, timeout=30):
    """
    Compile multiple LaTeX files in parallel

    Args:
        tex_files: List of .tex file paths
        output_dir: Output directory for PDFs
        max_workers: Number of parallel workers
        timeout: Timeout per file

    Returns:
        dict: {tex_path: (success, pdf_path, error_msg)}, in submission order
    """
    results = {}

    if not tex_files:
        return results

    # Use ProcessPoolExecutor on Linux for true parallelism
    # Use ThreadPoolExecutor on Windows (less efficient but works)
    if is_linux():
        executor_class = ProcessPoolExecutor
        print(f"Using process-based parallelism ({max_workers} workers)")
    else:
        executor_class = ThreadPoolExecutor
        print(f"Using thread-based parallelism ({max_workers} workers)")

    with executor_class(max_workers=max_workers) as executor:
        # Submit all compilation tasks
        futures = {
            executor.submit(compile_latex_file, tex_file, output_dir, timeout): tex_file
            for tex_file in tex_files
        }

        # Collect results in submission order (dicts preserve insertion
        # order), waiting for each task in turn.
        for future, tex_file in futures.items():
            try:
                success, pdf_path, error = future.result(timeout=timeout + 5)
                results[tex_file] = (success, pdf_path, error)

                if success:
                    print(f" ✓ Compiled: {Path(tex_file).name}")
                else:
                    print(f" ✗ Failed: {Path(tex_file).name}")

            except Exception as e:
                results[tex_file] = (False, None, str(e))
                print(f" ✗ Error: {Path(tex_file).name}: {e}")

    return results


def _flush_standard_streams():
    """Flush stdout/stderr, ignoring streams that are missing or already closed."""
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.flush()
        except (AttributeError, OSError, ValueError):
            # Best effort: a detached or closed stream has nothing to flush.
            pass


def _reap_finished_children():
    """Collect the exit status of finished background compiles so they do not stay zombies."""
    for pid in list(_background_pids):
        try:
            finished, _ = os.waitpid(pid, os.WNOHANG)
        except ChildProcessError:
            # Already reaped elsewhere (e.g. SIGCHLD is ignored).
            finished = pid
        if finished:
            _background_pids.discard(pid)


def compile_latex_async(tex_path, output_dir=None, callback=None):
    """
    Compile LaTeX file asynchronously (fire-and-forget)

    On Linux/macOS the work runs in a forked child process, so the callback
    executes in that child and cannot change state in the caller's process.
    Elsewhere the work runs in a daemon thread of the current process.

    Args:
        tex_path: Path to .tex file
        output_dir: Output directory
        callback: Optional callback function(success, pdf_path, error)
    """
    if is_linux():
        # Reap children from earlier calls; they are only collected
        # opportunistically, on the next call to this function.
        _reap_finished_children()

        # Flush before forking so buffered parent output is not duplicated
        # when the child flushes its copy of the buffers.
        _flush_standard_streams()

        pid = os.fork()

        if pid == 0:
            # Child process
            exit_code = 0
            try:
                success, pdf_path, error = compile_latex_file(tex_path, output_dir)
                if callback:
                    callback(success, pdf_path, error)
            except BaseException:
                # Nothing above us will report this, so do it here.
                traceback.print_exc()
                exit_code = 1
            finally:
                # os._exit() skips normal interpreter shutdown, so anything the
                # callback printed must be flushed by hand or it is lost.
                _flush_standard_streams()
                os._exit(exit_code)
        else:
            # Parent process continues immediately
            _background_pids.add(pid)
            print(f" → Compiling {Path(tex_path).name} in background (PID: {pid})")
    else:
        # On Windows, use threading
        def compile_thread():
            success, pdf_path, error = compile_latex_file(tex_path, output_dir)
            if callback:
                callback(success, pdf_path, error)

        thread = threading.Thread(target=compile_thread, daemon=True)
        thread.start()
        print(f" → Compiling {Path(tex_path).name} in background thread")


def check_latex_available():
    """
    Check if pdflatex is available

    Runs `pdflatex --version` and prints the first output line containing
    'TeX'. Returns True only when the command exits with status 0 and such a
    line is found; returns False otherwise, including when the command is
    missing or does not answer within 5 seconds.
    """
    try:
        result = subprocess.run(
            ['pdflatex', '--version'],
            capture_output=True,
            text=True,
            errors='replace',
            timeout=5
        )
    except (OSError, subprocess.SubprocessError):
        # Missing executable, permission problem or timeout.
        return False

    if result.returncode == 0:
        # Extract version
        for line in result.stdout.split('\n'):
            if 'TeX' in line:
                print(f"LaTeX available: {line.strip()}")
                return True
    return False


# Integration with universal_validator.py
def setup_async_latex_compilation():
    """
    Setup async LaTeX compilation for the validator

    Returns a function taking a .tex path that starts a background compile
    and prints the outcome, or None when pdflatex is not available.
    """
    if not check_latex_available():
        print("Warning: LaTeX not available, PDF compilation disabled")
        return None

    def compile_reconciliation(tex_path):
        """Compile reconciliation document asynchronously"""
        def report(success, pdf_path, error):
            print(f" [PDF] {'Success' if success else 'Failed'}: {Path(tex_path).name}")

        compile_latex_async(tex_path, callback=report)

    return compile_reconciliation


def _self_test():
    """Compile a tiny document in a throwaway directory and report the result."""
    print("Testing LaTeX compilation...")
    print(f"Platform: {platform.system()}")
    print(f"Async support: {'Yes' if is_linux() else 'Limited (Windows)'}")

    if not check_latex_available():
        print("✗ LaTeX not installed")
        print(" On Linux: apt-get install texlive-latex-base")
        print(" On Windows: Install MiKTeX")
        print(" On macOS: brew install --cask mactex")
        return

    # A temporary directory also removes the .aux/.log files pdflatex writes
    # next to the PDF, not just the .tex and .pdf.
    with tempfile.TemporaryDirectory() as work_dir:
        test_file = Path(work_dir) / "test.tex"
        test_file.write_text(r"""\documentclass{article}
\begin{document}
\title{Test Document}
\author{Validator}
\maketitle
This is a test: $x^2 + y^2 = z^2$
\end{document}""", encoding="utf-8")

        print(f"\nCompiling test file: {test_file}")
        success, pdf_path, error = compile_latex_file(test_file)

        if success:
            print(f"✓ Success! PDF created: {pdf_path}")
            print(f" Size: {os.path.getsize(pdf_path)} bytes")
        else:
            print(f"✗ Failed: {error}")


if __name__ == "__main__":
    # Test the compiler
    _self_test()