"""Helpers for previewing PDFs and classifying / selecting uploaded files."""

import io
import os
from datetime import datetime, timezone, timedelta

import fitz
from PIL import Image


def convert_pdf_to_image(pdf_path, page_number=0, dpi=150):
    """
    Convert a PDF page to a PIL Image for preview.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.
        page_number: Index of the page to render. An index at or beyond the
            page count falls back to the first page.
        dpi: Target resolution; the page is scaled by ``dpi / 72``.

    Returns:
        The rendered page as a PIL Image, or ``None`` if anything fails
        (the error is printed).
    """
    try:
        pdf_document = fitz.open(pdf_path)
        try:
            # Out-of-range requests fall back to the first page.
            if page_number >= len(pdf_document):
                page_number = 0
            page = pdf_document.load_page(page_number)

            # Rendering defaults to 72 DPI, so scale both axes by dpi / 72.
            zoom = dpi / 72
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            img_data = pix.tobytes("ppm")
        finally:
            # Close the document even when rendering raised, so the handle
            # is not leaked on the error path.
            pdf_document.close()

        img = Image.open(io.BytesIO(img_data))
        # Image.open is lazy; decode now so a bad buffer is reported here
        # (and turned into None) instead of surfacing later in the caller.
        img.load()
        return img
    except Exception as e:
        print(f"Error converting PDF to image: {e}")
        return None


def get_pdf_info(pdf_path):
    """
    Get basic information about the PDF.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        The number of pages, or ``0`` if the document could not be read
        (the error is printed).
    """
    try:
        pdf_document = fitz.open(pdf_path)
        try:
            return len(pdf_document)
        finally:
            pdf_document.close()
    except Exception as e:
        print(f"Error getting PDF info: {e}")
        return 0


def get_file_type(file_path):
    """
    Determine the file type based on extension.

    Args:
        file_path: File name or path as ``str`` or ``os.PathLike``. The
            comparison is case-insensitive.

    Returns:
        ``'pdf'``, ``'image'`` (for .png/.jpg/.jpeg), ``'txt'`` or
        ``'unknown'``.
    """
    # Normalise once; os.fspath also lets pathlib.Path objects through.
    name = os.fspath(file_path).lower()
    if name.endswith('.pdf'):
        return 'pdf'
    if name.endswith(('.png', '.jpg', '.jpeg')):
        return 'image'
    if name.endswith('.txt'):
        return 'txt'
    return 'unknown'


def _parse_created_at(value):
    """Parse an ISO-8601 timestamp string into an aware datetime, else None."""
    if not isinstance(value, str) or not value:
        return None
    try:
        # fromisoformat only accepts a trailing 'Z' on newer Python versions.
        parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # NOTE(review): timestamps without an offset are assumed to be UTC so
        # they can be compared with the aware "now" — confirm with the source
        # of the file records.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def get_recent_file_id(files, minutes=5):
    """
    Return the ``fileId`` of the newest file created within the last minutes.

    Args:
        files: Iterable of dicts carrying an ISO-8601 ``'createdAt'`` string
            and a ``'fileId'``. Entries whose ``'createdAt'`` is missing,
            empty or unparseable are skipped.
        minutes: Size of the look-back window in minutes.

    Returns:
        The ``'fileId'`` of the most recently created file whose age is at
        most ``minutes``, or ``None`` when no file qualifies.

    Raises:
        KeyError: If the selected file has no ``'fileId'`` key.
    """
    if not files:
        return None

    now = datetime.now(timezone.utc)
    window = timedelta(minutes=minutes)

    newest = None
    newest_created = None
    for file in files:
        created_at = _parse_created_at(file.get('createdAt'))
        if created_at is None or now - created_at > window:
            continue
        # Compare real instants rather than raw strings so mixed offsets or
        # formats still order correctly; strict '>' keeps the first of ties.
        if newest_created is None or created_at > newest_created:
            newest, newest_created = file, created_at

    return newest['fileId'] if newest is not None else None