parser-leaderboard / utils /pdf_utils.py
jojortz's picture
add sample files
9fe1137
raw
history blame
1.35 kB
import fitz # PyMuPDF
from PIL import Image
def update_page(file_path, current_page, direction):
    """Step the PDF preview by ``direction`` pages and render the resulting page.

    Args:
        file_path: Path of the PDF to display. A falsy value means that no
            file is selected.
        current_page: Zero-based index of the page currently shown.
        direction: Signed page offset that is added to ``current_page``.

    Returns:
        A ``(image, label, page_index)`` tuple:

        * no file selected -> ``(None, "No file selected", current_page)``
        * PDF could not be loaded -> ``(None, "Error loading PDF", current_page)``
        * otherwise -> the rendered image, a ``"Page X of Y"`` label (one-based)
          and the zero-based index of the page that was rendered.
    """
    if not file_path:
        return None, "No file selected", current_page  # If no file is selected, do nothing

    # get_pdf_page clamps the requested index into the document's valid range
    # and reports the page count, so a single call both bounds-checks and
    # renders. This avoids opening the file a second time (and rendering
    # page 0) just to learn how many pages there are.
    img, new_page, total_pages = get_pdf_page(file_path, current_page + direction)

    if img is None or total_pages <= 0:
        # Loading failed. Keep the caller's page index instead of letting the
        # "total_pages - 1" clamp turn it into -1.
        return None, "Error loading PDF", current_page

    return img, f"Page {new_page + 1} of {total_pages}", new_page
def get_pdf_page(file_path, page_num):
    """Render one page of a PDF as a PIL image.

    Args:
        file_path: Path of the PDF to open.
        page_num: Zero-based index of the requested page. Values outside the
            document are clamped to the first/last page.

    Returns:
        A ``(image, page_num, page_count)`` tuple, where ``page_num`` is the
        index that was actually rendered. If anything goes wrong the error is
        printed and ``(None, page_num, 0)`` is returned instead of raising.
    """
    try:
        # The context manager closes the document (and its file handle) even
        # when rendering fails part-way through.
        with fitz.open(file_path) as doc:
            page_count = len(doc)
            page_num = max(0, min(page_num, page_count - 1))  # Ensure page_num is within bounds
            page = doc.load_page(page_num)
            pix = page.get_pixmap()
            # Build the image while the document is still open.
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
        return img, page_num, page_count
    except Exception as e:
        # Best-effort by design: callers treat a None image / zero page count
        # as "could not load".
        print(f"Error loading PDF page: {e}")
        return None, page_num, 0
def load_pdf(file_path):
    """Open a PDF on its first page.

    Returns:
        A ``(image, label, total_pages)`` tuple as produced by rendering page
        index 0 with ``get_pdf_page``; ``label`` is the one-based
        ``"Page X of Y"`` caption for that page.
    """
    first_page_img, shown_index, n_pages = get_pdf_page(file_path, 0)
    caption = f"Page {shown_index + 1} of {n_pages}"
    return first_page_img, caption, n_pages