Spaces:

TheDrakosfire
/

SwordsAndSorceryRulesLawyer

Runtime error

Reorganized the project into a module called document processing, executed through a single entry file, pdf_to_embeddings.py

824bd32 3 months ago

1.53 kB

	from PyPDF2 import PdfReader
	import os
	import math

	def estimate_conversion_time(num_pages, seconds_per_page=5):
	    """Estimate the total conversion time for a document.

	    Args:
	        num_pages: Number of pages to be converted.
	        seconds_per_page: Assumed cost of converting one page, in seconds.
	            Defaults to 5, the estimate this function has always used.

	    Returns:
	        The estimated total time in seconds, i.e.
	        ``num_pages * seconds_per_page``.
	    """
	    return num_pages * seconds_per_page

	def format_time(seconds):
	    """Convert a duration in seconds to a human-readable string.

	    Args:
	        seconds: Duration in seconds (int or float).

	    Returns:
	        A string of the form ``"<M> minutes and <S> seconds"`` where the
	        duration has been rounded to the nearest whole second.
	    """
	    # Round BEFORE splitting into minutes/seconds. Rounding only the
	    # remainder (as a ".0f" format would) lets a value such as 59.6 render
	    # as "0 minutes and 60 seconds" instead of carrying into the minutes.
	    minutes, remaining_seconds = divmod(round(seconds), 60)
	    return f"{minutes} minutes and {remaining_seconds} seconds"

	def check_pdf(file_path):
	    """Report whether ``file_path`` exists and can be opened as a PDF.

	    A status or error message is printed in every case.

	    Args:
	        file_path: Path of the file to check.

	    Returns:
	        ``True`` when the path exists and ``PdfReader`` accepts the opened
	        file; ``False`` when the path is missing or any exception is raised
	        while opening or reading it.
	    """
	    path_missing = not os.path.exists(file_path)
	    if path_missing:
	        print(f"Error: File '{file_path}' does not exist.")
	        return False
	    try:
	        # Constructing the reader is the whole check; the object itself
	        # is discarded.
	        with open(file_path, 'rb') as pdf_stream:
	            PdfReader(pdf_stream)
	        print(f"PDF '{file_path}' can be opened successfully.")
	        return True
	    except Exception as exc:
	        print(f"Error opening PDF '{file_path}': {exc!s}")
	        return False

	def check_pdf_details(file_path):
	    """Print a short summary of a PDF and return its page count.

	    Printed, in order: the number of pages, the reader's ``pdf_header``
	    value, the file size in bytes, and either each metadata entry or a
	    notice that no metadata is available.

	    Args:
	        file_path: Path of the PDF to inspect.

	    Returns:
	        The number of pages, or ``None`` if any step raised an exception
	        (the error is printed instead of being propagated).
	    """
	    try:
	        with open(file_path, 'rb') as pdf_stream:
	            reader = PdfReader(pdf_stream)
	            page_total = len(reader.pages)
	            print(f"Number of pages: {page_total}")
	            print(f"PDF Version: {reader.pdf_header}")
	            size_in_bytes = os.path.getsize(file_path)
	            print(f"File size: {size_in_bytes} bytes")
	            info = reader.metadata
	            if not info:
	                print("No metadata available")
	            else:
	                print("Metadata:")
	                for field, entry in info.items():
	                    print(f" {field}: {entry}")
	            return page_total
	    except Exception as exc:
	        print(f"Error checking PDF details: {exc!s}")
	        return None