Spaces:

daswer123
/

pdf_converter

Sleeping

pdf_converter / funcs.py

Upload 3 files

e1cf46f verified about 1 year ago

1.21 kB

	import argparse
	import pypandoc
	from pdf2docx import Converter


	def pdf_to_epub(pdf_path, epub_path, ignore_header_footer=True):
	    """Convert a PDF file to EPUB via an intermediate DOCX file.

	    The PDF is first converted to DOCX with ``pdf2docx.Converter``; the DOCX
	    is then converted to EPUB with ``pypandoc.convert_file``. The
	    intermediate DOCX is written next to the PDF (same name, ``.docx``
	    extension) and is left on disk afterwards.

	    Args:
	        pdf_path: Path to the source PDF file.
	        epub_path: Path the resulting EPUB file is written to.
	        ignore_header_footer: Value forwarded to ``Converter.convert`` as
	            both the ``ignore_header`` and ``ignore_footer`` settings.
	            NOTE(review): whether pdf2docx acts on these keys is not
	            visible from this file -- confirm against the pdf2docx docs.
	    """
	    import os

	    pdf_path = os.fspath(pdf_path)

	    # Swap only the final extension. A plain str.replace('.pdf', ...) would
	    # also rewrite '.pdf' inside directory names, and for names such as
	    # 'BOOK.PDF' it would leave the path unchanged, so the intermediate
	    # DOCX would be written over the source PDF.
	    stem, _ = os.path.splitext(pdf_path)
	    docx_path = stem + '.docx'

	    # Settings passed through to the PDF -> DOCX conversion.
	    convert_settings = {
	        "ignore_footer": ignore_header_footer,
	        "ignore_header": ignore_header_footer,
	    }

	    # Step 1: convert PDF to DOCX. Close the converter even when the
	    # conversion raises, so the opened PDF is always released.
	    cv = Converter(pdf_path)
	    try:
	        cv.convert(docx_path, **convert_settings)
	    finally:
	        cv.close()

	    # Step 2: convert DOCX to EPUB.
	    output = pypandoc.convert_file(docx_path, 'epub', outputfile=epub_path)
	    # Kept for output compatibility with the original script.
	    # NOTE(review): pypandoc is expected to return an empty string when
	    # ``outputfile`` is given, so this likely prints a blank line -- confirm.
	    print(output)


	def main():
	    """Command-line entry point: convert the given PDF file to EPUB.

	    Reads a single positional argument ``pdf_path`` from the command line,
	    derives the EPUB path by replacing the file's final extension with
	    ``.epub``, runs ``pdf_to_epub`` and prints where the EPUB was saved.
	    """
	    import os

	    # Parse command-line arguments.
	    parser = argparse.ArgumentParser(description='Convert a PDF file to EPUB format.')
	    parser.add_argument('pdf_path', type=str, help='Path to the PDF file to convert.')
	    args = parser.parse_args()

	    # Derive the EPUB path from the final extension only. A plain
	    # str.replace('.pdf', '.epub') leaves names such as 'BOOK.PDF'
	    # unchanged, which would make the output path equal to the input path
	    # and overwrite the source PDF; it would also rewrite '.pdf' inside
	    # directory names.
	    stem, _ = os.path.splitext(args.pdf_path)
	    epub_path = stem + '.epub'

	    # Perform the conversion and report the result location.
	    pdf_to_epub(args.pdf_path, epub_path)
	    print(f"Conversion complete. EPUB file saved to: {epub_path}")


	# Run the command-line interface only when this file is executed as a
	# script; importing the module must not trigger a conversion.
	if __name__ == "__main__":
	    main()