import pymupdf  # PyMuPDF


def read_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Location of the PDF; passed unchanged to ``pymupdf.open``.

    Returns:
        One string holding each page's ``get_text()`` output, joined with no
        separator between pages.

    Any exception raised by PyMuPDF while opening or reading the file
    propagates to the caller.
    """
    # The context manager closes the document even when text extraction
    # raises part-way through, so the file handle is never leaked.
    with pymupdf.open(file_path) as document:
        # Join once rather than growing a string with += on every page.
        return "".join(page.get_text() for page in document)


def remove_line_breaks(text):
    """Replace soft (single) line breaks with spaces, keeping real breaks.

    A newline is turned into a space only when all of the following hold:

    * it is neither the first nor the last character of ``text``;
    * the character before it is not a period (a line ending in "." is
      treated as a finished sentence and keeps its break);
    * neither neighbour is another newline, so every newline that belongs
      to a blank-line paragraph separator is left untouched.

    Args:
        text: The string to clean up.

    Returns:
        A new string of the same length with the qualifying newlines
        replaced by single spaces.
    """
    chars = list(text)
    last_index = len(text) - 1

    for index, char in enumerate(text):
        # Leading/trailing characters have no neighbour on one side.
        if char != "\n" or index == 0 or index == last_index:
            continue

        previous_char = text[index - 1]
        next_char = text[index + 1]
        # Neighbours are read from the unmodified input, so an earlier
        # replacement can never influence a later decision.
        if previous_char not in (".", "\n") and next_char != "\n":
            chars[index] = " "

    # Single join keeps the whole pass linear instead of re-slicing the
    # string for every replaced newline.
    return "".join(chars)


if __name__ == "__main__":
    # Script entry point: extract the sample PDF's text, collapse its soft
    # line breaks, and print the cleaned result to stdout.
    raw_text = read_pdf("example3.pdf")
    cleaned_text = remove_line_breaks(raw_text)
    print(cleaned_text)