Spaces:
Running
Running
Update lib/read_pdf.py
Browse files
- lib/read_pdf.py +4 -0
lib/read_pdf.py
CHANGED
@@ -53,6 +53,10 @@ def extract_and_format_paragraphs(pdf_path):
|
|
53 |
"""Append the line to the paragraph, handling line breaks and footnotes."""
|
54 |
if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
|
55 |
# This line is a continuation of the previous one
|
|
|
|
|
|
|
|
|
56 |
paragraph_lines[-1] += ' ' + line.strip()
|
57 |
else:
|
58 |
# Start a new line in the paragraph
|
|
|
53 |
"""Append the line to the paragraph, handling line breaks and footnotes."""
|
54 |
if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
|
55 |
# This line is a continuation of the previous one
|
56 |
+
if paragraph_lines[-1][-1] == "-":
|
57 |
+
paragraph_lines[-1] = paragraph_lines[-1][:-1]
|
58 |
+
paragraph_lines[-1] += line.strip()
|
59 |
+
|
60 |
paragraph_lines[-1] += ' ' + line.strip()
|
61 |
else:
|
62 |
# Start a new line in the paragraph
|