Cachoups committed on
Commit
9f63e0a
·
verified ·
1 Parent(s): d774f5b

Update lib/read_pdf.py

Browse files
Files changed (1) hide show
  1. lib/read_pdf.py +4 -0
lib/read_pdf.py CHANGED
@@ -53,6 +53,10 @@ def extract_and_format_paragraphs(pdf_path):
53
  """Append the line to the paragraph, handling line breaks and footnotes."""
54
  if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
55
  # This line is a continuation of the previous one
 
 
 
 
56
  paragraph_lines[-1] += ' ' + line.strip()
57
  else:
58
  # Start a new line in the paragraph
 
53
  """Append the line to the paragraph, handling line breaks and footnotes."""
54
  if paragraph_lines and not is_end_of_sentence(paragraph_lines[-1]):
55
  # This line is a continuation of the previous one
56
+ if paragraph_lines[-1][-1] == "-":
57
+ paragraph_lines[-1] = paragraph_lines[-1][:-1]
58
+ paragraph_lines[-1] += line.strip()
59
+
60
  paragraph_lines[-1] += ' ' + line.strip()
61
  else:
62
  # Start a new line in the paragraph