Update TextProcessor.py
Browse files- TextProcessor.py +5 -4
TextProcessor.py
CHANGED
|
@@ -61,7 +61,7 @@ def paragraphs(folderpath):
|
|
| 61 |
is_after_disposition = (prior_right_margin and current_tabbed)
|
| 62 |
is_after_oneline_paragraph = (prior_tabbed and current_tabbed and not prior_supertabbed and not current_supertabbed and not is_inblock)
|
| 63 |
|
| 64 |
-
if is_start_blockquote:
|
| 65 |
is_inblock = True
|
| 66 |
if is_after_blockquote:
|
| 67 |
is_inblock = False
|
|
@@ -69,8 +69,9 @@ def paragraphs(folderpath):
|
|
| 69 |
if is_section_header or is_the_classic or is_after_oneline_paragraph or is_start_blockquote or is_after_blockquote or is_after_disposition:
|
| 70 |
paras.append(para)
|
| 71 |
para = []
|
| 72 |
-
|
| 73 |
-
|
|
|
|
| 74 |
para.append((pg_inds[j], line_inds[j], line_text))
|
| 75 |
|
| 76 |
paras.append(para)
|
|
@@ -95,4 +96,4 @@ def process_file(folderpath):
|
|
| 95 |
x1, y1, x2, y2, para_first_line, pg_ind = indent
|
| 96 |
image = cv2.imread(folderpath + '/' + str(pg_ind) + '-processed.png')
|
| 97 |
cv2.circle(image, (x1 - 15, int(0.5 * (y1 + y2))), radius=1, color=(240, 32, 160), thickness=2)
|
| 98 |
-
cv2.imwrite(folderpath + '/' + str(pg_ind) + '-processed.png', image)
|
|
|
|
| 61 |
is_after_disposition = (prior_right_margin and current_tabbed)
|
| 62 |
is_after_oneline_paragraph = (prior_tabbed and current_tabbed and not prior_supertabbed and not current_supertabbed and not is_inblock)
|
| 63 |
|
| 64 |
+
if is_start_blockquote and not is_section_header and not is_inblock:
|
| 65 |
is_inblock = True
|
| 66 |
if is_after_blockquote:
|
| 67 |
is_inblock = False
|
|
|
|
| 69 |
if is_section_header or is_the_classic or is_after_oneline_paragraph or is_start_blockquote or is_after_blockquote or is_after_disposition:
|
| 70 |
paras.append(para)
|
| 71 |
para = []
|
| 72 |
+
print('\n')
|
| 73 |
+
print(str([j, pg_inds[j]]) + ':\t' + str(is_the_classic) + '\t' + str(is_start_blockquote) + '\t' + str(
|
| 74 |
+
is_after_blockquote) + '\t' + str(is_after_disposition) + '\t' + line_text)
|
| 75 |
para.append((pg_inds[j], line_inds[j], line_text))
|
| 76 |
|
| 77 |
paras.append(para)
|
|
|
|
| 96 |
x1, y1, x2, y2, para_first_line, pg_ind = indent
|
| 97 |
image = cv2.imread(folderpath + '/' + str(pg_ind) + '-processed.png')
|
| 98 |
cv2.circle(image, (x1 - 15, int(0.5 * (y1 + y2))), radius=1, color=(240, 32, 160), thickness=2)
|
| 99 |
+
cv2.imwrite(folderpath + '/' + str(pg_ind) + '-processed.png', image)
|