3v324v23 committed on
Commit
d35d771
1 Parent(s): 4c486f2

修复pdf分解bug

Browse files
Files changed (1) hide show
  1. crazy_functions/crazy_utils.py +1 -0
crazy_functions/crazy_utils.py CHANGED
@@ -444,6 +444,7 @@ def read_and_clean_pdf_text(fp):
444
  pf = 998
445
  for l in t['lines']:
446
  txt_line = "".join([wtf['text'] for wtf in l['spans']])
 
447
  pf = primary_ffsize(l)
448
  meta_line.append([txt_line, pf, l['bbox'], l])
449
  for wtf in l['spans']: # for l in t['lines']:
 
444
  pf = 998
445
  for l in t['lines']:
446
  txt_line = "".join([wtf['text'] for wtf in l['spans']])
447
+ if len(txt_line) == 0: continue
448
  pf = primary_ffsize(l)
449
  meta_line.append([txt_line, pf, l['bbox'], l])
450
  for wtf in l['spans']: # for l in t['lines']: