Epoching committed on
Commit
6318714
1 Parent(s): cda1095

Update DiT_Extractor/sentence_extractor.py

Browse files
DiT_Extractor/sentence_extractor.py CHANGED
@@ -118,7 +118,7 @@ def format_output_contexts(sections_per_page):
118
  whitespaced_text = ' '.join([word[0] for word in word_section])
119
  words_info = []
120
  for word in word_section:
121
- words_info.append({'word_text:':word[0], 'char_indices':word[1], 'word_bbox':word[2]})
122
 
123
  context_row = {'text':text_section, 'whitespaced_text':whitespaced_text, 'page_idx':int(page_idx), 'words_info':words_info}
124
  context_id = 'context_{0}'.format(len(all_contexts))
 
118
  whitespaced_text = ' '.join([word[0] for word in word_section])
119
  words_info = []
120
  for word in word_section:
121
+ words_info.append({'word_text':word[0], 'char_indices':word[1], 'word_bbox':word[2]})
122
 
123
  context_row = {'text':text_section, 'whitespaced_text':whitespaced_text, 'page_idx':int(page_idx), 'words_info':words_info}
124
  context_id = 'context_{0}'.format(len(all_contexts))