Joshua Lochner committed on
Commit
de9c264
1 Parent(s): c445f1a

Fix segmentation using binary search

Browse files
Files changed (1) hide show
  1. src/segment.py +3 -3
src/segment.py CHANGED
@@ -119,8 +119,8 @@ def generate_segments(words, tokenizer, segmentation_args):
119
  def extract_segment(words, start, end, map_function=None):
120
  """Extracts all words with time in [start, end]"""
121
 
122
- a = binary_search_below(words, 0, len(words) - 1, start)
123
- b = min(binary_search_above(words, 0, len(words) - 1, end) + 1, len(words))
124
 
125
  to_transform = map_function is not None and callable(map_function)
126
 
@@ -153,7 +153,7 @@ def binary_search_above(transcript, start_index, end_index, time):
153
 
154
  middle_index = (start_index + end_index + 1) // 2
155
  middle = transcript[middle_index]
156
- middle_time = avg(middle['start'], middle['end'])
157
 
158
  if time >= middle_time:
159
  return binary_search_above(transcript, middle_index, end_index, time)
 
119
  def extract_segment(words, start, end, map_function=None):
120
  """Extracts all words with time in [start, end]"""
121
 
122
+ a = max(binary_search_below(words, 0, len(words), start), 0)
123
+ b = min(binary_search_above(words, -1, len(words) -1, end) + 1, len(words))
124
 
125
  to_transform = map_function is not None and callable(map_function)
126
 
 
153
 
154
  middle_index = (start_index + end_index + 1) // 2
155
  middle = transcript[middle_index]
156
+ middle_time = avg(word_start(middle), word_end(middle))
157
 
158
  if time >= middle_time:
159
  return binary_search_above(transcript, middle_index, end_index, time)