mazesmazes committed on
Commit
12cc77c
·
verified ·
1 Parent(s): eda17fb

Update custom model files, README, and requirements

Browse files
Files changed (1) hide show
  1. alignment.py +12 -3
alignment.py CHANGED
@@ -197,7 +197,8 @@ class ForcedAligner:
197
  import torchaudio
198
 
199
  device = _get_device()
200
- model, labels, dictionary = cls.get_instance(device)
 
201
 
202
  # Convert audio to tensor (copy to ensure array is writable)
203
  if isinstance(audio, np.ndarray):
@@ -259,7 +260,11 @@ class ForcedAligner:
259
 
260
  for token_id, start_frame, end_frame in alignment_path:
261
  if token_id == separator_id: # Word separator
262
- if current_word_start is not None and word_idx < len(words):
 
 
 
 
263
  start_time = max(0.0, current_word_start * frame_duration - start_offset)
264
  end_time = max(0.0, current_word_end * frame_duration - end_offset)
265
  word_timestamps.append(
@@ -278,7 +283,11 @@ class ForcedAligner:
278
  current_word_end = end_frame
279
 
280
  # Don't forget the last word
281
- if current_word_start is not None and word_idx < len(words):
 
 
 
 
282
  start_time = max(0.0, current_word_start * frame_duration - start_offset)
283
  end_time = max(0.0, current_word_end * frame_duration - end_offset)
284
  word_timestamps.append(
 
197
  import torchaudio
198
 
199
  device = _get_device()
200
+ model, _labels, dictionary = cls.get_instance(device)
201
+ assert cls._bundle is not None and dictionary is not None # Initialized by get_instance
202
 
203
  # Convert audio to tensor (copy to ensure array is writable)
204
  if isinstance(audio, np.ndarray):
 
260
 
261
  for token_id, start_frame, end_frame in alignment_path:
262
  if token_id == separator_id: # Word separator
263
+ if (
264
+ current_word_start is not None
265
+ and current_word_end is not None
266
+ and word_idx < len(words)
267
+ ):
268
  start_time = max(0.0, current_word_start * frame_duration - start_offset)
269
  end_time = max(0.0, current_word_end * frame_duration - end_offset)
270
  word_timestamps.append(
 
283
  current_word_end = end_frame
284
 
285
  # Don't forget the last word
286
+ if (
287
+ current_word_start is not None
288
+ and current_word_end is not None
289
+ and word_idx < len(words)
290
+ ):
291
  start_time = max(0.0, current_word_start * frame_duration - start_offset)
292
  end_time = max(0.0, current_word_end * frame_duration - end_offset)
293
  word_timestamps.append(