Removed duplicate variable self.last_chunked_at
I tried to find the difference between self.last_chunked_at and self.buffer_time_offset, and it took me a while to understand that they are exactly the same. I think it's better to get rid of one of the duplicates to make the code more readable.
- whisper_online.py +1 -3
whisper_online.py
CHANGED
|
@@ -328,7 +328,6 @@ class OnlineASRProcessor:
|
|
| 328 |
|
| 329 |
self.transcript_buffer = HypothesisBuffer(logfile=self.logfile)
|
| 330 |
self.commited = []
|
| 331 |
-
self.last_chunked_at = 0
|
| 332 |
|
| 333 |
self.silence_iters = 0
|
| 334 |
|
|
@@ -340,7 +339,7 @@ class OnlineASRProcessor:
|
|
| 340 |
"context" is the commited text that is inside the audio buffer. It is transcribed again and skipped. It is returned only for debugging and logging reasons.
|
| 341 |
"""
|
| 342 |
k = max(0,len(self.commited)-1)
|
| 343 |
-
while k > 0 and self.commited[k-1][1] > self.last_chunked_at:
|
| 344 |
k -= 1
|
| 345 |
|
| 346 |
p = self.commited[:k]
|
|
@@ -451,7 +450,6 @@ class OnlineASRProcessor:
|
|
| 451 |
cut_seconds = time - self.buffer_time_offset
|
| 452 |
self.audio_buffer = self.audio_buffer[int(cut_seconds*self.SAMPLING_RATE):]
|
| 453 |
self.buffer_time_offset = time
|
| 454 |
-
self.last_chunked_at = time
|
| 455 |
|
| 456 |
def words_to_sentences(self, words):
|
| 457 |
"""Uses self.tokenizer for sentence segmentation of words.
|
|
|
|
| 328 |
|
| 329 |
self.transcript_buffer = HypothesisBuffer(logfile=self.logfile)
|
| 330 |
self.commited = []
|
|
|
|
| 331 |
|
| 332 |
self.silence_iters = 0
|
| 333 |
|
|
|
|
| 339 |
"context" is the commited text that is inside the audio buffer. It is transcribed again and skipped. It is returned only for debugging and logging reasons.
|
| 340 |
"""
|
| 341 |
k = max(0,len(self.commited)-1)
|
| 342 |
+
while k > 0 and self.commited[k-1][1] > self.buffer_time_offset:
|
| 343 |
k -= 1
|
| 344 |
|
| 345 |
p = self.commited[:k]
|
|
|
|
| 450 |
cut_seconds = time - self.buffer_time_offset
|
| 451 |
self.audio_buffer = self.audio_buffer[int(cut_seconds*self.SAMPLING_RATE):]
|
| 452 |
self.buffer_time_offset = time
|
|
|
|
| 453 |
|
| 454 |
def words_to_sentences(self, words):
|
| 455 |
"""Uses self.tokenizer for sentence segmentation of words.
|