|
import unittest |
|
from unittest.mock import MagicMock, patch |
|
from transcribe.strategy import TranscriptStabilityAnalyzer, TranscriptChunk, TranscriptResult, SplitMode |
|
|
|
class TestTranscriptStabilityAnalyzer(unittest.TestCase):
    """Exercise ``TranscriptStabilityAnalyzer.analysis`` with mocked chunks.

    Each test replaces ``previous_chunk`` (and, in two tests, ``add``) on the
    analyzer's ``_transcript_history`` with mocks, feeds a ``MagicMock``
    chunk to ``analysis`` and inspects the results it yields.
    """

    def setUp(self):
        """Build a fresh analyzer for every test."""
        self.analyzer = TranscriptStabilityAnalyzer()

    def test_first_chunk_yields_pending_text(self):
        """No previous chunk: exactly one result, carrying the chunk text."""
        mock_chunk = MagicMock(spec=TranscriptChunk)
        mock_chunk.join.return_value = "Hello world."

        history = self.analyzer._transcript_history
        with patch.object(history, 'previous_chunk', return_value=None):
            # Drain the iterable while the patch is active, in case
            # analysis() produces its results lazily.
            results = list(
                self.analyzer.analysis(" ", mock_chunk, buffer_duration=5.0)
            )

        self.assertEqual(len(results), 1)
        self.assertIsInstance(results[0], TranscriptResult)
        self.assertIn("Hello", results[0].context)

    def test_short_buffer_with_high_similarity_and_end_sentence(self):
        """A matching, sentence-ending first segment yields an end-sentence result.

        NOTE(review): ``buffer_duration=5.0`` is presumably below the
        analyzer's long-buffer threshold and ``0.85`` above its similarity
        threshold -- confirm against ``TranscriptStabilityAnalyzer``.
        """
        curr_chunk = MagicMock(spec=TranscriptChunk)
        curr_first = MagicMock()
        rest_item = MagicMock()
        curr_rest = [rest_item]
        prev_chunk = MagicMock(spec=TranscriptChunk)
        prev_first = MagicMock()

        # Current chunk: a first segment followed by one trailing segment.
        curr_chunk.items = [curr_first, rest_item]
        curr_chunk.get_split_first_rest.return_value = (curr_first, curr_rest)
        # Previous chunk: a first segment and nothing after it.
        prev_chunk.get_split_first_rest.return_value = (prev_first, [])

        curr_first.compare.return_value = 0.85
        curr_first.is_end_sentence.return_value = True
        curr_first.has_punctuation.return_value = True
        curr_first.join.return_value = "This is a test sentence."
        curr_first.get_buffer_index.return_value = 0
        rest_item.join.return_value = " Continuing..."

        history = self.analyzer._transcript_history
        previous_patch = patch.object(
            history, 'previous_chunk', return_value=prev_chunk
        )
        add_patch = patch.object(history, 'add')
        # Entered left to right, i.e. in the same order as nested ``with``.
        with previous_patch, add_patch:
            results = list(
                self.analyzer.analysis(" ", curr_chunk, buffer_duration=5.0)
            )

        self.assertGreaterEqual(len(results), 1)
        self.assertTrue(any(r.is_end_sentence for r in results))
        self.assertTrue(any("test" in r.context for r in results))

    def test_long_buffer_triggers_commit(self):
        """A 15-second buffer yields an end-sentence result containing "Hello".

        NOTE(review): ``buffer_duration=15.0`` is presumably above the
        analyzer's long-buffer threshold -- confirm against
        ``TranscriptStabilityAnalyzer``.
        """
        chunk1 = MagicMock()
        chunk2 = MagicMock()
        chunk3 = MagicMock()

        chunk1.join.return_value = "Hello."
        chunk2.join.return_value = "How are"
        chunk3.join.return_value = " you?"

        mock_chunk = MagicMock(spec=TranscriptChunk)
        mock_chunk.split_by.return_value = [chunk1, chunk2, chunk3]
        mock_chunk.get_buffer_index.return_value = 0

        history = self.analyzer._transcript_history
        previous_patch = patch.object(
            history, 'previous_chunk', return_value=MagicMock()
        )
        add_patch = patch.object(history, 'add')
        # Entered left to right, i.e. in the same order as nested ``with``.
        with previous_patch, add_patch:
            results = list(
                self.analyzer.analysis(" ", mock_chunk, buffer_duration=15.0)
            )

        self.assertTrue(any(r.is_end_sentence for r in results))
        self.assertTrue(any("Hello" in r.context for r in results))
|
|
|
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
|