Updating app file
Browse files
app.py
CHANGED
|
@@ -108,13 +108,30 @@ def split_into_sentences(text):
|
|
| 108 |
|
| 109 |
|
| 110 |
def transcribe(audio_array, sample_rate=16000):
|
| 111 |
-
"""ASR: English audio to text.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
if len(audio_array) < 1600: # Less than 0.1s
|
| 113 |
return ""
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
return result["text"].strip()
|
| 119 |
|
| 120 |
|
|
|
|
| 108 |
|
| 109 |
|
| 110 |
def transcribe(audio_array, sample_rate=16000):
    """ASR: English audio to text.

    Clips shorter than 0.1 s are treated as empty and return ``""`` without
    calling the model. Clips longer than 28 s are transcribed in long-form
    mode (timestamps enabled, 25 s chunks with a 5 s stride); everything
    else is transcribed in a single pass.

    Args:
        audio_array: Sized sequence of audio samples (anything supporting
            ``len()``). Passed to ``asr_pipe`` unchanged as ``"raw"``.
        sample_rate: Samples per second of ``audio_array``. Used for the
            duration checks and forwarded as ``"sampling_rate"``.

    Returns:
        The ``"text"`` entry of the ``asr_pipe`` result with surrounding
        whitespace stripped, or ``""`` for clips under 0.1 s.
    """
    # Scale the "too short" cut-off with the sample rate instead of assuming
    # 16 kHz. Integer form of `len / sample_rate < 0.1`, so the default
    # (16000 -> 1600 samples) behaves exactly as before.
    if len(audio_array) * 10 < sample_rate:  # Less than 0.1s
        return ""

    duration_s = len(audio_array) / sample_rate
    request = {"raw": audio_array, "sampling_rate": sample_rate}

    if duration_s > 28:
        # Long-form: enable chunking and timestamps (required by Whisper)
        # NOTE(review): asr_pipe is defined elsewhere in the file; presumably
        # a Hugging Face ASR pipeline wrapping a Whisper model - confirm.
        result = asr_pipe(
            request,
            return_timestamps=True,
            chunk_length_s=25,
            stride_length_s=5,
        )
    else:
        # Short: standard single-pass transcription
        result = asr_pipe(
            request,
            return_timestamps=False,
        )
    return result["text"].strip()
|
| 136 |
|
| 137 |
|