File size: 1,579 Bytes
4b0e845 0c5c3aa 4b0e845 0c5c3aa 4b0e845 0c5c3aa 4b0e845 0c5c3aa 4b0e845 0c5c3aa 4b0e845 0c5c3aa 4b0e845 0c5c3aa 4b0e845 0c5c3aa 4b0e845 0c5c3aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import speech_recognition as sr
import numpy as np
import io
import config
class SpeechTranscriber:
    """Buffer raw audio and transcribe it with the Google Web Speech API.

    Audio chunks are appended to an in-memory byte buffer with
    ``add_audio_chunk``. ``get_transcript_chunk`` submits the buffered
    audio for recognition once at least ``config.MIN_PROCESSING_DURATION``
    worth of samples (at ``config.SAMPLE_RATE``) has accumulated.
    """

    # Width of one audio sample in bytes, as handed to ``sr.AudioData``.
    # NOTE(review): this assumes the incoming chunks hold 16-bit samples;
    # confirm against the code that captures the audio.
    SAMPLE_WIDTH = 2

    def __init__(self):
        """Create the recognizer and apply the thresholds from ``config``."""
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = config.ENERGY_THRESHOLD
        self.recognizer.dynamic_energy_threshold = config.DYNAMIC_ENERGY_THRESHOLD
        self.recognizer.pause_threshold = config.PAUSE_THRESHOLD
        # Raw sample bytes waiting to be transcribed.
        self.audio_buffer = bytearray()

    def add_audio_chunk(self, audio_chunk):
        """Append one chunk of audio to the pending buffer.

        Args:
            audio_chunk: Object exposing ``tobytes()`` (the original comment
                says a numpy array); its raw bytes are appended unchanged.
        """
        self.audio_buffer.extend(audio_chunk.tobytes())

    def get_transcript_chunk(self):
        """Transcribe the buffered audio if enough of it has accumulated.

        Returns:
            The recognized text, or ``None`` when the buffer is still too
            short, the audio could not be understood, or the recognition
            request failed.
        """
        # Only process once we hold the minimum amount of audio.
        min_bytes = (
            config.SAMPLE_RATE * config.MIN_PROCESSING_DURATION * self.SAMPLE_WIDTH
        )
        if len(self.audio_buffer) < min_bytes:
            return None

        # Freeze exactly the bytes we are about to submit, so that we later
        # remove only those bytes from the buffer.
        snapshot = bytes(self.audio_buffer)
        audio_data = sr.AudioData(snapshot, config.SAMPLE_RATE, self.SAMPLE_WIDTH)

        try:
            # Use Google Web Speech API for best accuracy.
            text = self.recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Drop the submitted audio even if it could not be recognized.
            self._discard_consumed(len(snapshot))
            return None
        except sr.RequestError as e:
            # The buffer is kept, so the same audio is retried on the next call.
            # NOTE(review): if the service stays unreachable the buffer keeps
            # growing; consider capping it.
            print(f"Speech recognition error: {e}")
            return None

        self._discard_consumed(len(snapshot))
        return text

    def _discard_consumed(self, byte_count):
        """Remove the first ``byte_count`` bytes from the pending buffer.

        Deleting only the submitted prefix (instead of replacing the whole
        buffer) keeps any audio that was appended while the recognition
        request was in flight.
        """
        del self.audio_buffer[:byte_count]