docker-speech2text / create_test_audio.py
petergits
2nd checking push speech2text
c08e928
#!/usr/bin/env python3
"""
Create test audio files (a tone, silence, and a speech-like signal) for testing the MPC Speech service
"""
import numpy as np
import soundfile as sf
from pathlib import Path
def create_test_audio(filename="test_audio.wav", duration=3.0, sample_rate=16000):
    """
    Create a simple test audio file containing a decaying three-tone chord

    The signal is the sum of three sine waves (440 Hz, 523 Hz, 660 Hz),
    shaped by an exponential decay and scaled so its peak is 0.8.

    Args:
        filename: Output filename
        duration: Duration in seconds
        sample_rate: Sample rate in Hz

    Returns:
        The filename that was passed in
    """
    print(f"🎡 Creating test audio file: {filename}")

    # Sample instants; endpoint excluded so spacing is exactly 1 / sample_rate
    t = np.linspace(0, duration, int(sample_rate * duration), False)

    # (amplitude, frequency in Hz) for each partial of the chord
    partials = (
        (0.3, 440),  # A note
        (0.2, 523),  # C note
        (0.1, 660),  # E note
    )
    audio = sum(amp * np.sin(2 * np.pi * freq * t) for amp, freq in partials)

    # Add some envelope to make it sound more natural (exponential decay)
    audio = audio * np.exp(-t * 0.5)

    # Normalize to prevent clipping. An empty signal has no peak and a
    # signal that is all zeros (e.g. a single sample at t=0) has a peak of
    # zero; dividing in either case would raise or produce NaN, so such
    # signals are written unscaled.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak * 0.8

    # Save through soundfile, then report the size of what landed on disk
    sf.write(filename, audio, sample_rate)
    file_size = Path(filename).stat().st_size
    print(f"βœ… Created {filename}")
    print(f" Duration: {duration}s")
    print(f" Sample rate: {sample_rate} Hz")
    print(f" File size: {file_size:,} bytes")
    return filename
def create_silent_audio(filename="silent_audio.wav", duration=2.0, sample_rate=16000):
    """
    Write an all-zero (silent) audio file and print a short summary

    Args:
        filename: Output filename
        duration: Length of the silence in seconds
        sample_rate: Sample rate in Hz

    Returns:
        The filename that was passed in
    """
    print(f"πŸ”‡ Creating silent audio file: {filename}")

    # Silence is just a buffer of zeros, one entry per sample
    num_samples = int(sample_rate * duration)
    silence = np.zeros(num_samples)
    sf.write(filename, silence, sample_rate)

    # Report the on-disk size of the file that was just written
    size_on_disk = Path(filename).stat().st_size
    summary = (
        f"βœ… Created {filename}",
        f" Duration: {duration}s (silent)",
        f" Sample rate: {sample_rate} Hz",
        f" File size: {size_on_disk:,} bytes",
    )
    for line in summary:
        print(line)
    return filename
def create_speech_like_audio(filename="speech_test.wav", duration=5.0, sample_rate=16000,
                             *, seed=None):
    """
    Create a more speech-like test audio with varying frequencies

    Three amplitude-modulated sine "formants" (500 Hz, 1500 Hz, 2500 Hz) are
    mixed with Gaussian noise, then chopped into 0.8 s segments separated by
    0.2 s pauses during which the signal is attenuated to 10%. The result is
    scaled so its peak is 0.7.

    Args:
        filename: Output filename
        duration: Duration in seconds
        sample_rate: Sample rate in Hz
        seed: Optional seed for the noise. When given, the noise comes from
            a dedicated generator seeded with it, so the same arguments
            produce the same samples. When None, noise is drawn from
            NumPy's global random state, as before.

    Returns:
        The filename that was passed in
    """
    print(f"🎀 Creating speech-like audio file: {filename}")

    # Sample instants; endpoint excluded so spacing is exactly 1 / sample_rate
    t = np.linspace(0, duration, int(sample_rate * duration), False)

    # (amplitude, carrier Hz, modulation depth, modulation Hz) per formant.
    # Human speech typically has formants around 500Hz, 1500Hz, 2500Hz.
    formants = (
        (0.4, 500, 0.3, 3),
        (0.3, 1500, 0.2, 5),
        (0.2, 2500, 0.1, 7),
    )
    audio = sum(
        amp * np.sin(2 * np.pi * carrier * t)
        * (1 + depth * np.sin(2 * np.pi * mod * t))
        for amp, carrier, depth, mod in formants
    )

    # Add some noise to make it more realistic
    if seed is None:
        noise = np.random.normal(0, 1, len(audio))
    else:
        noise = np.random.default_rng(seed).normal(0, 1, len(audio))
    audio = audio + 0.05 * noise

    # Create segments (like words) separated by quieter pauses
    segment_duration = 0.8
    pause_duration = 0.2
    segment_samples = int(segment_duration * sample_rate)
    pause_samples = int(pause_duration * sample_rate)

    # At very low sample rates the pause rounds down to zero samples (and
    # the stride may too, which range() rejects); there is nothing to
    # attenuate in that case, so the loop is skipped.
    if pause_samples > 0:
        stride = segment_samples + pause_samples
        # Each pause begins right after a segment; slicing clips at the end
        for pause_start in range(segment_samples, len(audio), stride):
            audio[pause_start:pause_start + pause_samples] *= 0.1

    # Normalize; skip when there is no positive peak (empty or all-zero
    # signal), where dividing would raise or produce NaN.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak * 0.7

    # Save through soundfile, then report the size of what landed on disk
    sf.write(filename, audio, sample_rate)
    file_size = Path(filename).stat().st_size
    print(f"βœ… Created {filename}")
    print(f" Duration: {duration}s")
    print(f" Sample rate: {sample_rate} Hz")
    print(f" File size: {file_size:,} bytes")
    print(" Note: This is synthetic audio for testing")
    return filename
def main():
    """
    Create the three test audio files and report the outcome

    Returns:
        0 if every file was created, 1 if any creation step raised
    """
    print("🎧 Creating test audio files for MPC Speech service")
    print("=" * 50)
    try:
        # Create different types of test audio
        create_test_audio("audio.wav", duration=3.0)
        create_silent_audio("silent_test.wav", duration=2.0)
        create_speech_like_audio("speech_test.wav", duration=4.0)
    except Exception as e:
        # Top-level boundary: report the problem, then signal failure to the
        # caller instead of letting the script look like it succeeded.
        print(f"❌ Error creating test audio files: {e}")
        print("πŸ’‘ Make sure you have soundfile installed: pip install soundfile")
        return 1

    print("\nβœ… All test audio files created successfully!")
    print("πŸ“ Files created in current directory:")
    print(" β€’ audio.wav - Simple tone (for basic testing)")
    print(" β€’ silent_test.wav - Silent audio (edge case testing)")
    print(" β€’ speech_test.wav - Speech-like audio (more realistic)")
    print("\nπŸ§ͺ You can now test with: python testClient.py")
    return 0


if __name__ == "__main__":
    # Propagate main()'s result as the process exit status
    raise SystemExit(main())