Spaces:

pgits
/

docker-speech2text

Sleeping

docker-speech2text / create_test_audio.py

petergits

2nd checking push speech2text

c08e928 10 months ago

5.18 kB

	#!/usr/bin/env python3
	"""
	Create a test audio file for testing the MPC Speech service
	"""

	import numpy as np
	import soundfile as sf
	from pathlib import Path

	def create_test_audio(filename="test_audio.wav", duration=3.0, sample_rate=16000):
	    """
	    Create a simple test audio file containing a decaying three-tone chord.

	    The signal is the sum of 440 Hz, 523 Hz and 660 Hz sine waves, shaped by
	    an exponential decay envelope and scaled so its peak amplitude is 0.8.
	    A signal with no samples, or whose samples are all zero, is written
	    unscaled instead of being divided by a zero peak.

	    Args:
	        filename: Output filename
	        duration: Duration in seconds
	        sample_rate: Sample rate in Hz

	    Returns:
	        The filename that was passed in.
	    """
	    print(f"🎵 Creating test audio file: {filename}")

	    # Generate time array (endpoint excluded)
	    t = np.linspace(0, duration, int(sample_rate * duration), False)

	    # Component tones of the chord-like sound
	    frequency1 = 440  # A note
	    frequency2 = 523  # C note
	    frequency3 = 660  # E note

	    # Weighted sum of the three tones
	    audio = (
	        0.3 * np.sin(2 * np.pi * frequency1 * t)
	        + 0.2 * np.sin(2 * np.pi * frequency2 * t)
	        + 0.1 * np.sin(2 * np.pi * frequency3 * t)
	    )

	    # Add some envelope to make it sound more natural
	    envelope = np.exp(-t * 0.5)  # Exponential decay
	    audio = audio * envelope

	    # Normalize to prevent clipping. np.max raises on an empty array and a
	    # zero peak would turn every sample into NaN, so only scale when the
	    # signal actually has a non-zero peak.
	    peak = np.max(np.abs(audio)) if audio.size else 0.0
	    if peak > 0:
	        audio = audio / peak * 0.8

	    # Save as WAV file
	    sf.write(filename, audio, sample_rate)

	    file_size = Path(filename).stat().st_size
	    print(f"✅ Created {filename}")
	    print(f" Duration: {duration}s")
	    print(f" Sample rate: {sample_rate} Hz")
	    print(f" File size: {file_size:,} bytes")

	    return filename

	def create_silent_audio(filename="silent_audio.wav", duration=2.0, sample_rate=16000):
	    """
	    Create a silent audio file for testing.

	    Args:
	        filename: Output filename
	        duration: Duration in seconds
	        sample_rate: Sample rate in Hz

	    Returns:
	        The filename that was passed in.
	    """
	    print(f"🔇 Creating silent audio file: {filename}")

	    # Silence is simply an all-zero sample buffer of the requested length
	    num_samples = int(sample_rate * duration)
	    silence = np.zeros(num_samples)

	    # Write the buffer out to disk
	    sf.write(filename, silence, sample_rate)

	    # Report what was written
	    size_in_bytes = Path(filename).stat().st_size
	    summary_lines = (
	        f"✅ Created {filename}",
	        f" Duration: {duration}s (silent)",
	        f" Sample rate: {sample_rate} Hz",
	        f" File size: {size_in_bytes:,} bytes",
	    )
	    for line in summary_lines:
	        print(line)

	    return filename

	def create_speech_like_audio(filename="speech_test.wav", duration=5.0, sample_rate=16000):
	    """
	    Create a more speech-like test audio file with varying frequencies.

	    Three sine carriers at 500 Hz, 1500 Hz and 2500 Hz are each slowly
	    amplitude-modulated (at 3 Hz, 5 Hz and 7 Hz), Gaussian noise is added,
	    and the result is cut into 0.8 s "words" separated by 0.2 s pauses that
	    are attenuated to 10% volume. The signal is finally scaled so its peak
	    amplitude is 0.7. A signal with no samples is written unscaled.

	    The noise comes from NumPy's global random state, so the output differs
	    between runs unless the caller seeds that state first.

	    Args:
	        filename: Output filename
	        duration: Duration in seconds
	        sample_rate: Sample rate in Hz

	    Returns:
	        The filename that was passed in.
	    """
	    print(f"🎤 Creating speech-like audio file: {filename}")

	    # Generate time array (endpoint excluded)
	    t = np.linspace(0, duration, int(sample_rate * duration), False)

	    # Create speech-like formants (simplified)
	    # Human speech typically has formants around 500Hz, 1500Hz, 2500Hz
	    formant1 = 500
	    formant2 = 1500
	    formant3 = 2500

	    # Each carrier is multiplied by a slow modulation term so its loudness
	    # wobbles over time instead of staying constant
	    audio = (
	        0.4 * np.sin(2 * np.pi * formant1 * t) * (1 + 0.3 * np.sin(2 * np.pi * 3 * t))
	        + 0.3 * np.sin(2 * np.pi * formant2 * t) * (1 + 0.2 * np.sin(2 * np.pi * 5 * t))
	        + 0.2 * np.sin(2 * np.pi * formant3 * t) * (1 + 0.1 * np.sin(2 * np.pi * 7 * t))
	    )

	    # Add some noise to make it more realistic
	    noise = 0.05 * np.random.normal(0, 1, len(audio))
	    audio = audio + noise

	    # Create segments (like words)
	    segment_duration = 0.8
	    pause_duration = 0.2
	    segment_samples = int(segment_duration * sample_rate)
	    pause_samples = int(pause_duration * sample_rate)

	    # Apply segmentation: leave each segment untouched and attenuate the
	    # pause that follows it. Slice bounds are clamped to the signal length;
	    # scaling an empty slice is a no-op, so no explicit emptiness check is
	    # needed for a pause that falls past the end.
	    total_samples = len(audio)
	    for i in range(0, total_samples, segment_samples + pause_samples):
	        pause_start = min(i + segment_samples, total_samples)
	        pause_end = min(pause_start + pause_samples, total_samples)
	        audio[pause_start:pause_end] *= 0.1  # Reduce volume for pause

	    # Normalize. np.max raises on an empty array and a zero peak would turn
	    # every sample into NaN, so only scale when there is a non-zero peak.
	    peak = np.max(np.abs(audio)) if audio.size else 0.0
	    if peak > 0:
	        audio = audio / peak * 0.7

	    # Save as WAV file
	    sf.write(filename, audio, sample_rate)

	    file_size = Path(filename).stat().st_size
	    print(f"✅ Created {filename}")
	    print(f" Duration: {duration}s")
	    print(f" Sample rate: {sample_rate} Hz")
	    print(f" File size: {file_size:,} bytes")
	    print(" Note: This is synthetic audio for testing")

	    return filename

	def main():
	    """
	    Create test audio files.

	    Generates audio.wav, silent_test.wav and speech_test.wav in the current
	    directory and prints a summary.

	    Returns:
	        0 when every file was created, 1 when any step raised. The value is
	        used as the process exit status when the module is run as a script,
	        so a failed run no longer reports success to the calling shell.
	    """
	    print("🎧 Creating test audio files for MPC Speech service")
	    print("=" * 50)

	    try:
	        # Create different types of test audio
	        create_test_audio("audio.wav", duration=3.0)
	        create_silent_audio("silent_test.wav", duration=2.0)
	        create_speech_like_audio("speech_test.wav", duration=4.0)
	    except Exception as e:
	        # Top-level boundary: report the problem and signal failure through
	        # the return value instead of letting the run look successful.
	        print(f"❌ Error creating test audio files: {e}")
	        print("💡 Make sure you have soundfile installed: pip install soundfile")
	        return 1

	    print("\n✅ All test audio files created successfully!")
	    print("📁 Files created in current directory:")
	    print(" • audio.wav - Simple tone (for basic testing)")
	    print(" • silent_test.wav - Silent audio (edge case testing)")
	    print(" • speech_test.wav - Speech-like audio (more realistic)")
	    print("\n🧪 You can now test with: python testClient.py")
	    return 0


	if __name__ == "__main__":
	    # Propagate main()'s status code to the shell
	    raise SystemExit(main())