Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -35,10 +35,21 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
|
|
35 |
|
36 |
resample_audio = resampler(newsr=24000)
|
37 |
audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
|
|
|
38 |
audio_np = audio_data_resampled.cpu().numpy()
|
39 |
audio_np = audio_np / np.max(np.abs(audio_np))
|
|
|
|
|
40 |
audio_np = np.asarray(audio_np, dtype=np.float32)
|
|
|
|
|
41 |
audio_stereo = np.stack((audio_np, audio_np), axis=-1)
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
|
43 |
# Write the stereo data with a sample rate of 24000 Hz
|
44 |
sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
|
|
|
35 |
|
36 |
resample_audio = resampler(newsr=24000)
|
37 |
audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
|
38 |
+
# Normalize audio
|
39 |
audio_np = audio_data_resampled.cpu().numpy()
|
40 |
audio_np = audio_np / np.max(np.abs(audio_np))
|
41 |
+
|
42 |
+
# Ensure audio data is in the correct format
|
43 |
audio_np = np.asarray(audio_np, dtype=np.float32)
|
44 |
+
|
45 |
+
# Create stereo audio by duplicating the mono channel
|
46 |
audio_stereo = np.stack((audio_np, audio_np), axis=-1)
|
47 |
+
|
48 |
+
# Debugging: Inspect the shape and dtype of the audio array
|
49 |
+
print("Audio Array Shape:", audio_stereo.shape)
|
50 |
+
print("Audio Array Dtype:", audio_stereo.dtype)
|
51 |
+
|
52 |
+
# Save to a temporary WAV file as stereo
|
53 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
|
54 |
# Write the stereo data with a sample rate of 24000 Hz
|
55 |
sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
|