Spaces:

SeyedAli
/

Persian-Speech-Transcription

Running

SeyedAli committed on Sep 21, 2023

Commit

da9d83e

1 Parent(s): 318958b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,7 +17,12 @@ def ASR(audio):
         temp_audio_file.flush()
         # Load the audio file using torchaudio
         waveform, sample_rate = torchaudio.load(temp_audio_file.name)
         # Convert the PyTorch tensor to a NumPy ndarray
         audio_array = waveform.numpy()
         #inputs = processor(audio_array, sampling_rate=16_000)

         temp_audio_file.flush()
         # Load the audio file using torchaudio
         waveform, sample_rate = torchaudio.load(temp_audio_file.name)
+        # Resample the audio to 16kHz
+        resampler = torchaudio.transforms.Resample(sample_rate, 16000)
+        waveform = resampler(waveform)
+        # Convert the audio to a single channel
+        downmix_mono = torchaudio.transforms.DownmixMono()
+        waveform = downmix_mono(waveform)
         # Convert the PyTorch tensor to a NumPy ndarray
         audio_array = waveform.numpy()
         #inputs = processor(audio_array, sampling_rate=16_000)