rutsam committed on
Commit
ece6ab6
1 Parent(s): a39f293

use pyaudioconvert

Browse files
Files changed (2) hide show
  1. nemo_asr.py +2 -3
  2. requirements.txt +2 -1
nemo_asr.py CHANGED
@@ -1,7 +1,7 @@
1
  import aiofiles
2
  import nemo
3
  import nemo.collections.asr as nemo_asr
4
- import librosa
5
  import soundfile as sf
6
  from pydub import AudioSegment
7
 
@@ -28,7 +28,6 @@ def transcribe(file, modelName="stt_rw_conformer_transducer_large"):
28
  # print(transcription[0])
29
  sound = AudioSegment.from_file(file.name)
30
  sound.export("test.wav",format='wav')
31
- x, sr = librosa.load('test.wav')
32
- sf.write("Test2.wav", x, 16000)
33
  transcription= asr_model.transcribe(["Test2.wav"])
34
  return transcription[0]
 
1
  import aiofiles
2
  import nemo
3
  import nemo.collections.asr as nemo_asr
4
+ import pyaudioconvert as pac
5
  import soundfile as sf
6
  from pydub import AudioSegment
7
 
 
28
  # print(transcription[0])
29
  sound = AudioSegment.from_file(file.name)
30
  sound.export("test.wav",format='wav')
31
+ pac.convert_wav_to_16bit_mono("Test2.wav","Test2.wav")
 
32
  transcription= asr_model.transcribe(["Test2.wav"])
33
  return transcription[0]
requirements.txt CHANGED
@@ -11,4 +11,5 @@ soundfile==0.10.3.post1
11
  wget
12
  aiofiles
13
  pydub
14
- git+https://github.com/NVIDIA/NeMo.git@r1.11.0#egg=nemo_toolkit[all]
 
 
11
  wget
12
  aiofiles
13
  pydub
14
+ git+https://github.com/NVIDIA/NeMo.git@r1.11.0#egg=nemo_toolkit[all]
15
+ pyaudioconvert