File size: 1,348 Bytes
3119dd6
 
 
ece6ab6
9506ac8
d857f13
3119dd6
 
 
b263f8c
 
 
 
e14a4f8
c91f24a
6c4292b
 
 
 
c91f24a
 
 
 
 
3119dd6
c91f24a
 
 
d857f13
 
ece6ab6
d857f13
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import aiofiles
import nemo
import nemo.collections.asr as nemo_asr
import pyaudioconvert as pac
import soundfile as sf
from pydub import AudioSegment


def transcribe(file, modelName="stt_rw_conformer_transducer_large"):
  """Transcribe an audio file to text with a pretrained NeMo ASR model.

  Args:
    file: file-like object whose ``.name`` points at an audio file on
      disk, in any format pydub/ffmpeg can decode.
    modelName: NOTE(review): currently unused — the CTC checkpoint below
      is hard-coded. The default names an RNNT (transducer) model, which
      is incompatible with ``EncDecCTCModelBPE``; confirm intended model
      selection with callers before wiring this parameter through.

  Returns:
    The first transcription string produced by ``asr_model.transcribe``.
  """
  asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(
      model_name="stt_rw_conformer_ctc_large")
  # Normalize the input to a 16-bit mono WAV before inference.
  # BUG FIX: the original exported to "test.wav" but then converted and
  # transcribed "Test2.wav" — a different (stale or missing) file. Use a
  # single consistent path for export, conversion, and transcription.
  wav_path = "test.wav"
  sound = AudioSegment.from_file(file.name)
  sound.export(wav_path, format='wav')
  pac.convert_wav_to_16bit_mono(wav_path, wav_path)
  transcription = asr_model.transcribe([wav_path])
  return transcription[0]