import aiofiles
import nemo
import nemo.collections.asr as nemo_asr
import pyaudioconvert as pac
import soundfile as sf
from pydub import AudioSegment


def transcribe(file, modelName="stt_rw_conformer_transducer_large"):
    """Transcribe an audio file to text with an NVIDIA NeMo ASR model.

    Parameters
    ----------
    file : file-like object exposing a ``.name`` attribute (e.g. an upload
        handle). Any container/codec that pydub's ffmpeg backend can read.
    modelName : str, optional
        Currently unused — the CTC model below is hard-coded.
        TODO(review): wire this through once the RNNT model choice is settled.

    Returns
    -------
    str
        The recognized text for the audio.
    """
    asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(
        model_name="stt_rw_conformer_ctc_large")

    # Normalize the input: re-encode to WAV, then force 16-bit mono PCM,
    # which is the sample format the model expects.
    wav_path = "test.wav"
    sound = AudioSegment.from_file(file.name)
    sound.export(wav_path, format="wav")
    # BUG FIX: the original exported to "test.wav" but converted and
    # transcribed "Test2.wav", so the uploaded audio was never the file
    # actually sent to the model. All three steps now use the same path.
    pac.convert_wav_to_16bit_mono(wav_path, wav_path)

    transcription = asr_model.transcribe([wav_path])
    return transcription[0]