import librosa
import nemo.collections.asr as nemo_asr
import soundfile as sf
from pydub import AudioSegment


def transcribe(file, modelName="stt_rw_conformer_transducer_large"):
    """Transcribe an uploaded audio file with a pretrained NeMo RNNT model.

    `file` is expected to be a file-like object with a `.name` attribute
    pointing at an audio file on disk (any format pydub/ffmpeg can read).
    """
    # Load the pretrained Conformer-Transducer model (downloaded and cached
    # by NeMo on first use).
    asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(
        model_name=modelName)

    # Convert the input (mp3, ogg, ...) to WAV via pydub/ffmpeg.
    sound = AudioSegment.from_file(file.name)
    sound.export("test.wav", format="wav")

    # Resample to 16 kHz mono, the sample rate the model expects.
    # librosa.load(sr=16000) performs the resampling; loading at the default
    # 22.05 kHz and then writing the file with a 16 kHz header would silently
    # distort the audio.
    audio, sample_rate = librosa.load("test.wav", sr=16000, mono=True)
    sf.write("test_16k.wav", audio, sample_rate)

    # Transcribe the prepared file and return the first result.
    transcription = asr_model.transcribe(["test_16k.wav"])
    return transcription[0]
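

# A minimal usage sketch (an assumption, not part of the original module):
# `transcribe()` only reads the `.name` attribute of its first argument, so a
# small namespace object wrapping a local path is enough to call it directly.
# Any upload object that exposes `.name` (e.g. a Gradio temp file) would work
# the same way. The file name below is hypothetical.
if __name__ == "__main__":
    from types import SimpleNamespace

    sample = SimpleNamespace(name="sample_recording.mp3")
    print(transcribe(sample))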