import aiofiles
import nemo
import nemo.collections.asr as nemo_asr
import pyaudioconvert as pac
from pydub import AudioSegment


def _read_upload_bytes(file):
    """Return the complete payload of *file* using a blocking read."""
    # Upload wrappers such as Starlette/FastAPI ``UploadFile`` keep the
    # synchronous file object under ``.file`` while their own ``read()`` is a
    # coroutine. Prefer the synchronous handle when it exists so this works
    # from a plain (non-async) function.
    stream = getattr(file, "file", None)
    if stream is None:
        stream = file
    payload = stream.read()
    if not isinstance(payload, (bytes, bytearray)):
        raise TypeError(
            f"expected bytes from the uploaded file, got {type(payload).__name__}"
        )
    return payload


def transcribe(file, modelName="stt_rw_conformer_transducer_large"):
    """Save an uploaded audio file to disk and transcribe it with NeMo.

    The payload is written to ``file.filename``, converted in place to WAV
    when the name does not end in ``.wav``, normalised to 16-bit mono, and
    then passed to the pretrained ASR model.

    Args:
        file: Object exposing ``filename`` (destination path) and either a
            blocking ``read()`` or a ``.file`` attribute that has one.
        modelName: Name of the pretrained model passed to
            ``EncDecRNNTBPEModel.from_pretrained``.

    Returns:
        dict with the recognised ``"text"`` and the original ``"filename"``.

    Raises:
        ValueError: If ``file.filename`` is empty.
        TypeError: If reading the upload does not produce bytes.
        RuntimeError: If the model yields no transcription.
    """
    audio_path = file.filename
    if not audio_path:
        raise ValueError("uploaded file has no filename")

    # aiofiles handles only work with ``async with``/``await``; this function
    # is synchronous, so the builtin ``open`` is the correct tool here.
    with open(audio_path, "wb") as out_file:
        out_file.write(_read_upload_bytes(file))
    print(audio_path)

    # NOTE(review): the model is loaded on every call; consider caching it if
    # this function is invoked repeatedly.
    asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(
        model_name=modelName)

    # Anything that is not already a .wav is decoded as MP3 and re-exported
    # as WAV over the same path (the path keeps its original name).
    if not audio_path.lower().endswith(".wav"):
        sound = AudioSegment.from_mp3(audio_path)
        sound.export(audio_path, format="wav")

    files = [audio_path]
    pac.convert_wav_to_16bit_mono(audio_path, audio_path)

    # NOTE(review): ``transcription[0]`` assumes each item paired out of
    # ``transcribe(...)`` is indexable with the text first -- confirm against
    # the installed NeMo version before restructuring this loop.
    transcription = None
    for fname, transcription in zip(
            files, asr_model.transcribe(paths2audio_files=files)):
        print(f"Audio in {fname} was recognized as: {transcription}")
        print(transcription[0])

    if transcription is None:
        raise RuntimeError(f"no transcription produced for {audio_path}")
    return {"text": transcription[0], "filename": file.filename}