# Kinyarwanda-asr / nemo_asr.py
# rutsam's picture
# push the code
# 3119dd6
# raw
# history blame
# 948 Bytes
import aiofiles
import nemo
import nemo.collections.asr as nemo_asr
def transcribe(file, modelName="stt_rw_conformer_transducer_large"):
    """Save an uploaded audio file to disk and transcribe it with a NeMo model.

    The upload's bytes are written to the path given by ``file.filename``.
    When that name does not end in "wav" the file is re-exported in place as
    WAV, then it is passed through ``pac.convert_wav_to_16bit_mono`` and
    handed to the pretrained model's ``transcribe`` method.

    Args:
        file: Upload object exposing ``filename`` and either a synchronous
            ``read()`` or an underlying ``file`` object that has one.
        modelName: Name passed to ``EncDecRNNTBPEModel.from_pretrained``.

    Returns:
        A dict with "text" (the first element of the transcription result)
        and "filename" (the upload's original ``filename``).

    Raises:
        RuntimeError: If the model yields no transcription for the file.
    """
    audio_path = file.filename

    # This function is synchronous, so read from the wrapped file object when
    # the upload exposes one. Presumably that is the case for Starlette/FastAPI
    # uploads, whose own read() is a coroutine -- TODO confirm against caller.
    source = getattr(file, "file", file)
    content = source.read()

    # aiofiles handles only work with `async with` / `await`; in a plain
    # function the built-in open() is what actually gets the bytes on disk.
    # NOTE(review): the upload's name is used as a path unchanged -- confirm
    # that callers sanitise it.
    with open(audio_path, "wb") as out_file:
        out_file.write(content)
    print(audio_path)

    asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(
        model_name=modelName)

    # NOTE(review): `AudioSegment` and `pac` are not imported in the visible
    # part of this file -- confirm they are brought into scope elsewhere.
    # Use `filename` here as well, matching the rest of the function.
    if not audio_path.endswith("wav"):
        sound = AudioSegment.from_mp3(audio_path)
        sound.export(audio_path, format="wav")
    files = [audio_path]
    pac.convert_wav_to_16bit_mono(audio_path, audio_path)

    transcription = None
    for fname, transcription in zip(
            files, asr_model.transcribe(paths2audio_files=files)):
        print(f"Audio in {fname} was recognized as: {transcription}")
        print(transcription[0])

    # Fail with a clear message instead of an unbound-name error when the
    # model returned nothing to iterate over.
    if transcription is None:
        raise RuntimeError(f"no transcription produced for {audio_path}")
    return {"text": transcription[0], "filename": file.filename}