# NOTE(review): `install` is imported only for its import-time effects and is
# never referenced below; it is kept ahead of the reazonspeech import because
# it presumably prepares that dependency -- confirm before reordering.
import install
from reazonspeech.nemo.asr import audio_from_path, load_model, transcribe

# Lazily-initialised ASR model shared by every call to speech_to_text().
model = None


def speech_to_text(audio_file, _model_size=None):
    """Transcribe an audio file and return its text plus per-segment timings.

    The model is loaded with ``load_model()`` on the first call and cached in
    the module-level ``model`` global, so later calls reuse the same instance.

    Args:
        audio_file: Value handed straight to ``audio_from_path`` (presumably a
            filesystem path to the audio -- verify against callers).
        _model_size: Accepted for interface compatibility but ignored; the
            model is always loaded with ``load_model()``'s defaults.

    Returns:
        A ``(text, text_with_timestamps)`` tuple. ``text`` is ``ret.text`` from
        the transcription result. ``text_with_timestamps`` holds one line per
        segment in ``ret.segments``, formatted as
        ``"<start>\\t<end>\\t<segment text>\\n"`` with both times rendered to
        two decimal places; it is the empty string when there are no segments.
    """
    global model
    # Compare against the None sentinel explicitly so a loaded model object
    # that happens to evaluate as falsy is not reloaded on every call.
    if model is None:
        model = load_model()

    audio = audio_from_path(audio_file)
    ret = transcribe(model, audio)

    # Build the listing in one join rather than repeated string concatenation.
    text_with_timestamps = "".join(
        f"{segment.start_seconds:.2f}\t{segment.end_seconds:.2f}\t{segment.text}\n"
        for segment in ret.segments
    )
    return ret.text, text_with_timestamps