import install
from reazonspeech.nemo.asr import audio_from_path, load_model, transcribe

# Module-level cache for the ASR model. Stays None until speech_to_text()
# loads it on first use, so the model is loaded at most once per process.
model = None

def speech_to_text(audio_file, _model_size=None):
    """Transcribe an audio file and return the text plus per-segment timings.

    The model is obtained from ``load_model()`` on the first call and stored
    in the module-level ``model`` global so that later calls reuse it.

    Args:
        audio_file: Passed unchanged to ``audio_from_path``. Presumably a
            filesystem path to the audio; confirm against the reazonspeech
            API.
        _model_size: Accepted but ignored by this implementation.

    Returns:
        A 2-tuple ``(text, text_with_timestamps)``. ``text`` is the ``text``
        attribute of the transcription result. ``text_with_timestamps`` is a
        string holding one newline-terminated line per segment, formatted as
        ``start<TAB>end<TAB>text`` with both times rendered to two decimal
        places. It is the empty string when there are no segments.
    """
    global model

    # Test the None sentinel explicitly: a truthiness check would trigger a
    # reload on every call if the loaded model object ever evaluated falsy.
    if model is None:
        model = load_model()

    audio = audio_from_path(audio_file)
    ret = transcribe(model, audio)

    # Build the listing in a single join instead of repeated `+=`.
    text_with_timestamps = "".join(
        f"{segment.start_seconds:.2f}\t{segment.end_seconds:.2f}\t{segment.text}\n"
        for segment in ret.segments
    )

    return ret.text, text_with_timestamps