# interview_copilot_2 / whisper_stt.py
# alex buz
# fix
# 3df36f0
from streamlit_mic_recorder import mic_recorder
import streamlit as st
import io
from openai import OpenAI
import os
def whisper_stt(openai_api_key, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
                use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
    """Render a microphone recorder and transcribe new recordings with OpenAI Whisper.

    State is kept in ``st.session_state``:

    * ``_last_speech_to_text_transcript_id`` -- ``id`` of the last recording handled.
    * ``_last_speech_to_text_transcript`` -- text of the last successful transcription.
    * ``openai_client`` -- ``OpenAI`` client, created once on first use.
    * ``key + '_output'`` -- mirror of the return value (only when ``key`` is truthy).

    Args:
        openai_api_key: API key used to build the ``OpenAI`` client. When falsy,
            no transcription is attempted and ``None`` is returned.
        start_prompt: Forwarded to ``mic_recorder``.
        stop_prompt: Forwarded to ``mic_recorder``.
        just_once: Forwarded to ``mic_recorder``. Additionally, when true, the
            previous transcript is NOT returned again if the recorder yields a
            recording that is not newer than the last one handled.
        use_container_width: Forwarded to ``mic_recorder``.
        language: Forwarded as ``language`` to the transcription request.
        callback: Optional callable invoked as ``callback(*args, **kwargs)``
            whenever a new recording was detected (even if transcription failed).
        args: Positional arguments for ``callback``.
        kwargs: Keyword arguments for ``callback`` (``None`` means none).
        key: Forwarded to ``mic_recorder``; also prefixes the ``'_output'``
            session-state entry.

    Returns:
        The transcript text for a new recording, the previous transcript when
        nothing new was recorded and ``just_once`` is false, otherwise ``None``
        (no audio, no API key, or every transcription attempt failed).
    """
    state = st.session_state
    if '_last_speech_to_text_transcript_id' not in state:
        state._last_speech_to_text_transcript_id = 0
    if '_last_speech_to_text_transcript' not in state:
        state._last_speech_to_text_transcript = None
    if key and key + '_output' not in state:
        state[key + '_output'] = None

    audio = mic_recorder(start_prompt=start_prompt, stop_prompt=stop_prompt, just_once=just_once,
                         use_container_width=use_container_width, format="webm", key=key)

    new_output = False
    output = None
    if audio is not None and openai_api_key:
        if 'openai_client' not in state:
            # NOTE(review): the client is cached for the whole session, so a
            # later change of openai_api_key is not picked up -- confirm intended.
            state.openai_client = OpenAI(api_key=openai_api_key)

        audio_id = audio['id']
        # A recording counts as new only if its id is greater than the last one handled.
        new_output = audio_id > state._last_speech_to_text_transcript_id
        if new_output:
            state._last_speech_to_text_transcript_id = audio_id
            audio_bio = io.BytesIO(audio['bytes'])
            # The in-memory buffer is given a file name with a .webm extension.
            audio_bio.name = 'audio.webm'
            for _ in range(3):  # Retry up to 3 times in case of OpenAI server error.
                # A failed attempt may already have consumed the buffer; rewind
                # so a retry does not upload an empty or truncated file.
                audio_bio.seek(0)
                try:
                    transcript = state.openai_client.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_bio,
                        language=language
                    )
                except Exception as e:
                    print(str(e))  # log the exception in the terminal
                else:
                    output = transcript.text
                    state._last_speech_to_text_transcript = output
                    break
        elif not just_once:
            # Nothing new: keep showing the previous transcript.
            output = state._last_speech_to_text_transcript

    if key:
        state[key + '_output'] = output
    if new_output and callback:
        callback(*args, **(kwargs or {}))
    return output