import base64
import os

import openai
import streamlit as st
from audio_recorder_streamlit import audio_recorder

from utils.helper import *

# --- Page and API setup -----------------------------------------------------
st.set_page_config(layout="wide")

# Fail fast with a readable in-app message instead of a raw KeyError
# traceback when the environment variable is missing.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    st.error("OPENAI_API_KEY environment variable is not set.")
    st.stop()

# Initialize the ChatBot (from utils.helper) with the OpenAI API key and a
# system protocol prompt.
assistant = ChatBot(
    api_key=api_key,
    protocol="""
You are a helpful assistant.
""",
)

# Set the title of the Streamlit app
st.title("Voice Command Here")

# Conversation history must survive Streamlit reruns, so it lives in
# st.session_state rather than a plain module-level list.
if "history" not in st.session_state:
    st.session_state.history = []

# Create two columns
col1, col2 = st.columns(2)

# --- Column 1: record audio, transcribe it, and speak the reply -------------
with col1:
    # Empty string means "no recording saved yet"; the transcription step
    # below runs only after a recording has been written to disk.  (The
    # original initialized this to "recording", which made the non-empty
    # check pass even when nothing had been recorded, so transcription was
    # attempted on a nonexistent file.)
    status = ""
    audio_file = None

    # Record audio from the user's microphone.
    audio_bytes = audio_recorder(
        text="Click here to record!",
        recording_color="#e8b62c",
        neutral_color="#6aa36f",
        icon_name="user",
        icon_size="6x",
    )

    with st.expander("Expand/collapse to listen to your own audio:"):
        if audio_bytes:
            # Play the freshly recorded clip back to the user.
            st.audio(audio_bytes, format="audio/wav")

            # Random suffix keeps concurrent sessions/reruns from
            # clobbering each other's files.
            some_string = generate_random_string()
            audio_file = f"audio_{some_string}.wav"
            try:
                with open(audio_file, "wb") as f:
                    f.write(audio_bytes)
                status = "Recording saved successfully."
                st.success("Success.")  # notify the user the audio was saved
            except OSError:
                # Narrowed from a bare `except:` — only disk/IO failures
                # should reach this warning.
                st.warning(
                    "Please record audio."
                )  # warn the user if the audio could not be saved

    if status:  # a recording was saved successfully
        # Transcribe the audio using OpenAI's Whisper model.  The context
        # manager guarantees the file handle is closed (the original opened
        # it and never closed it).
        with open(audio_file, "rb") as audio_in:
            transcript = assistant.client.audio.transcriptions.create(
                model="whisper-1", file=audio_in
            )
        # Extract the transcribed text
        prompt = transcript.text

        # Record both sides of the exchange in the session history.
        st.session_state.history.append(f"🤔 User: {prompt}")
        response = assistant.generate_response(prompt)
        st.session_state.history.append(f"🤖 Bot: {response}")

        try:
            # Clear existing audio elements before playing the reply.
            st.empty()

            # Convert the assistant's response to speech using OpenAI's
            # TTS model.
            response_voice = assistant.client.audio.speech.create(
                model="tts-1", voice="alloy", input=response
            )

            # Save the generated speech under a unique filename.
            some_string_out = generate_random_string()
            speech_file_path = f"output_{some_string_out}.mp3"
            response_voice.write_to_file(speech_file_path)

            # autoplay_audio (utils.helper) embeds the mp3 so it starts
            # playing automatically in the app.
            autoplay_audio(speech_file_path)
            st.success(
                "Autoplay attempted."
            )  # notify the user that autoplay was attempted
        except (OSError, openai.OpenAIError):
            # Narrowed from a bare `except:` — catch TTS/API and file
            # errors, not KeyboardInterrupt/SystemExit.
            st.error(
                "No response file found."
            )  # display an error if the response could not be produced

# --- Column 2: display conversation history ---------------------------------
with col2:
    st.header("Conversation History")
    for entry in st.session_state.history:
        st.markdown(entry)