# voice-demo / app.py
import os

import streamlit as st
from audio_recorder_streamlit import audio_recorder

from utils.helper import *
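
# utils.helper is not shown in this file; based on how its names are used
# below, it is assumed to provide at least:
#   - ChatBot: wraps an OpenAI client (exposed as .client) and offers a
#     generate_response(prompt) method
#   - generate_random_string(): a random suffix for building unique filenames
#   - autoplay_audio(path): plays a saved audio file in the page with autoplay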
st.set_page_config(layout="wide")

# Read the OpenAI API key from the environment
api_key = os.environ["OPENAI_API_KEY"]
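
# To run locally: set OPENAI_API_KEY in the environment, then
#   streamlit run app.py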
# Initialize the ChatBot with the OpenAI API key and a protocol.
assistant = ChatBot(
    api_key=api_key,
    protocol="""
    You are a helpful assistant.
    """,
)
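
# The protocol above is presumably used by ChatBot as the system message for
# each chat completion (an assumption; utils.helper is not shown here).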
# Set the title of the Streamlit app
st.title("Voice Command Here")
# Initialize session state for conversation history
if "history" not in st.session_state:
    st.session_state.history = []
# Create two columns
col1, col2 = st.columns(2)
# Column 1: Audio control and playback
with col1:
    # Record audio from the user
    audio_bytes = audio_recorder(
        text="Click here to record!",
        recording_color="#e8b62c",
        neutral_color="#6aa36f",
        icon_name="user",
        icon_size="6x",
    )
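
    # audio_recorder returns the captured audio as WAV bytes once a recording
    # finishes; before any recording it returns None.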
    with st.expander("Expand/collapse to listen to your own audio:"):
        if audio_bytes:
            # If audio was recorded, play it back in the app
            st.audio(audio_bytes, format="audio/wav")

    # Build a unique filename for the recorded audio
    some_string = generate_random_string()
    audio_file = f"audio_{some_string}.wav"

    # Save the recorded audio to a file; if nothing has been recorded yet,
    # audio_bytes is None and f.write raises a TypeError, leaving status empty
    status = ""
    with open(audio_file, "wb") as f:
        try:
            f.write(audio_bytes)
            status = "Recording saved successfully."
            st.success("Success.")  # Notify the user that the audio has been saved
        except TypeError:
            st.warning("Please record audio.")  # Warn the user that there is nothing to save
    if status:
        # The audio file was saved successfully, so transcribe it
        # using OpenAI's Whisper model
        with open(audio_file, "rb") as f:
            transcript = assistant.client.audio.transcriptions.create(
                model="whisper-1", file=f
            )

        # Extract the transcribed text
        prompt = transcript.text

        # Add user input to session state history
        st.session_state.history.append(f"πŸ€” User: {prompt}")

        # Generate a response from the assistant using the transcribed text
        response = assistant.generate_response(prompt)

        # Add assistant response to session state history
        st.session_state.history.append(f"πŸ€– Bot: {response}")
        try:
            # Reserve an empty placeholder for the audio output
            st.empty()

            # Convert the assistant's response to speech using OpenAI's TTS model
            response_voice = assistant.client.audio.speech.create(
                model="tts-1", voice="alloy", input=response
            )

            # Save the generated speech under a unique filename
            some_string_out = generate_random_string()
            speech_file_path = f"output_{some_string_out}.mp3"
            response_voice.write_to_file(speech_file_path)

            # Play the generated speech in the app
            autoplay_audio(speech_file_path)
            st.success("Autoplay attempted.")  # Notify the user that autoplay was attempted
        except Exception:
            st.error("No response file found.")  # The speech file could not be created or played
# Column 2: Display conversation history
with col2:
    st.header("Conversation History")
    for entry in st.session_state.history:
        st.markdown(entry)