import os

import streamlit as st
from audio_recorder_streamlit import audio_recorder

from utils.helper import *
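# NOTE: utils.helper is assumed to provide ChatBot, generate_random_string,
# and autoplay_audio, based on how they are used in this script.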

# Configure the page layout and read the OpenAI API key from the environment
st.set_page_config(layout="wide")
api_key = os.environ["OPENAI_API_KEY"]

# Initialize the ChatBot with the OpenAI API key and a protocol.
assistant = ChatBot(
    api_key=api_key,
    protocol="""
    You are a helpful assistant.
    """,
)

# Set the title of the Streamlit app
st.title("Voice Command Here")

# Initialize session state for conversation history
if "history" not in st.session_state:
    st.session_state.history = []

# Create two columns
col1, col2 = st.columns(2)

# Column 1: Audio control and playback
with col1:
    # Record audio from the user
    audio_bytes = audio_recorder(
        text="Click here to record!",
        recording_color="#e8b62c",
        neutral_color="#6aa36f",
        icon_name="user",
        icon_size="6x",
    )
    with st.expander("Expand/collapse to listen to your own audio:"):
        if audio_bytes:
            # If audio is recorded, play the audio in the app
            st.audio(audio_bytes, format="audio/wav")

    # Define the filename for the recorded audio
    some_string = generate_random_string()
    audio_file = f"audio_{some_string}.wav"

    # Save the recorded audio to a file, but only if something was actually recorded
    status = ""
    if audio_bytes:
        with open(audio_file, "wb") as f:
            f.write(audio_bytes)
        status = "Recording saved successfully."
        st.success("Recording saved.")  # Notify the user that the audio has been saved
    else:
        st.warning(
            "Please record audio."
        )  # Warn the user that no audio was recorded

    if status:
        # If the audio file was saved successfully, open it for reading and
        # transcribe it using OpenAI's Whisper model
        with open(audio_file, "rb") as audio_stream:
            transcript = assistant.client.audio.transcriptions.create(
                model="whisper-1", file=audio_stream
            )

        # Extract the transcribed text
        prompt = transcript.text

        # Add user input to session state history
        st.session_state.history.append(f"🤔 User: {prompt}")

        # Generate a response from the assistant using the transcribed text
        response = assistant.generate_response(prompt)

        # Add assistant response to session state history
        st.session_state.history.append(f"🤖 Bot: {response}")

        try:
            # Convert the assistant's response to speech using OpenAI's TTS model
            response_voice = assistant.client.audio.speech.create(
                model="tts-1", voice="alloy", input=response
            )

            # Define the filename for the generated speech
            some_string_out = generate_random_string()
            speech_file_path = f"output_{some_string_out}.mp3"

            # Save the generated speech to a file
            response_voice.write_to_file(speech_file_path)

            # Play the generated speech in the app
            autoplay_audio(speech_file_path)
            st.success(
                "Autoplay attempted."
            )  # Notify the user that autoplay was attempted
        except Exception:
            st.error(
                "Could not generate or play the audio response."
            )  # Display an error if text-to-speech or playback failed


# Column 2: Display conversation history
with col2:
    st.header("Conversation History")
    for entry in st.session_state.history:
        st.markdown(entry)
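
# A minimal way to launch the app, assuming this script is saved as app.py
# (hypothetical filename) and OPENAI_API_KEY is set in the environment:
#   streamlit run app.py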