Spaces:
Build error
Build error
File size: 8,033 Bytes
b489094 c226dbf b489094 c226dbf b489094 659b78f 5b1f2cd 659b78f b489094 f6021c1 b489094 5b1f2cd 6ad20d2 5b1f2cd 6ad20d2 b489094 5b1f2cd b489094 5b1f2cd b489094 6ad20d2 b489094 6ad20d2 b489094 5b1f2cd b489094 6ad20d2 b489094 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
# Library
import os
from datetime import datetime

import openai
import pandas as pd
import streamlit as st
import whisper
from TTS.api import TTS

from audio_recorder import record
# Custom Streamlit app title and icon
st.set_page_config(
    page_title="IELTS Speaking",
    page_icon=":robot_face:",
)

# Set the title
st.title("Part 1 Speaking")

# Sidebar Configuration
st.sidebar.title(":gear: Model Configuration")

# Toggle between the app owner's key (stored in Streamlit secrets) and a
# user-supplied key.
api_toggle = st.sidebar.toggle("Activate free API")

openai_key = ""
if api_toggle:
    # Key comes from .streamlit/secrets.toml on the hosting side.
    openai_key = st.secrets["OPENAI_API_KEY"]
else:
    # type="password" masks the key on screen; the original used a plain
    # text box, which echoed the secret to anyone viewing the page.
    openai_key = st.sidebar.text_input(
        'Your OpenAI API key here:', value="", type="password"
    )
# Hoisted out of the branches — both did the identical assignment.
openai.api_key = openai_key

# User Input and AI Response
user_input_type = st.sidebar.selectbox("Choose input type:", ["Chat", "Record Audio"])

# Model Name Selector
model_name = st.sidebar.selectbox(
    "Select a Model",
    ["gpt-3.5-turbo", "gpt-4"],  # Add more model names as needed
    key="model_name",
)
# Sampling controls for the chat model.
# Temperature: higher values make replies more varied, lower more focused.
temperature = st.sidebar.slider(
    ":thermometer: Temperature",
    min_value=0.2, max_value=2.0, value=1.0, step=0.1,
    key="temperature",
)

# Max tokens: upper bound on the length of each generated reply.
max_tokens = st.sidebar.slider(
    ":straight_ruler: Max Tokens",
    min_value=1, max_value=4095, value=256, step=1,
    key="max_tokens",
)

# Optional sampling controls, currently disabled (left at API defaults).
# Re-enable together with the matching arguments in the ChatCompletion calls.
# top_p = st.sidebar.slider(
#     "🎯 Top P",
#     min_value=0.00, max_value=1.00, value=1.00, step=0.01,
#     key="top_p",
# )
# presence_penalty = st.sidebar.slider(
#     "🚫 Presence penalty",
#     min_value=0.00, max_value=2.00, value=0.00, step=0.01,
#     key="presence_penalty",
# )
# frequency_penalty = st.sidebar.slider(
#     "🤐 Frequency penalty",
#     min_value=0.00, max_value=2.00, value=0.00, step=0.01,
#     key="frequency_penalty",
# )
# TEXT2SPEECH MODEL
# Index of the chosen voice in the library's released-model list.
# NOTE(review): position 13 depends on the list's ordering in this TTS
# release — pin a model *name* instead if the library is ever upgraded.
_TTS_MODEL_INDEX = 13
# A throwaway TTS() instance is needed just to enumerate available models.
tts = TTS(TTS().list_models()[_TTS_MODEL_INDEX])
def convert_2_speech(given_text, file_path="response.wav"):
    """Synthesize `given_text` to a WAV file and return its path.

    Args:
        given_text: Text to speak.
        file_path: Output WAV path; defaults to "response.wav" so existing
            callers are unaffected. Each call overwrites the file.

    Returns:
        The path of the written audio file (suitable for st.audio).
    """
    tts.tts_to_file(text=given_text, file_path=file_path)
    return file_path
# SPEECH2TEXT MODEL
# Small English-only Whisper model keeps transcription latency low.
model_whisper = whisper.load_model("tiny.en")

def convert_2_text(speech):
    """Transcribe an audio file with the Whisper model and return its text."""
    result = model_whisper.transcribe(speech)
    return result["text"]
# CHAT MODEL
# Initialize the conversation BEFORE the reset handler below touches it;
# the original read st.session_state.messages before this guard ran.
if "messages" not in st.session_state:
    st.session_state.messages = []

# In-memory frame for the conversation being archived on reset.
chat_history_df = pd.DataFrame(columns=["Timestamp", "Chat"])

# Reset Button
if st.sidebar.button(":arrows_counterclockwise: Reset Chat"):
    # Archive the current conversation before wiping it.
    if st.session_state.messages:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        chat_history = "\n".join(
            f"{m['role']}: {m['content']}" for m in st.session_state.messages
        )
        new_entry = pd.DataFrame({"Timestamp": [timestamp], "Chat": [chat_history]})
        chat_history_df = pd.concat([chat_history_df, new_entry], ignore_index=True)
        # Append to the CSV instead of overwriting it: chat_history_df is
        # rebuilt empty on every rerun, so the original's plain to_csv()
        # discarded all previously archived chats on each reset.
        write_header = not os.path.exists("chat_history.csv")
        chat_history_df.to_csv(
            "chat_history.csv", mode="a", header=write_header, index=False
        )
    # Clear the chat messages.
    st.session_state.messages = []

# Accumulates the assistant's streamed reply; reset on every rerun.
full_response = ""
# Display Chat History (the system prompt is kept out of the visible transcript)
for msg in st.session_state.messages:
    if msg["role"] == "system":
        continue
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
system_text="""As a helpful, thoughtful, and wise IELTS instructor responsible for testing Speaking Part 1. The users will provide the {subject} they want to talk about.
It's important to follow these guidelines:
- Give only original question for provided {subject}.
- Give one question at a time.
For example:
{subject}: Work
What is your job?
Where do you work?
{subject}: Study
What do you study?
Where do you study that?
{subject}: Hometown
Do you live in a house or a flat?
How are the walls decorated?
Let's start the test."""
# User Input and AI Response
# Both input modes ("Chat" and "Record Audio") share the same
# generate-render-speak pipeline, factored into the helpers below.

def _seed_system_prompt():
    """Add the IELTS instructor prompt to the history exactly once.

    Fixes two defects in the original: the chat branch re-appended the
    system prompt on every user turn (duplicating it in the history sent
    to the API), and the audio branch never added it at all.
    """
    if not any(m["role"] == "system" for m in st.session_state.messages):
        st.session_state.messages.append({"role": "system", "content": system_text})


def _stream_assistant_reply():
    """Stream a chat completion for the current history and render it live.

    Appends the finished reply to the session history and plays it back
    through the TTS model.

    Returns:
        The complete assistant reply text.
    """
    reply = ""
    with st.chat_message("assistant"):
        with st.status("Generating response..."):
            message_placeholder = st.empty()
            for chunk in openai.ChatCompletion.create(
                model=model_name,  # Selected in the sidebar
                messages=[
                    {"role": m["role"], "content": m["content"]}
                    for m in st.session_state.messages
                ],
                temperature=temperature,
                max_tokens=max_tokens,
                # top_p / presence_penalty / frequency_penalty intentionally
                # left at API defaults (their sidebar sliders are disabled).
                stream=True,
            ):
                # Delta chunks may omit "content" (e.g. the role-only first chunk).
                reply += chunk.choices[0].delta.get("content", "")
                message_placeholder.markdown(reply + "▌")
            message_placeholder.markdown(reply)
    st.session_state.messages.append({"role": "assistant", "content": reply})
    st.audio(convert_2_speech(reply))
    return reply


if user_input_type == "Chat":
    if prompt := st.chat_input("What is up?"):
        _seed_system_prompt()
        # User turn
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        # Assistant turn
        full_response = _stream_assistant_reply()
elif user_input_type == "Record Audio":
    # Record audio when the "Record Audio" button is clicked
    if st.button("Record Audio"):
        st.write("Recording... Please speak for 10 seconds.")
        output = record(seconds=10, filename='my_recording.wav')
        st.write("Recording complete!")
        # Convert the recorded audio to text using the Whisper model
        user_message = convert_2_text(output)
        _seed_system_prompt()
        # Display the transcribed text as user input
        st.session_state.messages.append({"role": "user", "content": user_message})
        with st.chat_message("user"):
            st.markdown(user_message)
        # Assistant turn
        full_response = _stream_assistant_reply()