"""Streamlit app: IELTS Speaking Part 1 practice.

The user configures a model in the sidebar, then interacts either by
typing (Chat) or by recording 10 seconds of audio, which is transcribed
with Whisper. The assistant reply is streamed from the OpenAI chat API,
shown live, and also played back through Coqui TTS.
"""

# Library imports
import openai
import streamlit as st
import pandas as pd
from datetime import datetime
from pathlib import Path
from TTS.api import TTS
import whisper
from audio_recorder import record

# Custom Streamlit app title and icon
st.set_page_config(
    page_title="IELTS Speaking",
    page_icon=":robot_face:",
)

# Set the title
st.title("Part 1 Speaking")

# ------------------------------------------------------------------ sidebar
st.sidebar.title(":gear: Model Configuration")

# Toggle between the app owner's key (Streamlit secrets) and a user-supplied key.
api_toggle = st.sidebar.toggle("Activate free API")
if api_toggle:
    openai.api_key = st.secrets["OPENAI_API_KEY"]
else:
    openai.api_key = st.sidebar.text_input('Your OpenAI API key here:', value="")

# Input mode: typed chat or recorded audio.
user_input_type = st.sidebar.selectbox("Choose input type:", ["Chat", "Record Audio"])

# Model Name Selector
model_name = st.sidebar.selectbox(
    "Select a Model",
    ["gpt-3.5-turbo", "gpt-4"],  # Add more model names as needed
    key="model_name",
)

# Temperature Slider
temperature = st.sidebar.slider(
    ":thermometer: Temperature",
    min_value=0.2,
    max_value=2.0,
    value=1.0,
    step=0.1,
    key="temperature",
)

# Max tokens Slider
max_tokens = st.sidebar.slider(
    ":straight_ruler: Max Tokens",
    min_value=1,
    max_value=4095,
    value=256,
    step=1,
    key="max_tokens",
)

# --------------------------------------------------------------- TTS / STT
# Heavyweight models are cached across Streamlit reruns; previously they
# were re-instantiated on every widget interaction (and TTS() was built
# twice just to enumerate the model list).
@st.cache_resource
def load_tts():
    """Load and cache the Coqui TTS model (entry 13 of the model registry)."""
    return TTS(TTS().list_models()[13])


@st.cache_resource
def load_whisper():
    """Load and cache the small English Whisper speech-to-text model."""
    return whisper.load_model("tiny.en")


def convert_2_speech(given_text):
    """Synthesize `given_text` to 'response.wav' and return that path."""
    load_tts().tts_to_file(text=given_text, file_path="response.wav")
    return "response.wav"


def convert_2_text(speech):
    """Transcribe the recorded audio with Whisper and return its text."""
    return load_whisper().transcribe(speech)["text"]


# --------------------------------------------------------------- chat state
# Initialize chat messages BEFORE the reset handler: the original read
# st.session_state.messages inside the reset branch prior to this guard,
# which raised AttributeError when Reset was clicked on a fresh session.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Reset Button: archive the current conversation, then clear it.
if st.sidebar.button(":arrows_counterclockwise: Reset Chat"):
    if st.session_state.messages:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        chat_history = "\n".join(
            f"{m['role']}: {m['content']}" for m in st.session_state.messages
        )
        new_entry = pd.DataFrame({"Timestamp": [timestamp], "Chat": [chat_history]})
        # Append to the archive instead of overwriting it (the original
        # rebuilt an empty DataFrame each rerun, so every reset clobbered
        # chat_history.csv). Write the header only on first creation.
        history_path = Path("chat_history.csv")
        new_entry.to_csv(
            history_path,
            mode="a",
            header=not history_path.exists(),
            index=False,
        )
    # Clear the conversation.
    st.session_state.messages = []

# Display Chat History (the hidden system prompt is not rendered).
for message in st.session_state.messages:
    if message["role"] != "system":
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

system_text = """As a helpful, thoughtful, and wise IELTS instructor responsible for testing Speaking Part 1. The users will provide the {subject} they want to talk about. It's important to follow these guidelines:
- Give only original question for provided {subject}.
- Give one question at a time.
For example:
{subject}: Work
What is your job?
Where do you work?
{subject}: Study
What do you study?
Where do you study that?
{subject}: Hometown
Do you live in a house or a flat?
How are the walls decorated?
Let's start the test."""


def _ensure_system_prompt():
    """Inject the IELTS instructions exactly once per conversation.

    The original chat branch re-appended the system prompt on every turn
    (duplicating it in the context), while the audio branch never added it
    at all; both paths now share this guard.
    """
    if not any(m["role"] == "system" for m in st.session_state.messages):
        st.session_state.messages.append({"role": "system", "content": system_text})


def _generate_assistant_reply():
    """Stream a chat completion for the current history and render it.

    Shows tokens live with a cursor, stores the finished reply in
    session_state, and plays a TTS rendition of it. (This code was
    previously duplicated verbatim in both input branches.)
    """
    full_response = ""
    with st.chat_message("assistant"):
        with st.status("Generating response..."):
            message_placeholder = st.empty()
            for response in openai.ChatCompletion.create(
                model=model_name,  # Use the selected model name
                messages=[
                    {"role": m["role"], "content": m["content"]}
                    for m in st.session_state.messages
                ],
                temperature=temperature,  # Set temperature
                max_tokens=max_tokens,  # Set max tokens
                stream=True,
            ):
                full_response += response.choices[0].delta.get("content", "")
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
    st.session_state.messages.append({"role": "assistant", "content": full_response})
    st.audio(convert_2_speech(full_response))


# --------------------------------------------------- user input + AI response
if user_input_type == "Chat":
    if prompt := st.chat_input("What is up?"):
        _ensure_system_prompt()
        # User turn
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        # Assistant turn
        _generate_assistant_reply()

elif user_input_type == "Record Audio":
    # Record audio when the "Record Audio" button is clicked
    if st.button("Record Audio"):
        st.write("Recording... Please speak for 10 seconds.")
        output = record(seconds=10, filename='my_recording.wav')
        st.write("Recording complete!")
        # Convert the recorded audio to text using the Whisper model
        user_message = convert_2_text(output)
        _ensure_system_prompt()
        # Display the transcribed text as the user turn
        st.session_state.messages.append({"role": "user", "content": user_message})
        with st.chat_message("user"):
            st.markdown(user_message)
        # Assistant turn
        _generate_assistant_reply()