# Library
import openai
import streamlit as st
import pandas as pd
from datetime import datetime
from TTS.api import TTS
import whisper
from audio_recorder import record  # helper expected to capture microphone input to a WAV file
# Custom Streamlit app title and icon
st.set_page_config(
    page_title="IELTS Speaking",
    page_icon=":robot_face:",
)
# Set the title
st.title("Part 1 Speaking")
# Sidebar Configuration
st.sidebar.title(":gear: Model Configuration")
# Toggle for API activation
api_toggle = st.sidebar.toggle("Activate free API")
# Define an empty API key
openai_key = ""
# Check if the toggle is on
if api_toggle:
    # If the toggle is on, access the API key from secrets
    openai_key = st.secrets["OPENAI_API_KEY"]
    openai.api_key = openai_key
else:
    # If the toggle is off, allow the user to input the API key
    openai_key = st.sidebar.text_input('Your OpenAI API key here:', value="")
    openai.api_key = openai_key
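# Optional guard (illustrative, not part of the original script): the chat calls
# below fail without a key, so an early sidebar warning could be added like this:
# if not openai_key:
#     st.sidebar.warning("Enter an OpenAI API key to enable responses.")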
# Input type selector (chat or recorded audio)
user_input_type = st.sidebar.selectbox("Choose input type:", ["Chat", "Record Audio"])
# Model Name Selector
model_name = st.sidebar.selectbox(
    "Select a Model",
    ["gpt-3.5-turbo", "gpt-4"],  # Add more model names as needed
    key="model_name",
)
# Temperature Slider
temperature = st.sidebar.slider(
    ":thermometer: Temperature",
    min_value=0.2,
    max_value=2.0,
    value=1.0,
    step=0.1,
    key="temperature",
)
# Max tokens Slider
max_tokens = st.sidebar.slider(
    ":straight_ruler: Max Tokens",
    min_value=1,
    max_value=4095,
    value=256,
    step=1,
    key="max_tokens",
)
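# Temperature controls how varied the generated questions are (higher = more random);
# max_tokens caps the length of each model reply.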
# Top p Slider
# top_p = st.sidebar.slider(
#     "🎯 Top P",
#     min_value=0.00,
#     max_value=1.00,
#     value=1.00,
#     step=0.01,
#     key="top_p",
# )
# Presence penalty Slider
# presence_penalty = st.sidebar.slider(
#     "🚫 Presence penalty",
#     min_value=0.00,
#     max_value=2.00,
#     value=0.00,
#     step=0.01,
#     key="presence_penalty",
# )
# Frequency penalty Slider
# frequency_penalty = st.sidebar.slider(
#     "🤐 Frequency penalty",
#     min_value=0.00,
#     max_value=2.00,
#     value=0.00,
#     step=0.01,
#     key="frequency_penalty",
# )
# TEXT2SPEECH MODEL
# Instantiate the TTS class
tts = TTS(TTS().list_models()[13])
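# Note: list_models()[13] selects a model by position, which can change between TTS
# releases. Pinning an explicit model name is more robust; the id below is only an
# example (assumed, adjust to whatever index 13 resolves to in your install):
# tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")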
def convert_2_speech(given_text):
    tts.tts_to_file(text=given_text, file_path="response.wav")
    return "response.wav"
# SPEECH2TEXT MODEL
model_whisper = whisper.load_model("tiny.en")
def convert_2_text(speech):
    user_message = model_whisper.transcribe(speech)["text"]
    return user_message
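# Illustrative round trip (not executed here): convert_2_text("my_recording.wav")
# yields a transcript, and convert_2_speech(transcript) writes it back out as "response.wav".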
# CHAT MODEL
# Initialize DataFrame to store chat history
chat_history_df = pd.DataFrame(columns=["Timestamp", "Chat"])
# Reset Button
if st.sidebar.button(":arrows_counterclockwise: Reset Chat"):
    # Save the chat history to the DataFrame before clearing it
    if st.session_state.get("messages"):
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        chat_history = "\n".join([f"{m['role']}: {m['content']}" for m in st.session_state.messages])
        new_entry = pd.DataFrame({"Timestamp": [timestamp], "Chat": [chat_history]})
        chat_history_df = pd.concat([chat_history_df, new_entry], ignore_index=True)
        # Save the DataFrame to a CSV file
        chat_history_df.to_csv("chat_history.csv", index=False)
    # Clear the chat messages and reset the full response
    st.session_state.messages = []
    full_response = ""
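# Optional (illustrative, not part of the original flow): saved sessions could be
# reviewed later by reloading the CSV, e.g.:
# past_chats = pd.read_csv("chat_history.csv")
# st.sidebar.dataframe(past_chats)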
# Initialize Chat Messages
if "messages" not in st.session_state:
st.session_state.messages = []
# Initialize full_response outside the user input check
full_response = ""
# Display Chat History
for message in st.session_state.messages:
    if message["role"] != "system":
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
system_text = """You are a helpful, thoughtful, and wise IELTS instructor responsible for testing Speaking Part 1. The user will provide the {subject} they want to talk about.
It's important to follow these guidelines:
- Give only original questions for the provided {subject}.
- Give one question at a time.
For example:
{subject}: Work
What is your job?
Where do you work?
{subject}: Study
What do you study?
Where do you study that?
{subject}: Hometown
Do you live in a house or a flat?
How are the walls decorated?
Let's start the test."""
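# The system prompt above is injected as the "system" message before each chat call,
# keeping the model in the IELTS Part 1 examiner role; the {subject} examples act as
# few-shot guidance for the question style.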
# User Input and AI Response
# For "Chat mode"
# Use st.toggle to allow users to choose input type
# record_audio_input = st.toggle("Record Audio Input", value=False) # for toggle only
if user_input_type == "Chat":
    # if not record_audio_input: # for toggle only
    if prompt := st.chat_input("What is up?"):
        # System (appended once so the examiner instructions are not duplicated every turn)
        if not any(m["role"] == "system" for m in st.session_state.messages):
            st.session_state.messages.append({"role": "system", "content": system_text})
        # User
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        # Assistant
        with st.chat_message("assistant"):
            with st.status("Generating response..."):
                message_placeholder = st.empty()
                for response in openai.ChatCompletion.create(
                    model=model_name,  # Use the selected model name
                    messages=[
                        {"role": m["role"], "content": m["content"]}
                        for m in st.session_state.messages
                    ],
                    temperature=temperature,  # Set temperature
                    max_tokens=max_tokens,  # Set max tokens
                    # top_p=top_p,  # Set top p
                    # frequency_penalty=frequency_penalty,  # Set frequency penalty
                    # presence_penalty=presence_penalty,  # Set presence penalty
                    stream=True,
                ):
                    full_response += response.choices[0].delta.get("content", "")
                    message_placeholder.markdown(full_response + "▌")
                message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})
        st.audio(convert_2_speech(full_response))
elif user_input_type == "Record Audio":
    # else: # for toggle only
    # Record audio when the "Record Audio" button is clicked
    if st.button("Record Audio"):
        st.write("Recording... Please speak for 10 seconds.")
        output = record(seconds=10, filename='my_recording.wav')
        st.write("Recording complete!")
        # Convert the recorded audio to text using the Whisper model
        user_message = convert_2_text(output)
        # System (same examiner instructions as the Chat branch, appended once)
        if not any(m["role"] == "system" for m in st.session_state.messages):
            st.session_state.messages.append({"role": "system", "content": system_text})
        # Display the transcribed text as user input
        st.session_state.messages.append({"role": "user", "content": user_message})
        with st.chat_message("user"):
            st.markdown(user_message)
        # Assistant
        with st.chat_message("assistant"):
            with st.status("Generating response..."):
                message_placeholder = st.empty()
                for response in openai.ChatCompletion.create(
                    model=model_name,  # Use the selected model name
                    messages=[
                        {"role": m["role"], "content": m["content"]}
                        for m in st.session_state.messages
                    ],
                    temperature=temperature,  # Set temperature
                    max_tokens=max_tokens,  # Set max tokens
                    # top_p=top_p,  # Set top p
                    # frequency_penalty=frequency_penalty,  # Set frequency penalty
                    # presence_penalty=presence_penalty,  # Set presence penalty
                    stream=True,
                ):
                    full_response += response.choices[0].delta.get("content", "")
                    message_placeholder.markdown(full_response + "▌")
                message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})
        st.audio(convert_2_speech(full_response))