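# NOTE: the next three lines are residue from the hosting site's file viewer
# (author avatar caption, commit message, commit hash); kept as comments so
# the module parses.
# YoussefA7med's picture
# Update app.py
# 625bc4b verified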
import gradio as gr
import requests
import json
import random
from gradio_client import Client
from dotenv import load_dotenv
import os
import speech_recognition as sr
from pydub import AudioSegment
import re
# Load variables via python-dotenv before reading them from the environment.
load_dotenv()

API_KEY = os.getenv("DEEPSEEK_API_KEY")
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
TTS_PASSWORD = os.getenv("TTS_PASSWORD")

# Fail fast at import time: all three credentials are used further down
# (chat API calls, TTS client construction, TTS requests).
if not (API_KEY and HF_TOKEN and TTS_PASSWORD):
    raise ValueError("Missing required environment variables!")

# Shared clients, created once for the whole process.
TTS_CLIENT = Client(
    "KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited",
    hf_token=HF_TOKEN,
)
recognizer = sr.Recognizer()
# Persona and reply contract for the tutor model. The JSON keys listed here
# (response, corrections, vocabulary, level_assessment, encouragement,
# context_memory, next_question) are the ones the rest of this module reads.
_MAIN_SYSTEM_PROMPT_TEXT = """You are Sam, an intelligent and proactive English tutor. You drive the conversation and actively engage students. Your responses must be in JSON format with these keys:
'response': Your main response (keep it conversational and engaging),
'corrections': ALWAYS provide specific grammar or pronunciation corrections with examples (if none needed, say "Great grammar!"),
'vocabulary': ALWAYS suggest alternative words/phrases with explanations (if none needed, suggest related vocabulary),
'level_assessment': Current assessment (beginner/intermediate/advanced),
'encouragement': A motivating comment,
'context_memory': Important details about the user,
'next_question': A follow-up question to keep conversation flowing
IMPORTANT: You MUST always provide corrections and vocabulary suggestions in every response. Even if the student speaks perfectly, provide positive feedback and suggest advanced vocabulary or alternative expressions.
Your personality:
- Be the conversation driver - ask follow-up questions
- Show genuine interest in the student's life
- Provide corrections naturally without stopping the flow
- Use the student's name frequently
- Build on previous topics
- Be encouraging but provide constructive feedback
- Ask about their day, work, hobbies, culture, goals
Correction guidelines:
- ALWAYS provide corrections field - even if it's positive feedback
- ALWAYS provide vocabulary field - suggest alternatives or related words
- Use format: "Instead of 'X', try saying 'Y'"
- Give pronunciation tips when needed
- If no mistakes, say "Excellent grammar!" or "Perfect sentence structure!"
Vocabulary guidelines:
- ALWAYS suggest vocabulary - even if it's synonyms or advanced alternatives
- Provide explanations for suggested words
- Use format: "Instead of 'good', try 'excellent' or 'outstanding'"
- Suggest topic-related vocabulary
Conversation flow:
- Start with personal questions (name, country, job, hobbies)
- Build conversations around their interests
- Use profession-specific vocabulary
- Ask about their culture and experiences
- Keep the conversation natural and flowing
- Always end with a question to continue the dialogue
Response length: Keep responses conversational (2-3 sentences max for response field)."""

MAIN_SYSTEM_PROMPT = {"role": "system", "content": _MAIN_SYSTEM_PROMPT_TEXT}
# One-shot system prompt used to generate the opening greeting. The model is
# asked to reply with a JSON object holding a single 'greeting' key.
# The em dashes below replace mis-encoded byte sequences ("β€”") that were
# previously being sent to the model verbatim.
WELCOME_PROMPT = {
    "role": "system",
    "content": """Create a heartfelt welcome message that:
1. Introduces you as Sam, an enthusiastic and friendly English tutor who’s excited to guide them
2. Kindly asks for their name and where they’re from in a natural conversational way
3. Expresses genuine excitement about helping them grow
Return the message in JSON format with the key 'greeting'.
Make it feel personal, warm, and inviting — like a tutor who truly cares. Keep it within 2 sentences.
Example:
{"greeting": "Hi there! I'm Sam, your friendly English tutor — so glad you're here! What's your name and where are you from?"}
""",
}
class EnglishTutor:
    """Conversation state and chat-API access for the "Sam" English tutor.

    Holds the message history sent to the DeepSeek chat-completions endpoint
    and a small learner profile (``user_info``) that is refreshed from the
    ``level_assessment`` and ``context_memory`` fields of each model reply.
    """

    API_URL = "https://api.deepseek.com/v1/chat/completions"
    MODEL = "deepseek-chat"
    # Seconds to wait for the chat API. Without a timeout ``requests`` can
    # block forever and freeze the UI callback that triggered the call.
    REQUEST_TIMEOUT = 60

    # Emoji / pictograph code points removed before text goes to TTS:
    # the U+1F000-U+1FAFF emoji blocks, miscellaneous symbols and dingbats,
    # plus the variation selector and zero-width joiner used in sequences.
    _EMOJI_PATTERN = re.compile(
        "[\U0001F000-\U0001FAFF\u2600-\u27BF\u2B00-\u2BFF\uFE0F\u200D]"
    )
    # "name: Ahmed", "name Ahmed" and "name is Ahmed" all yield "Ahmed".
    _NAME_PATTERN = re.compile(r"name(?:\s+is)?[:\s]+([A-Za-z]+)", re.IGNORECASE)
    _COUNTRY_PATTERN = re.compile(
        r"(?:from|country(?:\s+is)?)[:\s]+([A-Za-z\s]+)", re.IGNORECASE
    )

    def __init__(self):
        """Start a fresh conversation with an empty learner profile."""
        self.chat_history = [MAIN_SYSTEM_PROMPT]
        self.user_info = {
            "name": None,
            "level": "beginner",
            "interests": [],
            "country": None,
            "profession": None,
            "goals": None,
        }

    def _request_json_completion(self, messages, temperature):
        """POST *messages* to the chat API and return the reply as a dict.

        Raises on network errors, timeouts, non-2xx statuses, malformed
        payloads, or a reply that is not a JSON object, so each caller can
        apply its own fallback in a single ``except``.
        """
        response = requests.post(
            self.API_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json={
                "model": self.MODEL,
                "messages": messages,
                "temperature": temperature,
                "response_format": {"type": "json_object"},
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        payload = json.loads(response.json()["choices"][0]["message"]["content"])
        if not isinstance(payload, dict):
            raise ValueError("model reply is not a JSON object")
        return payload

    def get_welcome_message(self):
        """Return a model-generated greeting, or a fixed one if the call fails."""
        try:
            welcome_json = self._request_json_completion(
                [WELCOME_PROMPT], random.uniform(0.5, 1.0)
            )
            return welcome_json["greeting"]
        except Exception as e:
            print(f"Error in welcome message: {str(e)}")
            return "Hi! I'm Sam, your English tutor. What's your name and where are you from?"

    def get_bot_response(self, user_message):
        """Send *user_message* to the tutor model and return its reply dict.

        The current learner profile is appended to the message as context.
        On success the profile is updated and both turns are kept in
        ``chat_history``. On any failure the unanswered user turn is rolled
        back and a fixed apology reply with the same keys is returned.
        """
        # Add user context to the message
        context_info = f"User info: {self.user_info}"
        enhanced_message = f"{user_message}\n\n[Context: {context_info}]"
        user_turn = {"role": "user", "content": enhanced_message}
        self.chat_history.append(user_turn)
        try:
            bot_response = self._request_json_completion(
                self.chat_history, random.uniform(0.8, 1.0)
            )
            # Update user info; ignore empty values so known data survives.
            level = bot_response.get("level_assessment")
            if level:
                self.user_info["level"] = level
            if "context_memory" in bot_response:
                self._update_user_info(bot_response["context_memory"])
            self.chat_history.append(
                {"role": "assistant", "content": json.dumps(bot_response)}
            )
            return bot_response
        except Exception as e:
            # Drop the user turn that never got an answer; otherwise every
            # later request would carry a dangling, context-laden message.
            if self.chat_history and self.chat_history[-1] is user_turn:
                self.chat_history.pop()
            print(f"Error getting bot response: {str(e)}")
            return {
                "response": "I apologize, but I couldn't process that properly. Could you try again?",
                "corrections": "",
                "vocabulary": "",
                "level_assessment": "beginner",
                "encouragement": "Don't worry, let's keep practicing!",
                "context_memory": "",
                "next_question": "What would you like to talk about?",
            }

    def _update_user_info(self, context_memory):
        """Merge details from the model's ``context_memory`` into ``user_info``.

        A string is scanned for a name and a country; a dict has its keys
        matched against the profile keys. Other types are ignored.
        """
        if isinstance(context_memory, str):
            # Try to extract name if mentioned
            name_match = self._NAME_PATTERN.search(context_memory)
            if name_match:
                self.user_info["name"] = name_match.group(1)
            # Try to extract country if mentioned
            country_match = self._COUNTRY_PATTERN.search(context_memory)
            if country_match:
                country = country_match.group(1).strip()
                if country:
                    self.user_info["country"] = country
        elif isinstance(context_memory, dict):
            for key in self.user_info:
                value = context_memory.get(key)
                # Skip null/empty values so they do not erase known details.
                if value:
                    self.user_info[key] = value

    def clean_text_for_tts(self, text):
        """Return *text* stripped of emoji and normalised for speech synthesis.

        Emoji are removed, runs of whitespace collapse to single spaces, and
        a word repeated at the very start (e.g. "Hi hi there") is dropped once.
        ``None`` is treated as an empty string.
        """
        text = "" if text is None else str(text)
        # Remove emojis that might cause TTS issues
        text = self._EMOJI_PATTERN.sub('', text)
        # Remove extra spaces and newlines
        text = re.sub(r'\s+', ' ', text).strip()
        # Remove duplicate words at the beginning
        words = text.split()
        if len(words) > 1 and words[0].lower() == words[1].lower():
            text = ' '.join(words[1:])
        return text
def convert_audio_to_text(audio_path):
    """Transcribe the recording at *audio_path* to English text.

    Input that is not already a WAV file is first converted with pydub into
    ``<audio_path>.wav``; that intermediate file is deleted again before
    returning so repeated recordings do not pile up on disk.

    Returns the recognised text, or ``None`` if conversion or recognition
    fails for any reason (the error is printed).
    """
    converted_path = None
    try:
        # Case-insensitive so "clip.WAV" is not needlessly re-encoded.
        if not audio_path.lower().endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            converted_path = audio_path + '.wav'
            audio.export(converted_path, format='wav')
            audio_path = converted_path
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio, language='en-US')
    except Exception as e:
        print(f"Error in speech recognition: {str(e)}")
        return None
    finally:
        if converted_path is not None:
            try:
                os.remove(converted_path)
            except OSError:
                # Best-effort cleanup: the file may never have been written.
                pass
def text_to_speech(text):
    """Synthesize *text* with the remote TTS client and return the audio result.

    Returns ``None`` without contacting the service when *text* is empty or
    only whitespace, and also when the remote call fails (the error is
    printed). If the client returns a list or tuple, its first element is
    returned; otherwise the result is returned unchanged.
    """
    # Nothing to say: skip the remote round-trip entirely.
    if text is None or not str(text).strip():
        return None
    try:
        result = TTS_CLIENT.predict(
            password=TTS_PASSWORD,
            prompt=text,
            voice="coral",
            emotion="Warm and friendly",
            use_random_seed=True,
            specific_seed=12345,
            api_name="/text_to_speech_app"
        )
        return result[0] if isinstance(result, (list, tuple)) else result
    except Exception as e:
        print(f"Error in text to speech: {str(e)}")
        return None
# Module-level tutor used by initialize_chat() and process_audio();
# clear_chat() rebinds it to start a new conversation.
# NOTE(review): as a module global this state looks shared by every session
# served by this process — confirm whether per-session state is required.
tutor = EnglishTutor()
def initialize_chat():
    """Build the initial UI state: greeting history, audio, transcript, feedback.

    Returns a 4-tuple ``(history, welcome_audio, transcript, corrections)``
    matching the outputs wired to this callback. If fetching or voicing the
    greeting raises, a fixed greeting is used and no audio is returned.
    """
    try:
        welcome = tutor.get_welcome_message()
        welcome_audio = text_to_speech(tutor.clean_text_for_tts(welcome))
    except Exception as e:
        print(f"Error initializing chat: {str(e)}")
        welcome = "Hi! I'm Sam, your English tutor. What's your name and where are you from?"
        welcome_audio = None
    history = [{"role": "assistant", "content": welcome}]
    # The robot emoji replaces a mis-encoded byte sequence that was being
    # shown to the user as garbage characters.
    return history, welcome_audio, f"🤖 Sam: {welcome}", ""
def _format_feedback_value(value):
    """Render a model-supplied feedback field (str, list or dict) as text."""
    if isinstance(value, dict):
        return "\n".join(f"• '{k}' → '{v}'" for k, v in value.items())
    if isinstance(value, (list, tuple)):
        return "\n".join(f"• {item}" for item in value)
    return str(value)


def _build_feedback(bot_response, user_info):
    """Assemble the "Learning Corner" text for one tutor reply."""
    parts = []
    # Always show current level
    level = bot_response.get("level_assessment")
    if level:
        # str() guards against a non-string level from the model.
        parts.append(f"📊 **Current Level:** {str(level).title()}")
    # Show corrections if available
    corrections = bot_response.get("corrections")
    if corrections and str(corrections).strip():
        parts.append(
            f"✍️ **Grammar Corrections:**\n{_format_feedback_value(corrections)}"
        )
    # Show vocabulary if available
    vocabulary = bot_response.get("vocabulary")
    if vocabulary and str(vocabulary).strip():
        parts.append(
            f"📚 **Vocabulary Suggestions:**\n{_format_feedback_value(vocabulary)}"
        )
    # Show encouragement
    encouragement = bot_response.get("encouragement")
    if encouragement:
        parts.append(f"💡 **Encouragement:**\n{encouragement}")
    # Show the learner profile once a name is known
    if user_info.get("name"):
        labelled = (("Name", "name"), ("Country", "country"), ("Level", "level"))
        profile = [
            f"{label}: {user_info[key]}" for label, key in labelled if user_info.get(key)
        ]
        parts.append(f"👤 **Your Profile:**\n{' | '.join(profile)}")
    # If still nothing to show, fall back to a default message
    if not parts:
        parts.append(
            "🎯 **Feedback:** Keep practicing! Sam is analyzing your English and will provide feedback soon."
        )
    return "\n\n".join(parts)


def process_audio(audio, history, transcript, corrections):
    """Handle one recorded utterance and return the updated UI state.

    Transcribes *audio*, asks the tutor for a reply, voices it, and returns
    ``(history, audio_response, transcript, corrections)`` with the new turn
    appended to each. When there is no audio, nothing was recognised, or any
    step raises, the inputs are returned unchanged with ``None`` for audio.
    """
    try:
        if audio is None:
            return history, None, transcript, corrections
        user_message = convert_audio_to_text(audio)
        if not user_message:
            return history, None, transcript, corrections
        bot_response = tutor.get_bot_response(user_message)

        # Spoken reply: main response, then follow-up question, then encouragement.
        spoken_parts = (
            bot_response.get("response", ""),
            bot_response.get("next_question"),
            bot_response.get("encouragement"),
        )
        main_response = " ".join(str(part) for part in spoken_parts if part)
        audio_response = text_to_speech(tutor.clean_text_for_tts(main_response))

        # Work on a copy so a failure below cannot leave the caller's list
        # half-updated while the transcript stays unchanged.
        updated_history = list(history or [])
        updated_history.append({"role": "user", "content": user_message})
        updated_history.append({"role": "assistant", "content": main_response})

        new_transcript = (
            (transcript or "")
            + f"\n\n🎀 You: {user_message}\n🤖 Sam: {main_response}"
        )

        # Label each feedback entry with the start of what the user said.
        if len(user_message) > 30:
            snippet = f"[{user_message[:30]}...]"
        else:
            snippet = f"[{user_message}]"
        feedback = _build_feedback(bot_response, tutor.user_info)
        entry = f"--- Latest Response {snippet} ---\n{feedback}"
        new_corrections = f"{corrections}\n\n{entry}" if corrections else entry

        return updated_history, audio_response, new_transcript, new_corrections
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        return history, None, transcript, corrections
def submit_recording(audio, history, transcript, corrections):
    """Forward a finished recording to process_audio and return its result."""
    outputs = process_audio(audio, history, transcript, corrections)
    return outputs
def clear_chat():
    """Replace the module-level tutor with a new one and restart the chat.

    Returns whatever initialize_chat() returns for the fresh conversation.
    """
    global tutor
    fresh_tutor = EnglishTutor()
    tutor = fresh_tutor
    initial_state = initialize_chat()
    return initial_state
# Gradio UI: chat + audio controls on the left, transcript and feedback on
# the right. Emoji in the labels below replace mis-encoded byte sequences that
# were being rendered as garbage characters.
# NOTE(review): the nesting of rows/columns was reconstructed from the order of
# the components — confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎓 English Learning Assistant with Sam")
    gr.Markdown("🎀 **Record your voice** - Sam will automatically respond when you finish recording and help improve your English!")

    with gr.Row():
        # Left: conversation and audio controls.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                type='messages',
                avatar_images=("🎀", "🤖")
            )
            with gr.Row():
                with gr.Column(scale=1):
                    audio_input = gr.Audio(
                        label="🎙️ Record your voice (auto-submits when finished)",
                        type="filepath",
                        show_label=True
                    )
                with gr.Column(scale=1):
                    audio_output = gr.Audio(
                        label="🔊 Sam's response",
                        type="filepath",
                        show_label=True,
                        autoplay=True
                    )

        # Right: running transcript and learning feedback.
        with gr.Column(scale=2):
            gr.Markdown("### 📝 Live Transcript")
            transcript_display = gr.Textbox(
                lines=10,
                max_lines=10,
                show_label=False,
                interactive=False,
                placeholder="Your conversation will appear here...",
                container=True
            )
            gr.Markdown("### 📚 Learning Corner")
            corrections_display = gr.Textbox(
                lines=8,
                max_lines=8,
                show_label=False,
                interactive=False,
                placeholder="Grammar corrections, vocabulary suggestions, and level assessment will appear here...",
                container=True
            )

    with gr.Row():
        clear_btn = gr.Button("🔄 Start New Conversation", variant="secondary", size="lg")
        gr.Markdown("💡 **Tip**: Sam will actively guide the conversation and provide personalized feedback!")

    # Auto-submit when audio is recorded
    audio_input.change(
        process_audio,
        inputs=[audio_input, chatbot, transcript_display, corrections_display],
        outputs=[chatbot, audio_output, transcript_display, corrections_display]
    )
    # Reset the tutor and show a fresh greeting.
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, audio_output, transcript_display, corrections_display]
    )
    # Greet the user as soon as the page loads.
    demo.load(
        initialize_chat,
        outputs=[chatbot, audio_output, transcript_display, corrections_display]
    )

if __name__ == "__main__":
    demo.launch()