Spaces:
Sleeping
Sleeping
import gradio as gr | |
import requests | |
import json | |
import random | |
from gradio_client import Client | |
from dotenv import load_dotenv | |
import os | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
import re | |
# Pull settings from a local .env file (if present) into the environment
# before the variables below are read.
load_dotenv()

API_KEY = os.getenv("DEEPSEEK_API_KEY")
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
TTS_PASSWORD = os.getenv("TTS_PASSWORD")

# Fail fast at import time and name the variables that are absent, so a
# misconfigured deployment can be fixed without reading the source.
_missing_env = [
    env_name
    for env_name, env_value in (
        ("DEEPSEEK_API_KEY", API_KEY),
        ("HUGGINGFACE_TOKEN", HF_TOKEN),
        ("TTS_PASSWORD", TTS_PASSWORD),
    )
    if not env_value
]
if _missing_env:
    raise ValueError(
        "Missing required environment variables! "
        f"({', '.join(_missing_env)})"
    )

# Remote text-to-speech Space client and the shared speech recognizer used by
# the helper functions below.
TTS_CLIENT = Client("KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited", hf_token=HF_TOKEN)
recognizer = sr.Recognizer()
# System message that opens every tutoring conversation. It tells the model to
# answer with a JSON object whose keys ('response', 'corrections',
# 'vocabulary', 'level_assessment', 'encouragement', 'context_memory',
# 'next_question') are the ones the rest of this file reads.
MAIN_SYSTEM_PROMPT = dict(
    role="system",
    content="""You are Sam, an intelligent and proactive English tutor. You drive the conversation and actively engage students. Your responses must be in JSON format with these keys:
'response': Your main response (keep it conversational and engaging),
'corrections': ALWAYS provide specific grammar or pronunciation corrections with examples (if none needed, say "Great grammar!"),
'vocabulary': ALWAYS suggest alternative words/phrases with explanations (if none needed, suggest related vocabulary),
'level_assessment': Current assessment (beginner/intermediate/advanced),
'encouragement': A motivating comment,
'context_memory': Important details about the user,
'next_question': A follow-up question to keep conversation flowing
IMPORTANT: You MUST always provide corrections and vocabulary suggestions in every response. Even if the student speaks perfectly, provide positive feedback and suggest advanced vocabulary or alternative expressions.
Your personality:
- Be the conversation driver - ask follow-up questions
- Show genuine interest in the student's life
- Provide corrections naturally without stopping the flow
- Use the student's name frequently
- Build on previous topics
- Be encouraging but provide constructive feedback
- Ask about their day, work, hobbies, culture, goals
Correction guidelines:
- ALWAYS provide corrections field - even if it's positive feedback
- ALWAYS provide vocabulary field - suggest alternatives or related words
- Use format: "Instead of 'X', try saying 'Y'"
- Give pronunciation tips when needed
- If no mistakes, say "Excellent grammar!" or "Perfect sentence structure!"
Vocabulary guidelines:
- ALWAYS suggest vocabulary - even if it's synonyms or advanced alternatives
- Provide explanations for suggested words
- Use format: "Instead of 'good', try 'excellent' or 'outstanding'"
- Suggest topic-related vocabulary
Conversation flow:
- Start with personal questions (name, country, job, hobbies)
- Build conversations around their interests
- Use profession-specific vocabulary
- Ask about their culture and experiences
- Keep the conversation natural and flowing
- Always end with a question to continue the dialogue
Response length: Keep responses conversational (2-3 sentences max for response field).""",
)
# One-off system message used to generate the opening greeting; the model is
# asked to reply with a JSON object holding a single 'greeting' key.
WELCOME_PROMPT = dict(
    role="system",
    content="""Create a heartfelt welcome message that:
1. Introduces you as Sam, an enthusiastic and friendly English tutor whoβs excited to guide them
2. Kindly asks for their name and where theyβre from in a natural conversational way
3. Expresses genuine excitement about helping them grow
Return the message in JSON format with the key 'greeting'.
Make it feel personal, warm, and inviting β like a tutor who truly cares. Keep it within 2 sentences.
Example:
{"greeting": "Hi there! I'm Sam, your friendly English tutor β so glad you're here! What's your name and where are you from?"}
""",
)
class EnglishTutor:
    """Conversation state and chat-completion access for the tutor persona.

    Instance attributes (set in ``__init__``):
        chat_history: list of chat messages sent to the API on every turn,
            starting with ``MAIN_SYSTEM_PROMPT``.
        user_info: dict of facts remembered about the learner (name, level,
            interests, country, profession, goals).
    """

    # Chat-completions endpoint used by every request.
    API_URL = "https://api.deepseek.com/v1/chat/completions"
    # Seconds to wait for the API; without a timeout a stalled connection
    # would block the calling UI handler indefinitely.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        self.chat_history = [MAIN_SYSTEM_PROMPT]
        self.user_info = {
            "name": None,
            "level": "beginner",
            "interests": [],
            "country": None,
            "profession": None,
            "goals": None,
        }

    def _chat_completion(self, messages, temperature):
        """POST ``messages`` to the API and return the reply content parsed as JSON.

        Raises on network errors, timeouts, non-2xx statuses and malformed
        payloads; the public methods wrap this call in their own fallbacks.
        """
        response = requests.post(
            self.API_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json={
                "model": "deepseek-chat",
                "messages": messages,
                "temperature": temperature,
                "response_format": {"type": "json_object"},
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Surface HTTP errors explicitly instead of as an opaque KeyError
        # when the error body lacks "choices".
        response.raise_for_status()
        return json.loads(response.json()["choices"][0]["message"]["content"])

    def get_welcome_message(self):
        """Return a generated greeting, or a fixed greeting if the request fails."""
        try:
            welcome_json = self._chat_completion(
                [WELCOME_PROMPT], random.uniform(0.5, 1.0)
            )
            return welcome_json["greeting"]
        except Exception as e:
            print(f"Error in welcome message: {str(e)}")
            return "Hi! I'm Sam, your English tutor. What's your name and where are you from?"

    def get_bot_response(self, user_message):
        """Send ``user_message`` to the tutor model and return its reply dict.

        The message is sent together with the remembered ``user_info``. On
        success the reply is appended to ``chat_history`` and ``user_info`` is
        refreshed from it. On any failure a canned reply dict with the same
        keys is returned and the history is left as it was before the call.
        """
        user_turn = None
        try:
            # Add user context to the message
            context_info = f"User info: {self.user_info}"
            enhanced_message = f"{user_message}\n\n[Context: {context_info}]"
            user_turn = {"role": "user", "content": enhanced_message}
            self.chat_history.append(user_turn)

            bot_response = self._chat_completion(
                self.chat_history, random.uniform(0.8, 1.0)
            )
            if not isinstance(bot_response, dict):
                # Callers use .get() on the result, so anything else is unusable.
                raise ValueError("Model reply is not a JSON object")

            # Update user info
            if "level_assessment" in bot_response:
                self.user_info["level"] = bot_response["level_assessment"]
            if "context_memory" in bot_response:
                self._update_user_info(bot_response["context_memory"])

            self.chat_history.append(
                {"role": "assistant", "content": json.dumps(bot_response)}
            )
            return bot_response
        except Exception as e:
            print(f"Error getting bot response: {str(e)}")
            # Roll back the unanswered user turn so the next request does not
            # carry a dangling message with no assistant reply.
            if user_turn is not None and self.chat_history and self.chat_history[-1] is user_turn:
                self.chat_history.pop()
            return {
                "response": "I apologize, but I couldn't process that properly. Could you try again?",
                "corrections": "",
                "vocabulary": "",
                "level_assessment": "beginner",
                "encouragement": "Don't worry, let's keep practicing!",
                "context_memory": "",
                "next_question": "What would you like to talk about?",
            }

    def _update_user_info(self, context_memory):
        """Merge facts from the model's ``context_memory`` into ``user_info``.

        A string is scanned for a name and a country; a dict is merged key by
        key. Any other type is ignored.
        """
        if isinstance(context_memory, str):
            # Case-insensitive so "Name: Ana" matches; the optional "is" keeps
            # "name is Ana" from capturing the word "is" as the name.
            name_match = re.search(
                r"name(?:\s+is\b)?[:\s]+([A-Za-z]+)", context_memory, re.IGNORECASE
            )
            if name_match:
                self.user_info["name"] = name_match.group(1)
            # NOTE(review): the capture is greedy over letters and spaces, so
            # trailing words after the country are captured too.
            country_match = re.search(
                r"(?:from|country)(?:\s+is\b)?[:\s]+([A-Za-z\s]+)",
                context_memory,
                re.IGNORECASE,
            )
            if country_match:
                self.user_info["country"] = country_match.group(1).strip()
        elif isinstance(context_memory, dict):
            for key in self.user_info:
                # Skip null/empty values so an incomplete reply cannot erase
                # something already learned.
                if context_memory.get(key):
                    self.user_info[key] = context_memory[key]

    def clean_text_for_tts(self, text):
        """Return ``text`` normalised for the text-to-speech service."""
        # Remove emojis and special characters that might cause TTS issues
        # NOTE(review): this character class looks mis-encoded (probably a list
        # of emoji originally); as written it strips the listed characters.
        text = re.sub(r'[π―πβ¨π«π€π€]', '', text)
        # Remove extra spaces and newlines
        text = re.sub(r'\s+', ' ', text).strip()
        # Remove duplicate words at the beginning
        words = text.split()
        if len(words) > 1 and words[0].lower() == words[1].lower():
            text = ' '.join(words[1:])
        return text
def convert_audio_to_text(audio_path):
    """Transcribe the audio file at ``audio_path`` to English text.

    Files whose name does not end in ``.wav`` (case-insensitive) are first
    converted to a temporary WAV next to the original, which is removed again
    before returning.

    Returns the recognised text, or ``None`` when there is no input or any
    step fails (the error is printed).
    """
    if not audio_path:
        return None

    converted_path = None
    try:
        if not audio_path.lower().endswith('.wav'):
            segment = AudioSegment.from_file(audio_path)
            # Recorded before exporting so a partially written file is also
            # cleaned up in the finally clause.
            converted_path = audio_path + '.wav'
            segment.export(converted_path, format='wav')
            audio_path = converted_path
        with sr.AudioFile(audio_path) as source:
            recorded = recognizer.record(source)
            return recognizer.recognize_google(recorded, language='en-US')
    except Exception as e:
        print(f"Error in speech recognition: {str(e)}")
        return None
    finally:
        if converted_path is not None:
            try:
                os.remove(converted_path)
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass
def text_to_speech(text):
    """Synthesise ``text`` through ``TTS_CLIENT`` and return the result.

    Returns the first element when the client answers with a list/tuple,
    otherwise the raw result. Returns ``None`` for blank input or when the
    remote call fails (the error is printed).
    """
    # Nothing to say: skip the remote round-trip instead of sending an empty prompt.
    if not text or not str(text).strip():
        return None
    try:
        result = TTS_CLIENT.predict(
            password=TTS_PASSWORD,
            prompt=text,
            voice="coral",
            emotion="Warm and friendly",
            use_random_seed=True,
            specific_seed=12345,
            api_name="/text_to_speech_app",
        )
        return result[0] if isinstance(result, (list, tuple)) else result
    except Exception as e:
        print(f"Error in text to speech: {str(e)}")
        return None
# Module-level tutor instance used by initialize_chat() and process_audio(),
# and replaced wholesale by clear_chat().
# NOTE(review): being a module global, its history and user_info are presumably
# shared by every browser session served by this process — confirm that is intended.
tutor = EnglishTutor()
def initialize_chat():
    """Produce the opening state for the four UI outputs.

    Returns a tuple ``(history, audio, transcript, corrections)``: the chat
    history holding Sam's greeting, the synthesised greeting audio, the
    transcript text and an empty corrections string. If anything raises, a
    fixed greeting is used and the audio is ``None``.
    """
    try:
        greeting = tutor.get_welcome_message()
        spoken_greeting = text_to_speech(tutor.clean_text_for_tts(greeting))
        opening_history = [{"role": "assistant", "content": greeting}]
        return opening_history, spoken_greeting, f"π€ Sam: {greeting}", ""
    except Exception as e:
        print(f"Error initializing chat: {str(e)}")
        fallback = "Hi! I'm Sam, your English tutor. What's your name and where are you from?"
        return (
            [{"role": "assistant", "content": fallback}],
            None,
            f"π€ Sam: {fallback}",
            "",
        )
def _compose_spoken_reply(bot_response):
    """Join the reply, follow-up question and encouragement into one utterance."""
    pieces = (
        bot_response.get("response"),
        bot_response.get("next_question"),
        bot_response.get("encouragement"),
    )
    return " ".join(str(piece) for piece in pieces if piece)


def _render_feedback_value(value):
    """Render a feedback field (str, dict or list) as display text; '' when empty."""
    if not value:
        return ""
    if isinstance(value, dict):
        return "\n".join(f"β’ '{k}' β '{v}'" for k, v in value.items())
    if isinstance(value, (list, tuple)):
        return "\n".join(f"- {item}" for item in value)
    rendered = str(value)
    return rendered if rendered.strip() else ""


def _build_feedback_sections(bot_response, user_info):
    """Return the "Learning Corner" sections for one tutor reply, in display order."""
    sections = []

    # Always show current level
    level = bot_response.get("level_assessment")
    if level:
        # str() guards against a non-string level, which would have no .title().
        sections.append(f"π **Current Level:** {str(level).title()}")

    # Show corrections if available
    corrections_text = _render_feedback_value(bot_response.get("corrections"))
    if corrections_text:
        sections.append(f"βοΈ **Grammar Corrections:**\n{corrections_text}")

    # Show vocabulary if available
    vocab_text = _render_feedback_value(bot_response.get("vocabulary"))
    if vocab_text:
        sections.append(f"π **Vocabulary Suggestions:**\n{vocab_text}")

    # Show encouragement
    if bot_response.get("encouragement"):
        sections.append(f"π‘ **Encouragement:**\n{bot_response['encouragement']}")

    # The profile is shown only once the learner's name is known.
    if user_info.get("name"):
        profile = [
            f"{label}: {user_info[key]}"
            for label, key in (("Name", "name"), ("Country", "country"), ("Level", "level"))
            if user_info.get(key)
        ]
        sections.append(f"π€ **Your Profile:**\n{' | '.join(profile)}")

    # If still no corrections, show a default message
    if not sections:
        sections.append("π― **Feedback:** Keep practicing! Sam is analyzing your English and will provide feedback soon.")
    return sections


def process_audio(audio, history, transcript, corrections):
    """Handle one recorded utterance and return the updated UI state.

    Args:
        audio: path of the recorded audio file, or ``None`` when there is none.
        history: chat messages currently shown (list of role/content dicts) or ``None``.
        transcript: transcript text accumulated so far.
        corrections: feedback text accumulated so far.

    Returns:
        ``(history, audio_response, transcript, corrections)``. When there is
        no audio, nothing was recognised, or an error occurs, the inputs are
        returned unchanged with ``None`` as the audio response.
    """
    try:
        if audio is None:
            return history, None, transcript, corrections
        user_message = convert_audio_to_text(audio)
        if not user_message:
            return history, None, transcript, corrections

        bot_response = tutor.get_bot_response(user_message)
        main_response = _compose_spoken_reply(bot_response)
        audio_response = text_to_speech(tutor.clean_text_for_tts(main_response))

        # Build a new list so the caller's history is untouched if a later
        # step fails and the original inputs are returned.
        updated_history = list(history or [])
        updated_history.append({"role": "user", "content": user_message})
        updated_history.append({"role": "assistant", "content": main_response})

        new_transcript = (transcript or "") + f"\n\nπ€ You: {user_message}\nπ€ Sam: {main_response}"

        new_correction_text = "\n\n".join(
            _build_feedback_sections(bot_response, tutor.user_info)
        )
        # Label each feedback entry with the start of the utterance it refers to.
        timestamp = f"[{user_message[:30]}...]" if len(user_message) > 30 else f"[{user_message}]"
        latest_entry = f"--- Latest Response {timestamp} ---\n{new_correction_text}"
        new_corrections = f"{corrections}\n\n{latest_entry}" if corrections else latest_entry

        return updated_history, audio_response, new_transcript, new_corrections
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        return history, None, transcript, corrections
def submit_recording(audio, history, transcript, corrections):
    """Delegate a submitted recording to ``process_audio`` and return its result."""
    updated_outputs = process_audio(audio, history, transcript, corrections)
    return updated_outputs
def clear_chat():
    """Replace the module-level tutor with a fresh one and return a new opening state."""
    global tutor
    # A new instance drops the previous chat history and remembered user info.
    tutor = EnglishTutor()
    fresh_outputs = initialize_chat()
    return fresh_outputs
# Gradio UI: a chat column (chat log plus audio in/out), a side column
# (transcript and feedback text boxes) and a reset button, followed by the
# event wiring that connects them to the handler functions above.
# NOTE(review): this block was reviewed from a copy whose indentation was
# lost; the row/column nesting below is the most plausible reading — confirm
# against the rendered layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π English Learning Assistant with Sam")
    gr.Markdown("π€ **Record your voice** - Sam will automatically respond when you finish recording and help improve your English!")
    with gr.Row():
        # Left column: conversation view and audio controls.
        with gr.Column(scale=3):
            # NOTE(review): Gradio documents avatar_images as image file
            # paths/URLs in (user, bot) order — confirm these string values
            # render as intended.
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                type='messages',
                avatar_images=("π€", "π€")
            )
            with gr.Row():
                with gr.Column(scale=1):
                    # type="filepath": the handler receives a path to the audio file.
                    audio_input = gr.Audio(
                        label="ποΈ Record your voice (auto-submits when finished)",
                        type="filepath",
                        show_label=True
                    )
                with gr.Column(scale=1):
                    # autoplay=True so the reply starts playing when it arrives.
                    audio_output = gr.Audio(
                        label="π Sam's response",
                        type="filepath",
                        show_label=True,
                        autoplay=True
                    )
        # Right column: read-only transcript and feedback panels.
        with gr.Column(scale=2):
            gr.Markdown("### π Live Transcript")
            transcript_display = gr.Textbox(
                lines=10,
                max_lines=10,
                show_label=False,
                interactive=False,
                placeholder="Your conversation will appear here...",
                container=True
            )
            gr.Markdown("### π Learning Corner")
            corrections_display = gr.Textbox(
                lines=8,
                max_lines=8,
                show_label=False,
                interactive=False,
                placeholder="Grammar corrections, vocabulary suggestions, and level assessment will appear here...",
                container=True
            )
    with gr.Row():
        clear_btn = gr.Button("π Start New Conversation", variant="secondary", size="lg")
    gr.Markdown("π‘ **Tip**: Sam will actively guide the conversation and provide personalized feedback!")
    # Auto-submit when audio is recorded: any change of the audio input runs
    # process_audio, which returns early when the value is None.
    audio_input.change(
        process_audio,
        inputs=[audio_input, chatbot, transcript_display, corrections_display],
        outputs=[chatbot, audio_output, transcript_display, corrections_display]
    )
    # Reset button: swap in a fresh tutor and show a new greeting.
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, audio_output, transcript_display, corrections_display]
    )
    # On page load: populate the four outputs with the opening greeting.
    demo.load(
        initialize_chat,
        outputs=[chatbot, audio_output, transcript_display, corrections_display]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()