# sultan-hassan's picture
# Update app.py
# bb0cac5 verified
import os
import gradio as gr
#import elevenlabs
#from elevenlabs.client import ElevenLabs
#from speech_recognition import Recognizer, AudioFile
import io
from google import genai
from google.genai import types
#from dotenv import load_dotenv
#from elevenlabs import play
from TTS.api import TTS
#load_dotenv()
#elevenlabs = ElevenLabs(
# api_key=os.getenv("ELEVENLABS_API_KEY"),
#)
# --- Text-to-speech engine ---
# Coqui TTS model, instantiated once at import time with the GPU disabled.
model_tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    progress_bar=False,
    gpu=False,
)

# --- Gemini client ---
# The API key is read from the environment rather than stored in the source.
genai_api_key = os.getenv("GOOGLE_API_KEY")
client = genai.Client(api_key=genai_api_key)

# --- Voice ---
# Identifier passed as the `voice` argument of text_to_speech().
voice = "p225"

# --- Language tutor parameters ---
target_language = "Arabic"
difficulty = 1  # 1 = Easy, 2 = Medium, 3 = Hard
# --- Functions ---
def generate_question(difficulty):
    """Ask Gemini for one practice question in the module's target language.

    Args:
        difficulty: Difficulty level interpolated into the prompt
            (the module documents 1 = Easy, 2 = Medium, 3 = Hard).

    Returns:
        The generated question with surrounding whitespace removed.

    Raises:
        RuntimeError: If the response contains no text.
    """
    prompt = f"Generate a simple {target_language} question for a language learner at difficulty level {difficulty}. Just the question, no extra text."
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        # Presumably a high temperature so repeated requests vary.
        config=types.GenerateContentConfig(temperature=0.95),
    )
    # `response.text` is None when the reply carries no text part; fail with
    # a clear message instead of an AttributeError on `.strip()`.
    text = response.text
    if text is None:
        raise RuntimeError("Gemini returned no text for the question prompt.")
    return text.strip()
def evaluate_answer(question, answer):
    """Ask Gemini to grade a learner's answer to a question.

    Args:
        question: The question that was posed to the learner.
        answer: The learner's (transcribed) answer.

    Returns:
        Gemini's feedback text with surrounding whitespace removed. The
        prompt requests concise feedback and a 1-5 score.

    Raises:
        RuntimeError: If the response contains no text.
    """
    # The tutor language follows `target_language`, matching
    # generate_question(), instead of a hard-coded language name.
    prompt = (
        f"You are a {target_language} language tutor. "
        f"Evaluate the following answer to the question: '{question}'. "
        f"Answer: '{answer}'. "
        "Provide feedback on grammar, vocabulary, and fluency. "
        "Keep the feedback concise (under 50 words). "
        "Also, give a score from 1-5 (1 being very poor, 5 being excellent)."
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        # Presumably a low temperature so grading stays consistent.
        config=types.GenerateContentConfig(temperature=0.1),
    )
    # `response.text` is None when the reply carries no text part; fail with
    # a clear message instead of an AttributeError on `.strip()`.
    text = response.text
    if text is None:
        raise RuntimeError("Gemini returned no text for the evaluation prompt.")
    return text.strip()
def text_to_speech(text, voice):
    """Synthesize `text` with the module-level Coqui TTS model.

    Args:
        text: The text to speak.
        voice: Accepted for interface compatibility; it is not forwarded
            to the model.

    Returns:
        The result of `tts_to_file` -- presumably the path of the written
        audio file (TODO confirm against the TTS API).
    """
    # The model instance is bound to `model_tts` at module level; referring
    # to `tts_model` here raised NameError on every call.
    # NOTE(review): `target_language` holds a display name ("Arabic"); the
    # model may expect a language code such as "ar" and a speaker or
    # speaker_wav argument -- confirm against the TTS documentation.
    return model_tts.tts_to_file(text=text, language=target_language)
def transcribe_audio(audio_file):
    """Transcribe a recorded answer using the SpeechRecognition library.

    Args:
        audio_file: Audio source handed to `AudioFile`.

    Returns:
        The recognized text, or a string starting with
        "Error transcribing audio:" when recognition raises. Errors raised
        while opening or reading the audio source are not caught here.
    """
    # The module-level import of these names is commented out, so they are
    # imported locally; without this the function raised NameError.
    from speech_recognition import AudioFile, Recognizer

    recognizer = Recognizer()
    with AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        # NOTE(review): `target_language` is a display name ("Arabic");
        # recognize_google may expect a language tag such as "ar-SA" --
        # confirm and map accordingly.
        return recognizer.recognize_google(audio, language=target_language)
    except Exception as e:
        return f"Error transcribing audio: {e}"
def run_tutor():
    """Produce a new question and its spoken rendering.

    Returns:
        A `(question_audio, question_text)` pair, in the order expected by
        the "Generate Question" button's outputs.
    """
    question_text = generate_question(difficulty)
    return text_to_speech(question_text, voice), question_text
def process_answer(audio_file, question, question_audio):
    """Transcribe the learner's recording and return spoken and written feedback.

    Args:
        audio_file: The recorded answer, or None when no recording is
            present (e.g. the audio component was cleared).
        question: The question text the answer responds to.
        question_audio: Unused; accepted because the UI wires it as an input.

    Returns:
        A `(feedback_audio, feedback_text)` pair. When there is nothing to
        evaluate, `feedback_audio` is None and `feedback_text` explains why.
    """
    # The change event also fires when the recording is removed; there is
    # nothing to transcribe in that case.
    if audio_file is None:
        return None, "No answer was recorded. Please record your answer and try again."
    # Without a question there is nothing to grade the answer against.
    if not question:
        return None, "Please generate a question first."

    # NOTE(review): transcribe_audio() reports recognition failures as an
    # "Error transcribing audio: ..." string, which is then graded as if it
    # were the learner's answer.
    user_answer = transcribe_audio(audio_file)
    feedback = evaluate_answer(question, user_answer)
    feedback_audio = text_to_speech(feedback, voice)
    return feedback_audio, feedback
# --- Gradio Interface ---
# NOTE(review): the original indentation was lost; which components sit
# inside each Row below is a best guess and only affects layout.
with gr.Blocks() as demo:
    # The title follows `target_language` instead of a hard-coded name.
    gr.Markdown(f"# Adaptive Language Tutor ({target_language})")
    with gr.Row():
        generate_button = gr.Button("Generate Question")
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    # type="filepath" makes the callback receive a path to the recording.
    # The default hands over a (sample_rate, array) tuple, which
    # transcribe_audio() cannot open as an audio file.
    mic_input = gr.Audio(label="Speak Your Answer", type="filepath")

    # Generate a new question and show both its audio and its text.
    generate_button.click(
        fn=run_tutor,
        outputs=[question_audio_output, question_text_output],
    )
    # Evaluate the answer whenever the recording changes.
    mic_input.change(
        fn=process_answer,
        inputs=[mic_input, question_text_output, question_audio_output],
        outputs=[feedback_audio_output, feedback_text_output],
    )

demo.launch()
# Earlier single-callback version of the tutor, kept inside a string literal
# for reference only; none of the code in it is executed.
'''
def run_tutor(audio_file):
"""Main function to run the tutor."""
question = generate_question(difficulty)
question_audio = text_to_speech(question, voice)
# Display the question in the interface
yield question_audio, question, None, None
# Transcribe the user's answer
user_answer = transcribe_audio(audio_file)
# Evaluate the answer
feedback = evaluate_answer(question, user_answer)
feedback_audio = text_to_speech(feedback, voice)
yield None, None, feedback_audio, feedback
#return question_audio, feedback_audio, question, user_answer, feedback
# --- Gradio Interface ---
with gr.Blocks() as demo:
gr.Markdown("# Adaptive Language Tutor (Arabic)")
# with gr.Row():
# question_audio_output = gr.Audio(label="Question")
# feedback_audio_output = gr.Audio(label="Feedback")
# with gr.Row():
# feedback_audio_output = gr.Audio(label="Feedback")
# feedback_text_output = gr.Textbox(label="Feedback")
with gr.Row():
question_audio_output = gr.Audio(label="Question")
question_text_output = gr.Textbox(label="Question Text")
with gr.Row():
feedback_audio_output = gr.Audio(label="Feedback")
feedback_text_output = gr.Textbox(label="Feedback")
mic_input = gr.Audio(label="Speak Your Answer")
generate_button = gr.Button("Generate Question")
generate_button.click(
fn=run_tutor,
inputs=mic_input,
outputs=[question_audio_output, question_text_output, feedback_audio_output, feedback_text_output]
)
#NameError: name 'question_text_output' is not defined. Did you mean: 'question_audio_output'?
#
# question_text_output = gr.Textbox(label="Question Text")
# answer_text_output = gr.Textbox(label="Your Answer")
# feedback_text_output = gr.Textbox(label="Feedback")
# mic_input = gr.Audio(label="Speak Your Answer")#
# mic_input.change(
# fn=run_tutor,
# inputs=mic_input,
# outputs=[question_audio_output, feedback_audio_output, question_text_output, answer_text_output, feedback_text_output]
# )
demo.launch()
'''