# Spaces: Runtime error
# (Hugging Face Spaces status banner captured along with the source; not code.)
import os | |
import gradio as gr | |
#import elevenlabs | |
#from elevenlabs.client import ElevenLabs | |
#from speech_recognition import Recognizer, AudioFile | |
import io | |
from google import genai | |
from google.genai import types | |
#from dotenv import load_dotenv | |
#from elevenlabs import play | |
from TTS.api import TTS | |
#load_dotenv() | |
#elevenlabs = ElevenLabs( | |
# api_key=os.getenv("ELEVENLABS_API_KEY"), | |
#) | |
# --- TTS Setup ---
# Coqui XTTS v2 multilingual model; downloaded on first use, CPU inference.
model_tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False, gpu=False)
# --- API Keys (Set as environment variables for security!) ---
genai_api_key = os.environ.get("GOOGLE_API_KEY")  # Gemini API key; None if unset (client creation below will then fail)
#elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY")
#elevenlabs.set_api_key(elevenlabs_api_key)
#elevenlabs.API_KEY = elevenlabs_api_key
client = genai.Client(api_key=genai_api_key)
# --- TTS voice ---
# NOTE(review): "p225" is a VCTK-style speaker id left over from the
# ElevenLabs->Coqui switch ("Bella" was the ElevenLabs voice).  Confirm the
# loaded XTTS model actually accepts this id.
voice = "p225"#"Bella"
# --- Language Tutor Parameters ---
target_language = "Arabic"
difficulty = 1  # 1 = Easy, 2 = Medium, 3 = Hard
# --- Gemini model selection history (dead code, kept for reference) ---
#model = genai.GenerativeModel('gemini-pro') # Or 'gemini-pro-vision' if you need image input
#model = genai.GenerativeModel('gemini-1.5-pro-latest')
#model = genai.GenerativeModel('gemini-2-flash')
#model = genai.GenerativeModel('gemini-pro', generate_response_clause=genai.types.GenerateResponseClause(model='gemini-pro'))
# --- Functions --- | |
def generate_question(difficulty):
    """Ask Gemini for one practice question in the target language.

    Args:
        difficulty: Difficulty level (1 = Easy, 2 = Medium, 3 = Hard).

    Returns:
        The question text, stripped of surrounding whitespace.
    """
    request = (
        f"Generate a simple {target_language} question for a language learner "
        f"at difficulty level {difficulty}. Just the question, no extra text."
    )
    # High temperature so repeated clicks yield varied questions.
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=request,
        config=types.GenerateContentConfig(temperature=0.95),
    )
    return reply.text.strip()
def evaluate_answer(question, answer):
    """Have Gemini grade the learner's answer to a question.

    Args:
        question: The question that was asked.
        answer: The learner's (transcribed) answer.

    Returns:
        Concise feedback text including a 1-5 score, stripped of whitespace.
    """
    grading_prompt = (
        f"You are a Arabic language tutor. Evaluate the following answer to the "
        f"question: '{question}'. Answer: '{answer}'. Provide feedback on grammar, "
        f"vocabulary, and fluency. Keep the feedback concise (under 50 words). "
        f"Also, give a score from 1-5 (1 being very poor, 5 being excellent)."
    )
    # Low temperature: grading should be as deterministic as possible.
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=grading_prompt,
        config=types.GenerateContentConfig(temperature=0.1),
    )
    return reply.text.strip()
def text_to_speech(text, voice):
    """Synthesize `text` to a WAV file with the module-level Coqui XTTS model.

    Args:
        text: The text to speak.
        voice: Speaker id to synthesize with (e.g. "p225").
            NOTE(review): XTTS v2 is a multi-speaker model; confirm this id
            is present in model_tts.speakers, otherwise synthesis fails.

    Returns:
        Path to the generated audio file (usable directly as a gr.Audio value).
    """
    # Bug fix: the module defines `model_tts`, but this function previously
    # called the undefined name `tts_model`, raising NameError at runtime.
    #
    # Bug fix: XTTS expects an ISO 639-1 language code ("ar"), not the
    # English language name stored in `target_language` ("Arabic").
    lang_code = {"Arabic": "ar"}.get(target_language, target_language)
    wav_path = model_tts.tts_to_file(
        text=text,
        speaker=voice,  # previously ignored; multi-speaker models need it
        language=lang_code,
    )
    return wav_path
def transcribe_audio(audio_file):
    """Transcribe a recorded answer to text with Google's free recognizer.

    Args:
        audio_file: Path to a WAV/AIFF/FLAC file (as supplied by gr.Audio).

    Returns:
        The recognized text, or a human-readable error string on failure
        (the UI displays whatever string comes back).
    """
    # Bug fix: the module-level `from speech_recognition import ...` is
    # commented out at the top of the file, so `Recognizer`/`AudioFile`
    # were undefined names.  Import locally to keep this block self-contained.
    from speech_recognition import Recognizer, AudioFile

    recognizer = Recognizer()
    with AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        # Bug fix: recognize_google expects a BCP-47 language tag ("ar"),
        # not the English name "Arabic" held in `target_language`.
        return recognizer.recognize_google(audio, language="ar")
    except Exception as e:
        # Deliberate best-effort: surface the failure as text so the
        # Gradio callback chain keeps working instead of crashing.
        return f"Error transcribing audio: {e}"
def run_tutor():
    """Produce a fresh question for the UI.

    Returns:
        Tuple of (question audio, question text) matching the Gradio outputs.
    """
    question_text = generate_question(difficulty)
    spoken_question = text_to_speech(question_text, voice)
    return spoken_question, question_text
def process_answer(audio_file, question, question_audio):
    """Transcribe the learner's spoken answer and grade it.

    Args:
        audio_file: Recording of the learner's answer (from gr.Audio).
        question: The question text being answered.
        question_audio: Unused; wired in by the UI callback.

    Returns:
        Tuple of (feedback audio, feedback text) matching the Gradio outputs.
    """
    transcript = transcribe_audio(audio_file)
    critique = evaluate_answer(question, transcript)
    spoken_critique = text_to_speech(critique, voice)
    return spoken_critique, critique
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Adaptive Language Tutor (Arabic)")
    # Top row: question generation controls and the generated question.
    with gr.Row():
        generate_button = gr.Button("Generate Question")
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    # Second row: tutor feedback (audio + text).
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    mic_input = gr.Audio(label="Speak Your Answer")
    # Clicking the button generates a fresh question (audio + text).
    generate_button.click(
        fn=run_tutor,
        outputs=[question_audio_output, question_text_output]
    )
    # Any change to the mic recording triggers transcription and grading.
    # NOTE(review): question_audio_output is passed as an input but is unused
    # by process_answer -- presumably a leftover; confirm before removing.
    mic_input.change(
        fn=process_answer,
        inputs=[mic_input, question_text_output, question_audio_output],
        outputs=[feedback_audio_output, feedback_text_output]
    )
demo.launch()
# NOTE(review): everything below is dead code -- an earlier, generator-based
# variant of the tutor kept inside a module-level string literal.  It is never
# executed; consider deleting it rather than carrying it in the file.
'''
def run_tutor(audio_file):
    """Main function to run the tutor."""
    question = generate_question(difficulty)
    question_audio = text_to_speech(question, voice)
    # Display the question in the interface
    yield question_audio, question, None, None
    # Transcribe the user's answer
    user_answer = transcribe_audio(audio_file)
    # Evaluate the answer
    feedback = evaluate_answer(question, user_answer)
    feedback_audio = text_to_speech(feedback, voice)
    yield None, None, feedback_audio, feedback
    #return question_audio, feedback_audio, question, user_answer, feedback

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Adaptive Language Tutor (Arabic)")
#    with gr.Row():
#        question_audio_output = gr.Audio(label="Question")
#        feedback_audio_output = gr.Audio(label="Feedback")
#    with gr.Row():
#        feedback_audio_output = gr.Audio(label="Feedback")
#        feedback_text_output = gr.Textbox(label="Feedback")
    with gr.Row():
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    mic_input = gr.Audio(label="Speak Your Answer")
    generate_button = gr.Button("Generate Question")
    generate_button.click(
        fn=run_tutor,
        inputs=mic_input,
        outputs=[question_audio_output, question_text_output, feedback_audio_output, feedback_text_output]
    )
#NameError: name 'question_text_output' is not defined. Did you mean: 'question_audio_output'?
#
#    question_text_output = gr.Textbox(label="Question Text")
#    answer_text_output = gr.Textbox(label="Your Answer")
#    feedback_text_output = gr.Textbox(label="Feedback")
#    mic_input = gr.Audio(label="Speak Your Answer")#
#    mic_input.change(
#        fn=run_tutor,
#        inputs=mic_input,
#        outputs=[question_audio_output, feedback_audio_output, question_text_output, answer_text_output, feedback_text_output]
#    )
demo.launch()
'''