# NOTE: Hugging Face Space export metadata ("Runtime error" status, file size,
# commit hashes, and a line-number gutter) was pasted into this file by the
# export tool; it is not Python and has been removed.
import os
import gradio as gr
#import elevenlabs
#from elevenlabs.client import ElevenLabs
#from speech_recognition import Recognizer, AudioFile
import io
from google import genai
from google.genai import types
#from dotenv import load_dotenv
#from elevenlabs import play
from TTS.api import TTS
#load_dotenv()
#elevenlabs = ElevenLabs(
# api_key=os.getenv("ELEVENLABS_API_KEY"),
#)
# --- TTS Setup ---
# Loads Coqui XTTS v2 once at import time (CPU only). NOTE: the model is bound
# to the name `model_tts` — code elsewhere must use this exact name.
model_tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False, gpu=False)
# --- API Keys (Set as environment variables for security!) ---
# Returns None if GOOGLE_API_KEY is unset; client construction below will then
# fail at startup rather than at first request.
genai_api_key = os.environ.get("GOOGLE_API_KEY") # Gemini API Key
#elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY")
#elevenlabs.set_api_key(elevenlabs_api_key)
#elevenlabs.API_KEY = elevenlabs_api_key
# Single shared google-genai client used by all generation calls below.
client = genai.Client(api_key=genai_api_key)
# --- ElevenLabs Voice ---
# NOTE(review): "p225" looks like a VCTK speaker id, not an ElevenLabs voice;
# the ElevenLabs path is commented out, and this value is currently unused by
# the TTS call — confirm whether it should be wired into XTTS speaker choice.
voice = "p225"#"Bella" # Choose a voice from ElevenLabs
# --- Language Tutor Parameters ---
# NOTE(review): "Arabic" is interpolated into English prompts (works) but is
# also passed as a `language` argument to XTTS and Google speech recognition,
# both of which expect language codes ("ar" / "ar-SA") — verify.
target_language = "Arabic"
difficulty = 1 # 1 = Easy, 2 = Medium, 3 = Hard
# --- Gemini Model ---
#model = genai.GenerativeModel('gemini-pro') # Or 'gemini-pro-vision' if you need image input
#model = genai.GenerativeModel('gemini-1.5-pro-latest')
#model = genai.GenerativeModel('gemini-2-flash')
#model = genai.GenerativeModel('gemini-pro', generate_response_clause=genai.types.GenerateResponseClause(model='gemini-pro'))
# --- Functions ---
def generate_question(difficulty):
    """Ask Gemini for one practice question in the target language.

    Args:
        difficulty: Difficulty level (1 = easy, 2 = medium, 3 = hard),
            interpolated directly into the prompt text.

    Returns:
        str: The generated question with surrounding whitespace removed.
    """
    question_prompt = f"Generate a simple {target_language} question for a language learner at difficulty level {difficulty}. Just the question, no extra text."
    # High temperature so repeated clicks yield varied questions.
    generation_config = types.GenerateContentConfig(temperature=0.95)
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=question_prompt,
        config=generation_config,
    )
    return reply.text.strip()
def evaluate_answer(question, answer):
    """Grade a learner's answer with Gemini and return short feedback.

    Args:
        question: The question that was posed to the learner.
        answer: The learner's (transcribed) answer.

    Returns:
        str: Concise feedback plus a 1-5 score, whitespace-stripped.
    """
    grading_prompt = f"You are a Arabic language tutor. Evaluate the following answer to the question: '{question}'. Answer: '{answer}'. Provide feedback on grammar, vocabulary, and fluency. Keep the feedback concise (under 50 words). Also, give a score from 1-5 (1 being very poor, 5 being excellent)."
    # Low temperature: grading should be as deterministic as possible.
    grading_config = types.GenerateContentConfig(temperature=0.1)
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=grading_prompt,
        config=grading_config,
    )
    return reply.text.strip()
# Maps the human-readable language name used in prompts to the ISO 639-1
# code that Coqui XTTS v2 expects for its `language` argument.
_XTTS_LANGUAGE_CODES = {"Arabic": "ar", "English": "en"}

def text_to_speech(text, voice):
    """Synthesize `text` to a WAV file using the module-level XTTS model.

    Bug fix: the original body called `tts_model.tts_to_file(...)`, but the
    model is bound to `model_tts` at import time, so every call raised
    NameError. It also passed `language=target_language` ("Arabic"); XTTS v2
    expects a language code such as "ar", so the name is mapped here.

    Args:
        text: The text to speak.
        voice: Unused; kept for interface compatibility with existing callers.
            NOTE(review): confirm whether a `speaker`/`speaker_wav` should be
            derived from this for multi-speaker XTTS output.

    Returns:
        The path of the generated WAV file (as returned by `tts_to_file`).
    """
    language_code = _XTTS_LANGUAGE_CODES.get(target_language, target_language)
    return model_tts.tts_to_file(text=text, language=language_code)
def transcribe_audio(audio_file):
    """Transcribe a recorded answer with the SpeechRecognition library.

    Bug fix: `Recognizer` and `AudioFile` were undefined names because the
    `speech_recognition` import at the top of the file is commented out
    (L11 of the original); the import is restored locally so the rest of the
    module still loads even if the package is missing.

    Args:
        audio_file: Path to a WAV/AIFF/FLAC file (Gradio microphone output).

    Returns:
        str: The recognized text, or an "Error transcribing audio: ..."
        message on failure (best-effort — the UI displays it as feedback).
    """
    # Local import: restores the dependency the original code referenced.
    from speech_recognition import Recognizer, AudioFile

    recognizer = Recognizer()
    with AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)
    try:
        # NOTE(review): recognize_google expects a BCP-47 code such as
        # "ar-SA"; target_language is the English name "Arabic" — confirm.
        return recognizer.recognize_google(audio_data, language=target_language)
    except Exception as exc:
        # Deliberate broad catch: surface any recognition/network error as
        # text in the UI rather than crashing the event handler.
        return f"Error transcribing audio: {exc}"
def run_tutor():
    """Generate a fresh question and return (audio, text) for the UI.

    Returns:
        tuple: (question audio file, question text) matching the
        `[question_audio_output, question_text_output]` Gradio outputs.
    """
    question_text = generate_question(difficulty)
    spoken_question = text_to_speech(question_text, voice)
    return spoken_question, question_text
def process_answer(audio_file, question, question_audio):
    """Transcribe the learner's recording, grade it, and speak the feedback.

    Args:
        audio_file: Microphone recording from the Gradio audio component.
        question: The question text currently shown in the UI.
        question_audio: Unused here; present so the Gradio `inputs` list
            lines up with the registered event handler.

    Returns:
        tuple: (feedback audio file, feedback text) for the two feedback
        components.
    """
    transcript = transcribe_audio(audio_file)
    feedback_text = evaluate_answer(question, transcript)
    spoken_feedback = text_to_speech(feedback_text, voice)
    return spoken_feedback, feedback_text
# --- Gradio Interface ---
# Layout: a question row (button + audio + text), a feedback row
# (audio + text), and a microphone input. Indentation below is reconstructed
# from the whitespace-stripped dump — NOTE(review): confirm which components
# were intended to sit inside each gr.Row().
with gr.Blocks() as demo:
    gr.Markdown("# Adaptive Language Tutor (Arabic)")
    with gr.Row():
        generate_button = gr.Button("Generate Question")
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    mic_input = gr.Audio(label="Speak Your Answer")
    # Clicking the button produces a new question (audio + text).
    generate_button.click(
        fn=run_tutor,
        outputs=[question_audio_output, question_text_output]
    )
    # Any new recording triggers transcription + evaluation + spoken feedback.
    mic_input.change(
        fn=process_answer,
        inputs=[mic_input, question_text_output, question_audio_output],
        outputs=[feedback_audio_output, feedback_text_output]
    )
# Blocks until the server stops; nothing after this line runs during serving.
demo.launch()
# Removed: a large module-level triple-quoted string holding dead prototype
# code (an earlier generator-based run_tutor and alternate Gradio layouts,
# including the variant that raised "NameError: name 'question_text_output'
# is not defined"). It was unreachable documentation-as-string after
# demo.launch(); recover it from version control if ever needed.