File size: 7,131 Bytes
fa20980
 
2a1a45f
 
 
fa20980
43a2f7f
 
2a1a45f
 
d89de2e
641ecef
2a1a45f
641ecef
2a1a45f
 
 
641ecef
 
9e0016f
a03b447
fa20980
 
0c29edf
641ecef
fa20980
e669b5a
1ffcbf7
 
0c29edf
fa20980
 
2a1a45f
fa20980
 
 
 
 
 
0ee1d05
9039c79
a3448b6
bb9dd5a
9039c79
fa20980
 
 
 
 
 
0c29edf
 
 
 
 
 
 
fa20980
 
 
 
 
0c29edf
 
 
 
 
 
 
fa20980
 
 
 
641ecef
2a1a45f
 
 
 
14804da
ea02dc9
641ecef
 
2a1a45f
024c778
bb0cac5
2a1a45f
 
fa20980
 
 
 
 
 
 
 
 
 
 
 
5ee5639
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a1a45f
5ee5639
0e40cd6
5ee5639
 
 
 
 
 
0e40cd6
5ee5639
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa20980
 
 
 
 
43a2f7f
4d7e5af
43a2f7f
fa20980
 
 
 
 
 
43a2f7f
83a16a4
43a2f7f
fa20980
5ee5639
 
 
 
fa20980
 
4f3a827
76d825b
 
 
 
 
 
 
 
fa20980
 
76d825b
fa20980
4d7e5af
fa20980
76d825b
4f3a827
4d7e5af
fa20980
4d7e5af
fa20980
 
4d7e5af
fa20980
 
2c1e979
4d7e5af
 
 
 
 
 
 
 
 
 
 
 
 
5ee5639
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
import gradio as gr
#import elevenlabs
#from elevenlabs.client import ElevenLabs
#from speech_recognition import Recognizer, AudioFile
import io
from google import genai
from google.genai import types
#from dotenv import load_dotenv
#from elevenlabs import play
from TTS.api import TTS

#load_dotenv()

#elevenlabs = ElevenLabs(
#  api_key=os.getenv("ELEVENLABS_API_KEY"),
#)


# --- TTS Setup ---  
# NOTE: loads the Coqui XTTS v2 multilingual model at import time on CPU.
# The first run downloads the model weights, so startup can be slow.
model_tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False, gpu=False)

# --- API Keys (Set as environment variables for security!) ---
# If GOOGLE_API_KEY is unset this is None and genai.Client below will fail.
genai_api_key = os.environ.get("GOOGLE_API_KEY")  # Gemini API Key
#elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY")

#elevenlabs.set_api_key(elevenlabs_api_key)
#elevenlabs.API_KEY = elevenlabs_api_key

# Shared Gemini client used by generate_question() and evaluate_answer().
client = genai.Client(api_key=genai_api_key)

# --- ElevenLabs Voice ---
# Leftover from the ElevenLabs path; "p225" looks like a VITS speaker id.
# NOTE(review): `voice` is passed to text_to_speech() but not used by the
# current XTTS call -- confirm whether a speaker should be wired through.
voice = "p225"#"Bella"  # Choose a voice from ElevenLabs

# --- Language Tutor Parameters ---
# target_language is interpolated into the Gemini prompts as an English
# name; the TTS/STT calls need a language *code* -- see text_to_speech().
target_language = "Arabic"
difficulty = 1  # 1 = Easy, 2 = Medium, 3 = Hard

# --- Gemini Model ---
#model = genai.GenerativeModel('gemini-pro')  # Or 'gemini-pro-vision' if you need image input
#model = genai.GenerativeModel('gemini-1.5-pro-latest')
#model = genai.GenerativeModel('gemini-2-flash')
#model = genai.GenerativeModel('gemini-pro', generate_response_clause=genai.types.GenerateResponseClause(model='gemini-pro'))


# --- Functions ---

def generate_question(difficulty):
    """Generate one target-language practice question via Gemini.

    Args:
        difficulty: Difficulty level (1 = Easy, 2 = Medium, 3 = Hard);
            interpolated verbatim into the prompt.

    Returns:
        The generated question text with surrounding whitespace removed.
    """
    prompt = f"Generate a simple {target_language} question for a language learner at difficulty level {difficulty}. Just the question, no extra text."
    # High temperature so repeated clicks produce varied questions.
    cfg = types.GenerateContentConfig(temperature=0.95)
    result = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config=cfg,
    )
    return result.text.strip()

def evaluate_answer(question, answer):
    """Have Gemini grade the learner's answer to a question.

    Args:
        question: The question text previously shown to the learner.
        answer: The learner's (transcribed) answer.

    Returns:
        Gemini's feedback text (concise critique plus a 1-5 score),
        stripped of surrounding whitespace.
    """
    prompt = f"You are a Arabic language tutor. Evaluate the following answer to the question: '{question}'. Answer: '{answer}'. Provide feedback on grammar, vocabulary, and fluency. Keep the feedback concise (under 50 words). Also, give a score from 1-5 (1 being very poor, 5 being excellent)."
    # Low temperature: grading should be as deterministic as possible.
    cfg = types.GenerateContentConfig(temperature=0.1)
    result = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        config=cfg,
    )
    return result.text.strip()

def text_to_speech(text, voice):
    """Synthesize *text* to a WAV file with the local Coqui XTTS model.

    Args:
        text: Text to speak.
        voice: Speaker identifier. Currently unused by the XTTS call;
            kept for interface compatibility with the earlier
            ElevenLabs code path (callers pass the module-level `voice`).

    Returns:
        Path to the generated WAV file (``tts_to_file`` returns its
        output path), suitable for a Gradio ``Audio`` component.
    """
    # BUG FIX: the module-level model is bound to ``model_tts`` (see the
    # TTS setup section); the previous ``tts_model`` reference raised
    # NameError on every call.
    #
    # XTTS v2 expects ISO-639-1 language codes ("ar"), not English names
    # ("Arabic"), so map the module-level setting before synthesizing.
    # NOTE(review): extend this map if target_language ever changes.
    language_codes = {"Arabic": "ar"}
    lang = language_codes.get(target_language, target_language)
    wav_path = model_tts.tts_to_file(text=text, language=lang)
    return wav_path


def transcribe_audio(audio_file):
    """Transcribe recorded audio to text with Google's free recognizer.

    Args:
        audio_file: Path to a WAV/AIFF/FLAC file (as produced by the
            Gradio microphone component).

    Returns:
        The transcribed text, or an "Error transcribing audio: ..."
        string if recognition fails (best-effort: callers feed the
        result straight into evaluate_answer, so we never raise).
    """
    # BUG FIX: Recognizer/AudioFile were referenced here but their
    # import at the top of the file is commented out, so every call
    # raised NameError.  Import locally to restore the dependency
    # without reviving the pruned top-of-file ElevenLabs imports.
    from speech_recognition import Recognizer, AudioFile

    recognizer = Recognizer()
    with AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        # NOTE(review): recognize_google expects BCP-47 codes such as
        # "ar" / "ar-SA"; target_language is the English name "Arabic",
        # which the service may reject -- confirm and map if needed.
        text = recognizer.recognize_google(audio, language=target_language) # You might need to adjust the language code
        return text
    except Exception as e:
        return f"Error transcribing audio: {e}"



def run_tutor():
    """Produce a fresh practice question and its spoken rendition.

    Returns:
        A ``(question_audio, question_text)`` pair matching the Gradio
        outputs wired to the "Generate Question" button.
    """
    question_text = generate_question(difficulty)
    audio_path = text_to_speech(question_text, voice)
    return audio_path, question_text

def process_answer(audio_file, question, question_audio):
    """Transcribe the learner's spoken answer, grade it, and voice the feedback.

    Args:
        audio_file: Path to the learner's recorded answer.
        question: The question text the answer responds to.
        question_audio: Wired in as a Gradio input but not used here.

    Returns:
        A ``(feedback_audio, feedback_text)`` pair for the feedback outputs.
    """
    transcript = transcribe_audio(audio_file)
    feedback_text = evaluate_answer(question, transcript)
    audio_path = text_to_speech(feedback_text, voice)
    return audio_path, feedback_text
    

# --- Gradio Interface ---
# Layout: a question row (button + audio + text), a feedback row
# (audio + text), and a microphone input below.
with gr.Blocks() as demo:
    gr.Markdown("# Adaptive Language Tutor (Arabic)")
    with gr.Row():
        generate_button = gr.Button("Generate Question")
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    mic_input = gr.Audio(label="Speak Your Answer")
    

    # Button click -> new question (audio + text); run_tutor takes no inputs.
    generate_button.click(
        fn=run_tutor,
        outputs=[question_audio_output, question_text_output]
    )

    # Any change to the mic recording triggers transcription + grading.
    # question_audio_output is passed through but unused by process_answer.
    mic_input.change(
        fn=process_answer,
        inputs=[mic_input, question_text_output, question_audio_output],
        outputs=[feedback_audio_output, feedback_text_output]
    )

demo.launch()



# NOTE(review): everything below is an older draft of the app kept alive
# inside an unassigned triple-quoted string -- it is dead code (its
# trailing demo.launch() never runs).  Consider deleting it; preserved
# verbatim here for history.
'''

def run_tutor(audio_file):
    """Main function to run the tutor."""
    question = generate_question(difficulty)
    question_audio = text_to_speech(question, voice)

        # Display the question in the interface
    yield question_audio, question, None, None

    # Transcribe the user's answer
    user_answer = transcribe_audio(audio_file)

    # Evaluate the answer
    feedback = evaluate_answer(question, user_answer)
    feedback_audio = text_to_speech(feedback, voice)
    
    yield None, None, feedback_audio, feedback
    #return question_audio, feedback_audio, question, user_answer, feedback



    

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Adaptive Language Tutor (Arabic)")
#    with gr.Row():
#        question_audio_output = gr.Audio(label="Question")
#        feedback_audio_output = gr.Audio(label="Feedback")
#    with gr.Row():
#        feedback_audio_output = gr.Audio(label="Feedback")
#        feedback_text_output = gr.Textbox(label="Feedback")


    with gr.Row():
        question_audio_output = gr.Audio(label="Question")
        question_text_output = gr.Textbox(label="Question Text")
    with gr.Row():
        feedback_audio_output = gr.Audio(label="Feedback")
        feedback_text_output = gr.Textbox(label="Feedback")
    
    mic_input = gr.Audio(label="Speak Your Answer")
    generate_button = gr.Button("Generate Question")    

    generate_button.click(
        fn=run_tutor,
        inputs=mic_input,
        outputs=[question_audio_output, question_text_output, feedback_audio_output, feedback_text_output]
    )

#NameError: name 'question_text_output' is not defined. Did you mean: 'question_audio_output'?
        
        #
#        question_text_output = gr.Textbox(label="Question Text")
#        answer_text_output = gr.Textbox(label="Your Answer")
#        feedback_text_output = gr.Textbox(label="Feedback")
#    mic_input = gr.Audio(label="Speak Your Answer")#

#    mic_input.change(
#        fn=run_tutor,
#        inputs=mic_input,
#        outputs=[question_audio_output, feedback_audio_output, question_text_output, answer_text_output, feedback_text_output]
#    )

demo.launch()
'''