ParulPandey commited on
Commit
f1bbb15
Β·
verified Β·
1 Parent(s): 254209b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +845 -314
app.py CHANGED
@@ -1,385 +1,916 @@
 
 
 
1
  import gradio as gr
2
  import os
3
  import difflib
4
- from gradio_client import Client, file as gradio_file # Renamed to avoid conflict
5
  import time
6
  import google.generativeai as genai
7
 
8
  # --- Configuration & Clients ---
9
 
10
- def configure_gemini_api():
11
- """Configures the Google Gemini API with API key from Secrets or environment."""
12
  api_key = None
13
  try:
14
- api_key = gr.Secrets.get("GOOGLE_API_KEY") # For Hugging Face Spaces
15
- except AttributeError: # Running locally, gr.Secrets not available
16
- api_key = os.environ.get("GOOGLE_API_KEY")
17
- except FileNotFoundError: # gr.Secrets.get can raise this if no secrets file found
18
  api_key = os.environ.get("GOOGLE_API_KEY")
19
-
20
  if api_key:
21
  try:
22
  genai.configure(api_key=api_key)
23
- print("Google Gemini API configured successfully.")
24
  return True
25
  except Exception as e:
26
- print(f"Error configuring Gemini API: {e}")
27
- return False
28
  else:
29
- print("WARN: GOOGLE_API_KEY not found in Gradio Secrets or environment. Story generation with Gemini will be disabled.")
30
- return False
31
 
32
- GEMINI_API_CONFIGURED = configure_gemini_api()
33
-
34
- # Initialize TTS Client (Using ESPnet VITS as an alternative to Bark)
35
  try:
36
- tts_client = Client("espnet/kan-bayashi_ljspeech_vits")
37
- print("ESPnet VITS TTS client initialized successfully.")
38
- # --- IMPORTANT: For Debugging VITS API if issues persist ---
39
- # print("--- ESPnet VITS TTS API Details (Uncomment to view) ---")
40
- # print(tts_client.view_api(all_endpoints=True))
41
- # print("----------------------------------------------------")
42
- # For a more structured dictionary output:
43
- # api_info_tts = tts_client.view_api(return_format="dict")
44
- # import json
45
- # print(json.dumps(api_info_tts, indent=2))
46
- # --- End Debugging Section ---
47
  except Exception as e:
48
- print(f"Fatal: Could not initialize ESPnet VITS TTS client: {e}. TTS will not work.")
49
  tts_client = None
50
 
51
- # Initialize STT Client for Whisper (abidlabs/whisper-large-v2)
52
  try:
53
  whisper_stt_client = Client("abidlabs/whisper-large-v2")
54
- print("Whisper STT client initialized successfully.")
55
- # --- For Debugging Whisper API ---
56
- # print("--- Whisper STT API Details (Uncomment to view) ---")
57
- # print(whisper_stt_client.view_api(all_endpoints=True))
58
- # print("-------------------------------------------------")
59
- except Exception as e:
60
- print(f"Fatal: Could not initialize Whisper STT client: {e}. STT will not work.")
61
- whisper_stt_client = None
62
 
63
  # --- Helper Functions ---
64
-
65
- def generate_story_with_gemini(name, grade, topic):
66
- if not GEMINI_API_CONFIGURED:
67
- return "Google Gemini API key not configured. Story generation is disabled. πŸ”‘"
 
 
 
 
68
  try:
69
- model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest") # Fast and capable
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  prompt = (
71
- f"You are a super friendly and imaginative storyteller for kids. "
72
- f"Please write an exciting and fun short story (around 100-120 words) for a student named {name} who is in Grade {grade}. "
73
- f"The story must be about '{topic}'. "
74
- f"Use simple words and sentences that a Grade {grade} student can easily read aloud and understand. "
75
- f"Make the story engaging and positive. Jump right into the story without any introduction like 'Here is a story for you'."
76
- )
77
- safety_settings = [
78
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
79
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
80
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
81
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
82
- ]
83
- generation_config = genai.types.GenerationConfig(
84
- candidate_count=1, max_output_tokens=300, temperature=0.75
85
- )
86
- response = model.generate_content(
87
- prompt, generation_config=generation_config, safety_settings=safety_settings
88
  )
 
 
 
 
 
 
 
 
 
 
89
  if response.candidates and response.candidates[0].content.parts:
90
  story = response.text
91
  if response.prompt_feedback and response.prompt_feedback.block_reason:
92
- return f"Oh dear! My story idea for '{topic}' was a bit too wild and got blocked (Reason: {response.prompt_feedback.block_reason}). Let's try a different topic! 😊"
93
- if not story.strip():
94
- return f"Hmm, Gemini gave me a blank page for '{topic}'. Let's try a different topic or try again! ✨"
95
- return story.strip()
 
 
96
  else:
97
  if response.prompt_feedback and response.prompt_feedback.block_reason:
98
- return f"Oh dear! My story idea for '{topic}' was a bit too wild and got blocked (Reason: {response.prompt_feedback.block_reason}). Let's try a different topic! 😊"
99
- print(f"Gemini API response issue: {response}")
100
- return f"Hmm, Gemini's story magic seems to be on a little break for '{topic}'. Maybe try another topic? πŸ€”"
 
 
101
  except Exception as e:
102
- print(f"Error generating story with Gemini: {e}")
103
- if "API_KEY_INVALID" in str(e).lower() or "api key not valid" in str(e).lower():
104
- return "Oops! The Google Gemini API key seems to be having a problem. Please tell the grown-ups to check it! πŸ”‘"
105
- return f"Oh no! 😟 I had a little trouble dreaming up a story with Gemini. Error: {e}"
106
-
107
- def text_to_speech_vits(text_to_speak):
 
 
 
 
 
 
 
 
108
  if not tts_client:
109
- return "The VITS sound machine isn't working right now. πŸ› οΈ Please tell the grown-ups!"
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  try:
111
- # Parameters for espnet/kan-bayashi_ljspeech_vits.
112
- # YOU MUST VERIFY these with tts_client.view_api() if TTS fails.
113
- # The fn_index (or api_name) and the order/names of parameters are critical.
114
- job = tts_client.submit(
115
- text_to_speak, # text (str)
116
- "EN", # lang (str) - e.g., "EN" for English in this model
117
- 0, # speaker_id (int | float) - usually 0 for LJSpeech default
118
- 0.667, # noise_scale (float) - variance of Z
119
- 0.8, # noise_scale_w (float) - variance of Z in duration
120
- 1.0, # length_scale (float) - controls speed
121
- fn_index=0 # ASSUMPTION: TTS is the first function (index 0).
122
- # If view_api() shows a different fn_index or an api_name like "/predict", use that.
123
- )
124
- # VITS is generally faster than Bark, but network can add delays
125
- audio_filepath = job.result(timeout=90)
126
-
127
- # This space typically returns just the audio filepath directly.
128
- if isinstance(audio_filepath, str) and audio_filepath.endswith(('.wav', '.mp3', '.flac')):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  return audio_filepath
130
  else:
131
- # Sometimes the result might be a tuple, e.g., (filepath, samplerate)
132
- # Check the actual output structure from view_api() or by printing audio_filepath
133
- print(f"Unexpected VITS TTS result format: {audio_filepath}")
134
- if isinstance(audio_filepath, tuple) and len(audio_filepath) > 0 and isinstance(audio_filepath[0], str):
135
- return audio_filepath[0] # Assume audio path is the first element if it's a tuple
136
- return "Hmm, the sound from VITS came out a bit funny. πŸ€”"
137
  except Exception as e:
138
- print(f"Error with VITS TTS (espnet/kan-bayashi_ljspeech_vits): {e}")
139
- if "Queue full" in str(e).lower() or "too much pending traffic" in str(e).lower():
140
- return "The VITS sound machine is busy! Please try again in a moment. πŸ•’"
141
- # Provide more specific error if submit call itself failed due to wrong params
142
- if "expected" in str(e).lower() and ("argument" in str(e).lower() or "parameter" in str(e).lower()):
143
- return f"VITS TTS had a hiccup with parameters. (Details: {e}). Please check view_api() output."
144
- return f"Oh dear, VITS couldn't make the sound. πŸ”‡ Error: {e}"
145
-
146
-
147
- def speech_to_text_whisper_space(audio_filepath):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  if not whisper_stt_client:
149
- return "The Whisper listening ears aren't working right now. πŸ› οΈ Please tell the grown-ups!"
 
150
  if not audio_filepath:
151
- return "Oops! I didn't get any recording to listen to. 🎀"
152
- try:
153
- # API for abidlabs/whisper-large-v2 usually takes audio, task, language.
154
- job = whisper_stt_client.submit(
155
- gradio_file(audio_filepath), # Use gradio_client.file to handle the upload
156
- "transcribe", # task
157
- "English", # language (can be None for auto-detect)
158
- api_name="/predict" # This is common for abidlabs/whisper spaces
159
- )
160
- result_dict = job.result(timeout=120) # Wait up to 2 minutes
161
-
162
- if isinstance(result_dict, dict) and 'text' in result_dict:
163
- return result_dict['text']
164
- elif isinstance(result_dict, str): # Fallback if it's simpler and returns text directly
165
- return result_dict
166
- else:
167
- print(f"Unexpected Whisper STT result format: {result_dict}")
168
- return "Hmm, I couldn't quite understand the words from Whisper. πŸ€”"
169
- except Exception as e:
170
- print(f"Error transcribing audio with Whisper Space: {e}")
171
- if "Queue full" in str(e).lower() or "too much pending traffic" in str(e).lower():
172
- return "The Whisper listening ears are super busy! 인기폭발! ΠΎΡ‡Π΅Ρ€Π΅Π΄ΡŒ! Please try again in a bit. πŸ•’"
173
- return f"Oh no! Whisper had trouble hearing that. πŸ™‰ Error: {e}"
174
 
175
  def clean_text_for_comparison(text):
176
  if not isinstance(text, str): return []
177
- text = text.lower()
178
- punctuation_to_remove = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~" # Keeps apostrophes for contractions
179
- text = text.translate(str.maketrans('', '', punctuation_to_remove))
180
- return text.split()
181
 
182
  def compare_texts_for_feedback(original_text, student_text):
183
- original_words = clean_text_for_comparison(original_text)
184
- student_words = clean_text_for_comparison(student_text)
185
-
186
- if not student_words:
187
- return "It sounds like you didn't record anything, or maybe it was super quiet! 🀫 Try recording again nice and clear!", ""
188
-
189
  matcher = difflib.SequenceMatcher(None, original_words, student_words, autojunk=False)
190
- feedback_lines = []
191
- highlighted_passage_parts = []
192
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  for tag, i1, i2, j1, j2 in matcher.get_opcodes():
194
- original_segment = original_words[i1:i2]
195
- student_segment = student_words[j1:j2]
196
-
197
  if tag == 'equal':
198
- highlighted_passage_parts.append(" ".join(original_segment))
199
- elif tag == 'replace':
200
- # Try to highlight word by word if segments are same length for better visual
201
- if len(original_segment) == len(student_segment):
202
- for i in range(len(original_segment)):
203
- o_word = original_segment[i]
204
- s_word = student_segment[i]
205
- feedback_lines.append(f"- You said: \"*{s_word}*\" instead of: \"**{o_word}**\"")
206
- highlighted_passage_parts.append(f"~~{o_word}~~ **{s_word}**")
207
- else: # General replacement if segment lengths differ
208
- feedback_lines.append(f"- Instead of: \"**{' '.join(original_segment)}**\", you said: \"*{' '.join(student_segment)}*\"")
209
- highlighted_passage_parts.append(f"~~{' '.join(original_segment)}~~ **{' '.join(student_segment)}**")
210
- elif tag == 'delete': # Student skipped words from original
211
- feedback_lines.append(f"- You missed: \"**{' '.join(original_segment)}**\"")
212
- highlighted_passage_parts.append(f"~~{' '.join(original_segment)}~~ (*skipped*)")
213
- elif tag == 'insert': # Student added words not in original
214
- feedback_lines.append(f"- You added: \"*{' '.join(student_segment)}*\" (which wasn't in the story)")
215
- highlighted_passage_parts.append(f"(*added:* **{' '.join(student_segment)}**)")
216
-
217
- final_highlighted_text = " ".join(highlighted_passage_parts)
218
-
219
  if not feedback_lines:
220
- return "πŸŽ‰πŸ₯³ WOOHOO! Amazing reading! You got all the words spot on! πŸ₯³πŸŽ‰", final_highlighted_text
 
 
 
 
 
221
  else:
222
- feedback_summary = "Great try! Here are a few words to practice to make it even better:\n" + "\n".join(feedback_lines)
223
- return feedback_summary, final_highlighted_text
224
-
225
- # --- Gradio UI Functions ---
226
- def generate_story_and_audio_for_ui(name, grade, topic, progress=gr.Progress(track_tqdm=True)):
227
- if not name or not grade or not topic:
228
- return "Oops! Please tell me your name, grade, and a fun topic first! 😊", None, gr.update(visible=False), ""
229
-
230
- progress(0.1, desc="πŸ“– Asking Gemini to dream up a cool story for you...")
231
- story_text = generate_story_with_gemini(name, grade, topic)
232
- gemini_error_keywords = ["Gemini API key not configured", "Oh no!", "Oops!", "Hmm,"]
233
- if any(keyword in story_text for keyword in gemini_error_keywords) or not story_text.strip() :
234
- return story_text, None, gr.update(visible=False), story_text # Keep recording area hidden
235
-
236
- progress(0.5, desc="🎧 Warming up the VITS sound machine... (this should be quicker!)")
237
- tts_audio_path = text_to_speech_vits(story_text) # Use VITS TTS
238
- error_conditions_tts = [
239
- "couldn't make the sound", "sound came out a bit funny", "sound machine isn't working",
240
- "sound machine is busy", "VITS had a hiccup" # Check for VITS specific errors
241
- ]
242
- if any(err in (tts_audio_path or "") for err in error_conditions_tts):
243
- return story_text, tts_audio_path, gr.update(visible=False), story_text # Keep recording hidden
244
-
245
- progress(1.0, desc="βœ… Story and sound are ready! Let's go!")
246
- return (
247
- story_text,
248
- tts_audio_path,
249
- gr.update(visible=True), # Show recording_assessment_area
250
- story_text # Pass story_text to gr.State
251
- )
252
 
253
  def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
254
- if not student_audio_path:
255
- return "🎀 Whoops! Did you forget to record your awesome reading? Try again!", ""
256
- if not original_passage_state: # Should not happen if UI flow is correct
257
- return "Hmm, I lost the story! 😟 Please generate a new story first.", ""
258
-
259
- progress(0.2, desc="πŸ‘‚ Whisper is listening carefully to your recording...")
260
- transcribed_text = speech_to_text_whisper_space(student_audio_path)
261
- error_conditions_stt = [
262
- "couldn't understand the words", "had trouble hearing that", "listening ears aren't working",
263
- "listening ears are super busy", "didn't get any recording"
264
- ]
265
- if any(err in (transcribed_text or "") for err in error_conditions_stt):
266
- return transcribed_text, "" # Show STT error
267
-
268
- progress(0.7, desc="🧠 Thinking about the words...")
269
- feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
270
- progress(1.0, desc="⭐ Feedback is ready!")
271
- return feedback, highlighted_passage
272
-
273
- # --- Gradio Interface ---
274
  css = """
275
- body { font-family: 'Comic Sans MS', 'Chalkboard SE', 'Comic Neue', cursive; background-color: #F0F8FF; } /* AliceBlue background */
276
- .gr-button {
277
- background-color: #FF69B4 !important; /* HotPink */
278
- color: white !important;
 
 
279
  border-radius: 20px !important;
280
- font-weight: bold !important;
281
- border: 2px solid #FF1493 !important; /* DeepPink border */
282
- box-shadow: 0px 3px 5px rgba(0,0,0,0.2) !important;
 
 
283
  }
284
- .gr-button:hover { background-color: #FF1493 !important; } /* DeepPink on hover */
285
- .gr-panel {
286
- border-radius: 15px !important;
287
- box-shadow: 5px 5px 15px rgba(0,0,0,0.1) !important;
288
- background-color: #FFFACD !important; /* LemonChiffon panel background */
289
- border: 2px dashed #FFD700 !important; /* Gold dashed border */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  }
291
- label, .gr-checkbox-label { color: #4B0082 !important; font-weight: bold !important; } /* Indigo */
292
- .gr-textbox, .gr-dropdown { border-radius: 10px !important; border: 1px solid #DDA0DD !important; } /* Plum border for inputs */
293
- #student_audio_input audio { background-color: #E6E6FA; border-radius: 10px; } /* Lavender for audio player */
294
- #feedback_output, #highlighted_passage_output {
295
- background-color: #FFFFE0; /* LightYellow */
296
- padding: 15px;
297
- border-radius: 10px;
298
- border: 1px solid #FAFAD2; /* LightGoldenrodYellow */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  }
300
  """
301
 
302
- # Using a theme that allows CSS to take more precedence
303
- with gr.Blocks(theme=gr.themes.Base(), css=css) as app: # theme=gr.themes.Soft() or gr.themes.Base()
304
- gr.Markdown(
305
- """
306
- <div style="text-align: center; padding: 20px 0;">
307
- <h1 style="color: #FF6347; font-size: 3em; text-shadow: 2px 2px #D3D3D3;">πŸŒˆπŸ¦„βœ¨ AI Reading Buddy βœ¨πŸ¦„πŸŒˆ</h1>
308
- <p style="font-size: 1.3em; color: #483D8B;">Let's read a super fun story from Gemini and practice our words!</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  </div>
310
  """
311
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
- original_passage_state = gr.State("") # To store the generated story
 
 
 
 
 
 
 
 
 
 
 
 
314
 
315
- with gr.Row():
316
- with gr.Column(scale=1):
317
- gr.Markdown("### <span style='color:#DB7093;'>✏️ Tell Me About You!</span>")
318
- student_name_input = gr.Textbox(label="πŸ‘‘ Your Awesome Name:", placeholder="E.g., Princess Lily")
319
- student_grade_input = gr.Dropdown(
320
- label="πŸ§‘β€πŸŽ“ Your Grade:",
321
- choices=[f"{i}" for i in range(1, 11)], # Grades 1 to 10
322
- value="3" # Default value
323
  )
324
- topic_input = gr.Textbox(label="πŸš€ Story Topic Idea:", placeholder="E.g., brave little astronaut")
325
- generate_button = gr.Button(value="🎈 Get My Gemini Story!")
326
-
327
- with gr.Column(scale=2):
328
- gr.Markdown("### <span style='color:#DB7093;'>πŸ“– Your Special Story (from Gemini AI):</span>")
329
- passage_output = gr.Textbox(label="Read this aloud:", lines=10, interactive=False)
330
- gr.Markdown("### <span style='color:#DB7093;'>πŸ”Š Listen to the Story:</span>")
331
- audio_output = gr.Audio(label="Hear how it sounds (with VITS TTS)", type="filepath") # Label updated for VITS
332
-
333
- gr.Markdown("<hr style='border:1px dashed #FFB6C1;'>") # LightPink dashed separator
334
-
335
- with gr.Row(visible=False) as recording_assessment_area: # Initially hidden
336
- with gr.Column(scale=1):
337
- gr.Markdown("### <span style='color:#32CD32;'>🀩 Your Turn to Shine! 🀩</span>")
338
- student_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎀 Record yourself reading the story! Press the mic, then stop.", elem_id="student_audio_input")
339
- assess_button = gr.Button(value="🧐 Check My Reading!", elem_id="assess_button")
340
-
341
- with gr.Column(scale=2):
342
- gr.Markdown("### <span style='color:#32CD32;'>πŸ’‘ Word Detective Feedback:</span>")
343
- feedback_output = gr.Markdown(value="Your amazing feedback will pop up here! ✨", elem_id="feedback_output")
344
- highlighted_passage_output = gr.Markdown(value="See your reading journey here! πŸ—ΊοΈ", elem_id="highlighted_passage_output")
345
-
346
-
347
- generate_button.click(
348
- fn=generate_story_and_audio_for_ui,
349
- inputs=[student_name_input, student_grade_input, topic_input],
350
- outputs=[
351
- passage_output,
352
- audio_output,
353
- recording_assessment_area, # Directly control visibility of the row
354
- original_passage_state
355
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  )
357
-
358
- assess_button.click(
359
- fn=assess_student_reading_ui,
360
- inputs=[original_passage_state, student_audio_input],
361
- outputs=[feedback_output, highlighted_passage_output]
 
362
  )
363
-
364
- gr.Markdown(
365
- """
366
- ---
367
- <div style="text-align: center; font-size: 0.9em; color: #555;">
368
- Built with ❀️ for the Agentic Demo Track Hackathon! Tag: <code>agent-demo-track</code>
369
- <br>Stories by Google Gemini, voices by ESPnet VITS @ HF, and listening by Whisper @ HF.
370
- </div>
371
- """
 
 
 
 
 
 
372
  )
373
 
374
- # --- Launching the App ---
375
  if __name__ == "__main__":
376
- if not GEMINI_API_CONFIGURED:
377
- print("🚨 GOOGLE_API_KEY not configured for local testing or failed to initialize!")
378
- print("Please set it: export GOOGLE_API_KEY='your_key_here'")
379
-
380
- if not tts_client:
381
- print("🚨 ESPnet VITS TTS client (espnet/kan-bayashi_ljspeech_vits) could not be initialized. TTS will not work.")
382
- if not whisper_stt_client:
383
- print("🚨 Whisper STT client (abidlabs/whisper-large-v2) could not be initialized. STT will not work.")
384
-
385
- app.launch(debug=True) # Set share=True for a temporary public link if running locally
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
  import gradio as gr
5
  import os
6
  import difflib
7
+ from gradio_client import Client
8
  import time
9
  import google.generativeai as genai
10
 
11
  # --- Configuration & Clients ---
12
 
13
+ def configure_llm_api():
 
14
  api_key = None
15
  try:
16
+ api_key = gr.Secrets.get("GOOGLE_API_KEY")
17
+ except (AttributeError, FileNotFoundError):
 
 
18
  api_key = os.environ.get("GOOGLE_API_KEY")
 
19
  if api_key:
20
  try:
21
  genai.configure(api_key=api_key)
 
22
  return True
23
  except Exception as e:
24
+ print(f"Error configuring LLM (Gemini) API: {e}"); return False
 
25
  else:
26
+ print("WARN: LLM API Key (GOOGLE_API_KEY) not found."); return False
27
+ LLM_API_CONFIGURED = configure_llm_api()
28
 
29
+ # Initialize new TTS client
 
 
30
  try:
31
+ tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
32
+ print("βœ… Connected to advanced TTS service (Text-To-Speech-Unlimited)")
 
 
 
 
 
 
 
 
 
33
  except Exception as e:
34
+ print(f"❌ Failed to connect to TTS service: {e}")
35
  tts_client = None
36
 
 
37
  try:
38
  whisper_stt_client = Client("abidlabs/whisper-large-v2")
39
+ except Exception: whisper_stt_client = None
 
 
 
 
 
 
 
40
 
41
  # --- Helper Functions ---
42
+ def generate_story_from_llm(name, grade_str, topic, progress=gr.Progress(track_tqdm=True)):
43
+ progress(0.0, desc="Starting story creation...")
44
+ default_passage_val = ""
45
+ default_audio_gen_update = gr.update(interactive=False, visible=False)
46
+ default_audio_player_update = gr.update(value=None, visible=False)
47
+ if not LLM_API_CONFIGURED:
48
+ progress(1.0, desc="Complete")
49
+ return "LLM API key not configured...", default_audio_gen_update, default_audio_player_update
50
  try:
51
+ if grade_str.startswith("Grade "):
52
+ grade = int(grade_str.replace("Grade ", ""))
53
+ else:
54
+ grade = int(grade_str)
55
+ except ValueError:
56
+ progress(1.0, desc="Complete")
57
+ return "Invalid grade level selected.", default_audio_gen_update, default_audio_player_update
58
+ if grade <= 2: word_target, max_llm_tokens = "around 40-60 words", 100
59
+ elif grade <= 5: word_target, max_llm_tokens = "around 80-100 words", 200
60
+ elif grade <= 8: word_target, max_llm_tokens = "around 100-120 words", 250
61
+ else: word_target, max_llm_tokens = "around 120-150 words", 300
62
+
63
+ progress(0.1, desc="Setting up AI story generator...")
64
+ story_text_result = default_passage_val
65
+ audio_gen_btn_update = default_audio_gen_update
66
+ try:
67
+ model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
68
  prompt = (
69
+ f"You are an AI assistant that creates engaging short reading passages. "
70
+ f"Generate a story of {word_target} suitable for a student named {name} in Grade {grade}. "
71
+ f"The story topic is: '{topic}'. Use age-appropriate vocabulary for Grade {grade}. Ensure the story is interesting and easy to read aloud. "
72
+ f"Do not include any introductory or concluding phrases like 'Here is a story'."
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  )
74
+ safety_settings = [{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in [
75
+ "HARM_CATEGORY_HARASSMENT",
76
+ "HARM_CATEGORY_HATE_SPEECH",
77
+ "HARM_CATEGORY_SEXUALLY_EXPLICIT",
78
+ "HARM_CATEGORY_DANGEROUS_CONTENT"
79
+ ]]
80
+ generation_config = genai.types.GenerationConfig(candidate_count=1, max_output_tokens=max_llm_tokens, temperature=0.7)
81
+ progress(0.3, desc="AI is writing your story...")
82
+ response = model.generate_content(prompt, generation_config=generation_config, safety_settings=safety_settings)
83
+ progress(0.8, desc="Polishing your story...")
84
  if response.candidates and response.candidates[0].content.parts:
85
  story = response.text
86
  if response.prompt_feedback and response.prompt_feedback.block_reason:
87
+ story_text_result = f"Story idea for '{topic}' blocked (Reason: {response.prompt_feedback.block_reason}). Try a different topic. 😊"
88
+ elif not story.strip():
89
+ story_text_result = f"The LLM couldn't generate a story for '{topic}'. Try another topic or rephrase. ✨"
90
+ else:
91
+ story_text_result = story.strip()
92
+ audio_gen_btn_update = gr.update(interactive=True, visible=True)
93
  else:
94
  if response.prompt_feedback and response.prompt_feedback.block_reason:
95
+ story_text_result = f"Story idea for '{topic}' got blocked (Reason: {response.prompt_feedback.block_reason}). Try a different topic. 😊"
96
+ else:
97
+ story_text_result = "Hmm, LLM had trouble with that topic. Maybe try another one? πŸ€”"
98
+ progress(1.0, desc="Story complete!")
99
+ return story_text_result, audio_gen_btn_update, default_audio_player_update
100
  except Exception as e:
101
+ progress(1.0, desc="Complete")
102
+ return f"Oh no! 😟 Error generating story. Details: {e}", default_audio_gen_update, default_audio_player_update
103
+
104
+ def text_to_speech_using_space(text_to_speak, progress=gr.Progress(track_tqdm=True)):
105
+ global tts_client
106
+ progress(0.0, desc="πŸ”Š Preparing voice synthesis...")
107
+
108
+ if not text_to_speak or not text_to_speak.strip():
109
+ progress(1.0, desc="Complete")
110
+ return None
111
+
112
+ progress(0.1, desc="πŸ”Š Initializing audio generation...")
113
+
114
+ # Reconnect to TTS client if needed
115
  if not tts_client:
116
+ progress(0.2, desc="πŸ”— Connecting to advanced voice service...")
117
+ try:
118
+ tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
119
+ progress(0.3, desc="πŸ”— Connected to voice service...")
120
+ except Exception as e:
121
+ print(f"Failed to connect to TTS service: {e}")
122
+ progress(1.0, desc="Complete")
123
+ return None
124
+
125
+ if not tts_client:
126
+ progress(1.0, desc="Complete")
127
+ return None
128
+
129
+ progress(0.4, desc="πŸŽ™οΈ AI is reading your story aloud...")
130
  try:
131
+ # Try the correct API configuration with emotion parameter
132
+ api_methods = [
133
+ {"params": [text_to_speak, "alloy", "happy"], "api_name": "/text_to_speech_app"},
134
+ {"params": [text_to_speak, "alloy", "neutral"], "api_name": "/text_to_speech_app"},
135
+ {"params": [text_to_speak, "nova", "neutral"], "api_name": "/text_to_speech_app"},
136
+ {"params": [text_to_speak], "api_name": "/predict"}
137
+ ]
138
+
139
+ audio_filepath = None
140
+ for method in api_methods:
141
+ try:
142
+ print(f"Trying TTS with params: {method['params']} and api_name: {method['api_name']}")
143
+ audio_result = tts_client.predict(
144
+ *method["params"],
145
+ api_name=method["api_name"]
146
+ )
147
+ print(f"TTS result type: {type(audio_result)}, content: {audio_result}")
148
+
149
+ # Extract audio file path from result
150
+ if isinstance(audio_result, tuple) and len(audio_result) > 0:
151
+ audio_filepath = audio_result[0]
152
+ elif isinstance(audio_result, str) and audio_result.endswith(('.wav', '.mp3', '.flac', '.m4a')):
153
+ audio_filepath = audio_result
154
+ elif isinstance(audio_result, list) and len(audio_result) > 0:
155
+ audio_filepath = audio_result[0]
156
+
157
+ if audio_filepath:
158
+ print(f"Successfully generated audio: {audio_filepath}")
159
+ break
160
+
161
+ except Exception as method_error:
162
+ print(f"TTS method failed: {method_error}")
163
+ continue
164
+
165
+ if audio_filepath:
166
+ progress(0.9, desc="🎡 Voice generation complete!")
167
+ progress(1.0, desc="πŸ”Š Audio ready!")
168
+ print(f"FINAL: Returning audio file path: {audio_filepath}")
169
  return audio_filepath
170
  else:
171
+ print("All TTS methods failed, trying to reconnect...")
172
+ raise Exception("All API methods failed")
173
+
 
 
 
174
  except Exception as e:
175
+ print(f"TTS error: {e}")
176
+ # Try to reconnect on error
177
+ try:
178
+ progress(0.6, desc="πŸ”„ Reconnecting to voice service...")
179
+ tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
180
+ if tts_client:
181
+ progress(0.8, desc="πŸŽ™οΈ Retrying voice generation...")
182
+ # Try the most basic approach with emotion parameter
183
+ audio_result = tts_client.predict(
184
+ text_to_speak,
185
+ "alloy", # voice
186
+ "neutral", # emotion
187
+ api_name="/text_to_speech_app"
188
+ )
189
+ print(f"Retry result: {type(audio_result)}, {audio_result}")
190
+
191
+ audio_filepath = None
192
+ if isinstance(audio_result, tuple) and len(audio_result) > 0:
193
+ audio_filepath = audio_result[0]
194
+ elif isinstance(audio_result, str) and audio_result.endswith(('.wav', '.mp3', '.flac', '.m4a')):
195
+ audio_filepath = audio_result
196
+ elif isinstance(audio_result, list) and len(audio_result) > 0:
197
+ audio_filepath = audio_result[0]
198
+
199
+ if audio_filepath:
200
+ progress(1.0, desc="πŸ”Š Audio ready!")
201
+ print(f"RETRY SUCCESS: Returning audio file path: {audio_filepath}")
202
+ return audio_filepath
203
+
204
+ except Exception as retry_error:
205
+ print(f"TTS retry failed: {retry_error}")
206
+ pass
207
+
208
+ progress(1.0, desc="Audio generation failed")
209
+ print("TTS failed completely - returning None")
210
+ return None
211
+
212
def speech_to_text_whisper_space(audio_filepath, progress=gr.Progress(track_tqdm=True), max_retries=3):
    """Transcribe a student's recording via the hosted Whisper Space.

    Args:
        audio_filepath: Path to the recorded audio file (from gr.Audio).
        progress: Gradio progress tracker used for UI status updates.
        max_retries: Number of attempts against the remote STT endpoint.

    Returns:
        The transcript string on success, otherwise a human-readable error
        message. Callers detect error messages by substring matching, so
        these strings must stay stable.
    """
    progress(0.1, desc="Sending your reading for transcription...")
    if not whisper_stt_client:
        progress(1.0, desc="Complete")
        return "Speech-to-text service is not available. πŸ› οΈ"
    if not audio_filepath:
        progress(1.0, desc="Complete")
        return "No recording received for transcription. 🎀"
    for attempt in range(max_retries):
        try:
            progress(0.2 + (attempt * 0.1), desc=f"Transcribing your voice (Whisper) - Attempt {attempt + 1}...")
            result = whisper_stt_client.predict(audio_filepath, api_name="/predict")
            progress(0.9, desc="Transcription complete.")
            # The Space may return a bare string or a (text, ...) tuple/list.
            if isinstance(result, (tuple, list)) and len(result) > 0:
                transcribed_text = result[0] if result[0] else ""
            elif isinstance(result, str):
                transcribed_text = result
            else:
                progress(1.0, desc="Complete")
                return "Hmm, STT service returned unexpected format. πŸ€”"
            progress(1.0, desc="Transcription complete!")
            return transcribed_text if transcribed_text else "No speech detected in the recording. 🀫"
        except Exception as stt_error:
            # BUG FIX: failures were previously swallowed silently
            # (`except Exception: continue`), making remote-service problems
            # impossible to diagnose. Log each attempt (matching this file's
            # print-based logging style) and back off briefly before retrying.
            print(f"STT attempt {attempt + 1}/{max_retries} failed: {stt_error}")
            if attempt < max_retries - 1:
                time.sleep(1)
    progress(1.0, desc="Complete")
    return "Unexpected error during transcription. Please try again! πŸ”„"
 
240
 
241
def clean_text_for_comparison(text):
    """Normalise text into a list of lowercase words for diffing.

    Lowercases the input, strips punctuation (apostrophes are deliberately
    kept so contractions like "don't" survive intact), and splits on
    whitespace. Non-string input yields an empty list.
    """
    if not isinstance(text, str):
        return []
    strip_chars = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~"
    normalised = text.lower().translate(str.maketrans('', '', strip_chars))
    return normalised.split()
 
 
245
 
246
def compare_texts_for_feedback(original_text, student_text):
    """Diff the original passage against the student's transcript.

    Both strings are normalised via clean_text_for_comparison() and aligned
    word-by-word with difflib.SequenceMatcher.

    Returns:
        (feedback_markdown, highlighted_html) where feedback_markdown is a
        celebration message for a perfect read or a progress report with
        pronunciation tips, and highlighted_html is the passage with each
        diff segment wrapped in a colour-coded <span>.
    """
    original_words, student_words = clean_text_for_comparison(original_text), clean_text_for_comparison(student_text)
    # No recognisable words in the transcript -> nothing to compare.
    if not student_words: return "It sounds like you didn't record or it was very quiet! 🀫 Try recording again nice and clear!", ""
    # autojunk=False keeps very frequent words (e.g. "the") significant in the diff.
    matcher = difflib.SequenceMatcher(None, original_words, student_words, autojunk=False)
    feedback_lines, highlighted_parts = [], []
    word_diff_count = 0  # running total of mismatched words across all segments
    pronunciation_tips = []
    # Curated phonetic respellings for common tricky English words.
    pronunciation_guide = {
        'the': 'thuh or thee', 'through': 'threw', 'though': 'thoh', 'thought': 'thawt',
        'knight': 'night', 'know': 'noh', 'write': 'right', 'wrong': 'rawng', 'what': 'wot',
        'where': 'wair', 'when': 'wen', 'why': 'wy', 'who': 'hoo', 'laugh': 'laff',
        'enough': 'ee-nuff', 'cough': 'koff', 'rough': 'ruff', 'tough': 'tuff', 'magic': 'maj-ik',
        'school': 'skool', 'friend': 'frend', 'said': 'sed', 'says': 'sez', 'once': 'wunts',
        'was': 'wuz', 'were': 'wur', 'you': 'yoo', 'your': 'yor', 'there': 'thair', 'their': 'thair', 'they': 'thay'
    }
    def get_pronunciation_tip(word):
        # Produce a kid-friendly hint for pronouncing `word`.
        word_lower = word.lower()
        if word_lower in pronunciation_guide:
            return f"πŸ—£οΈ Try saying: \"{pronunciation_guide[word_lower]}\""
        elif len(word) > 6:
            # Naive syllable split: break after a vowel that is followed by a
            # consonant. Good enough for a reading hint, not real linguistics.
            syllables = []
            vowels = 'aeiou'
            current_syllable = ''
            for i, char in enumerate(word_lower):
                current_syllable += char
                if char in vowels and i < len(word_lower) - 1:
                    if word_lower[i + 1] not in vowels:
                        syllables.append(current_syllable)
                        current_syllable = ''
            if current_syllable: syllables.append(current_syllable)
            if len(syllables) > 1: return f"πŸ”€ Break it down: \"{'-'.join(syllables)}\""
        # Spelling-pattern fallbacks for common English quirks.
        if word_lower.endswith('tion'): return "πŸ—£οΈ Words ending in '-tion' sound like 'shun'"
        elif word_lower.endswith('ough'): return "πŸ—£οΈ '-ough' can be tricky! Listen to the audio again"
        elif 'gh' in word_lower: return "πŸ—£οΈ 'gh' is often silent or sounds like 'f'"
        elif word_lower.startswith('wr'): return "πŸ—£οΈ In 'wr-' words, the 'w' is silent"
        elif word_lower.startswith('kn'): return "πŸ—£οΈ In 'kn-' words, the 'k' is silent"
        return f"🎯 Focus on each sound in \"{word}\""
    # Walk the diff opcodes; build per-segment feedback plus colour-coded HTML.
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        orig_seg_words, stud_seg_words = original_words[i1:i2], student_words[j1:j2]
        orig_seg_text, stud_seg_text = " ".join(orig_seg_words), " ".join(stud_seg_words)
        if tag == 'equal':
            # Correctly read words: green highlight.
            highlighted_parts.append(f'<span style="background: #90EE90; padding: 2px 4px; border-radius: 4px; margin: 1px;">{orig_seg_text}</span>')
        else:
            word_diff_count += max(len(orig_seg_words), len(stud_seg_words))
            if tag == 'replace':
                # Substituted words: a tip for each original word that differs.
                # NOTE(review): zip() stops at the shorter segment, so in an
                # unequal-length replace the trailing words get no tip -- confirm intended.
                for orig_word, stud_word in zip(orig_seg_words, stud_seg_words):
                    if orig_word != stud_word:
                        tip = get_pronunciation_tip(orig_word)
                        pronunciation_tips.append(f"**{orig_word.upper()}**: {tip}")
                feedback_lines.append(f"πŸ”„ Instead of: \"{orig_seg_text}\", you said: \"{stud_seg_text}\"")
                highlighted_parts.append(f'<span style="background: #FFE4B5; padding: 2px 4px; border-radius: 4px; margin: 1px; text-decoration: line-through;">{orig_seg_text}</span> <span style="background: #FFB6C1; padding: 2px 4px; border-radius: 4px; margin: 1px; font-weight: bold;">{stud_seg_text}</span>')
            elif tag == 'delete':
                # Words present in the passage but missing from the reading.
                for missed_word in orig_seg_words:
                    tip = get_pronunciation_tip(missed_word)
                    pronunciation_tips.append(f"**{missed_word.upper()}** (missed): {tip}")
                feedback_lines.append(f"⏭️ You missed: \"{orig_seg_text}\"")
                highlighted_parts.append(f'<span style="background: #FFA0B4; padding: 2px 4px; border-radius: 4px; margin: 1px; text-decoration: line-through;">{orig_seg_text}</span> <span style="font-style: italic; color: #666;">(*skipped*)</span>')
            elif tag == 'insert':
                # Words the student added that are not in the passage.
                feedback_lines.append(f"βž• You added: \"{stud_seg_text}\" (not in original)")
                highlighted_parts.append(f'<span style="background: #DDA0DD; padding: 2px 4px; border-radius: 4px; margin: 1px; font-style: italic;">(*added:* {stud_seg_text})</span>')
    final_text = " ".join(highlighted_parts)
    if not feedback_lines:
        # Every segment matched: celebrate a perfect reading.
        feedback_html = """
        πŸŽ‰πŸ₯³ **PERFECT READING!** πŸ₯³πŸŽ‰
        Amazing! You read every single word correctly! 🌟
        πŸ† **Reading Champion!** πŸ†
        """
        return feedback_html, final_text
    else:
        # Assemble the progress report: summary, issues, tips, suggestions.
        feedback_parts = [
            f"πŸ“ˆ **Reading Progress Report**",
            f"πŸ“Š **Words to practice:** {word_diff_count}",
            f"πŸ’ͺ **Keep improving!** Practice makes perfect!",
            "",
            "πŸ” **What to work on:**"
        ]
        for line in feedback_lines: feedback_parts.append(f"β€’ {line}")
        if pronunciation_tips:
            feedback_parts.extend([
                "",
                "🎀 **Pronunciation Helper**",
                "Here's how to say the tricky words:"
            ])
            # Cap at five tips so the report stays digestible.
            for tip in pronunciation_tips[:5]: feedback_parts.append(f"β€’ {tip}")
            feedback_parts.extend([
                "",
                "πŸ’‘ **Pro tip:** Listen to the story audio again and pay special attention to these words!"
            ])
        feedback_parts.extend([
            "",
            "🎯 **Practice Suggestions**",
            "β€’ 🎧 Listen to the AI reading first",
            "β€’ πŸ”€ Practice saying difficult words slowly",
            "β€’ πŸ“– Read the story again at your own pace",
            "β€’ πŸ”„ Try recording again when you're ready!"
        ])
        feedback_html = "\n".join(feedback_parts)
        return feedback_html, final_text
 
344
 
345
def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
    """Transcribe the student's recording and compare it to the passage.

    Args:
        original_passage_state: The story text the student was asked to read.
        student_audio_path: Filepath of the student's microphone recording.
        progress: Gradio progress tracker for UI status updates.

    Returns:
        (feedback_markdown, highlighted_passage_html). When the STT step
        returns a service/error message instead of a transcript, that message
        is surfaced directly and no comparison is attempted.
    """
    if not student_audio_path:
        return "🎀 Please record your reading first!", ""
    if not original_passage_state:
        return "Hmm, the original story is missing. 😟 Please generate a story first.", ""
    transcribed_text = speech_to_text_whisper_space(student_audio_path, progress=progress)
    # BUG FIX: the original substring list did not match any of the error
    # messages speech_to_text_whisper_space actually returns, so STT failures
    # were diffed against the passage as if they were the student's reading.
    # The real messages' substrings are appended (old entries kept for safety).
    stt_errors = [
        "couldn't understand", "had trouble", "service isn't working",
        "service is busy", "didn't get any recording", "filepath type issue",
        "not available", "no recording received", "unexpected format",
        "unexpected error during transcription", "no speech detected",
    ]
    if any(err in (transcribed_text or "").lower() for err in stt_errors):
        return transcribed_text, ""
    progress(0.6, desc="Analyzing your reading accuracy...")
    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
    progress(1.0, desc="Assessment complete!")
    return feedback, highlighted_passage
353
+
 
 
 
 
 
 
 
 
 
 
 
 
354
# Custom stylesheet passed to gr.Blocks(css=...) below: light neutral look,
# button hover/active/processing states, and loading animations for status text.
css = """
body, .gradio-container {
    background: #f9fafb !important;
    font-family: -apple-system, BlinkMacSystemFont, 'San Francisco', 'Segoe UI', 'Roboto', Arial, sans-serif !important;
}
.main-header {
    background: white !important;
    border-radius: 20px !important;
    box-shadow: 0 8px 32px 0 rgba(60,60,90,0.06) !important;
    padding: 36px 20px 24px 20px !important;
    margin-bottom: 28px !important;
    text-align: center;
    border: none !important;
}
.main-header h1 {font-size: 2.2rem !important; font-weight: 700 !important; color: #23232b !important;}
.main-header p {color: #6b7280 !important; font-size: 1.08rem !important; margin-bottom: 8px !important;}
.tech-badge {background: #e0e7ef !important; color: #4f8fff !important; border-radius: 12px !important; padding: 4px 12px !important; font-size: 12px !important; font-weight: 600 !important;}
.gr-block, .gr-panel {background: white !important; border-radius: 18px !important; box-shadow: 0 2px 8px 0 rgba(60,60,90,0.07) !important; border: none !important; padding: 28px 22px !important;}
.section-header {background: transparent !important; border: none !important; padding: 0 !important; margin-bottom: 16px !important;}
.section-header h3 {color: #1e293b !important; font-size: 1.14rem !important; font-weight: 600 !important;}
.section-header p {color: #8691a2 !important; font-size: 13px !important;}

/* Enhanced button styles with click feedback */
.gr-button {
    background: linear-gradient(90deg, #007AFF, #2689ff) !important;
    color: white !important;
    border-radius: 18px !important;
    font-weight: 600 !important;
    border: none !important;
    box-shadow: 0 1px 4px rgba(0, 123, 255, 0.04) !important;
    padding: 9px 22px !important;
    font-size: 16px !important;
    transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
    transform: translateY(0) !important;
}

.gr-button:hover {
    background: linear-gradient(90deg, #2689ff, #007AFF) !important;
    box-shadow: 0 4px 12px rgba(0, 123, 255, 0.15) !important;
    transform: translateY(-1px) !important;
}

.gr-button:active {
    background: linear-gradient(90deg, #0056CC, #1F5FFF) !important;
    box-shadow: 0 1px 3px rgba(0, 123, 255, 0.25) !important;
    transform: translateY(1px) !important;
    transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}

.gr-button[variant="secondary"] {
    background: linear-gradient(90deg, #e0e7ef, #dde5f2) !important;
    color: #2a3140 !important;
    transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
    transform: translateY(0) !important;
}

.gr-button[variant="secondary"]:hover {
    background: linear-gradient(90deg, #dde5f2, #e0e7ef) !important;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08) !important;
    transform: translateY(-1px) !important;
}

.gr-button[variant="secondary"]:active {
    background: linear-gradient(90deg, #d1d9e0, #c9d1db) !important;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15) !important;
    transform: translateY(1px) !important;
    transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}

/* Processing state for buttons */
.gr-button.processing {
    background: linear-gradient(90deg, #94a3b8, #cbd5e1) !important;
    color: #64748b !important;
    cursor: wait !important;
    transform: translateY(0) !important;
    box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

label {color: #374151 !important; font-weight: 600 !important; font-size: 15px !important;}
.gr-textbox, .gr-dropdown {border-radius: 12px !important; border: 1.5px solid #dbeafe !important; background: #f6f8fb !important; font-size: 16px !important; padding: 10px 14px !important;}
.gr-textbox:focus, .gr-dropdown:focus {border-color: #007AFF !important; box-shadow: 0 0 0 2px rgba(0, 122, 255, 0.10) !important; outline: none !important;}
.gr-audio {background: #f9fafb !important; border-radius: 16px !important; border: 1.5px solid #e5e7eb !important; padding: 18px !important;}
.feedback-container {background: #f4f7fa !important; border-radius: 18px !important; padding: 18px 24px !important;}

/* Spinner animation for progress indicators */
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Pulse animation for loading states */
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.7; }
}

.loading-pulse {
    animation: pulse 1.5s ease-in-out infinite;
}
"""
454
 
455
+ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="ReadRight") as app:
456
+ gr.Markdown("""
457
+ <div class="main-header">
458
+ <h1>πŸ“š ReadRight</h1>
459
+ <p>AI-powered reading practice and pronunciation feedback for students</p>
460
+ </div>
461
+ """)
462
+
463
+ original_passage_state = gr.State("")
464
+
465
+ with gr.Tabs():
466
+ with gr.TabItem("πŸ“– Practice & Generate", elem_id="main_tab"):
467
+ with gr.Row(equal_height=True):
468
+ with gr.Column(scale=1, variant="panel"):
469
+ gr.Markdown("""
470
+ <div class="section-header">
471
+ <h3>πŸ“ Story & Reading</h3>
472
+ <p>Enter details, get your story, generate audio, and record yourselfβ€”all in one flow.</p>
473
+ </div>
474
+ """)
475
+ s_name = gr.Textbox(label="πŸ‘€ Your Name", placeholder="Enter your name")
476
+ s_grade = gr.Dropdown(label="πŸŽ“ Grade Level", choices=[f"Grade {i}" for i in range(1, 11)], value="Grade 3")
477
+ s_topic = gr.Textbox(label="πŸ’‘ Story Topic", placeholder="E.g., space, animals, friendship")
478
+ gen_btn = gr.Button("✨ Generate Story", variant="primary")
479
+ passage_out = gr.Textbox(label="πŸ“– Story", lines=8, interactive=False, placeholder="Your story appears here...")
480
+ audio_out = gr.Audio(label="🎡 Story Audio", type="filepath", visible=True, autoplay=False)
481
+ gr.Markdown("""
482
+ <div style="margin: 20px 0 0 0; padding: 10px 20px; background: #f4f7fa; border-radius: 16px;">
483
+ <b>➑️ Next:</b> Record yourself reading below for feedback.
484
+ </div>
485
+ """)
486
+ stud_audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎀 Your Recording")
487
+ record_again_btn = gr.Button("πŸ”„ Record Again", variant="secondary", size="sm", visible=False)
488
+ clear_recording_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary", size="sm", visible=False)
489
+ assess_btn = gr.Button("πŸ” Analyze Reading", variant="primary", size="lg", interactive=False)
490
+ recording_status = gr.Markdown("", elem_id="recording_status")
491
+ analysis_status = gr.Markdown("", elem_id="analysis_status")
492
+ with gr.TabItem("πŸ“Š Analysis & Feedback", elem_id="analysis_tab"):
493
+ gr.Markdown("""
494
+ <div class="section-header">
495
+ <h3>πŸ“ˆ Analysis</h3>
496
+ <p>See your performance and areas to improve</p>
497
+ </div>
498
+ """)
499
+ feedback_out = gr.Markdown(
500
+ value="""
501
+ <div style="text-align: center; color: #6b7280;">
502
+ <h4>Analysis Results</h4>
503
+ <p>Your feedback will appear here.</p>
504
+ </div>
505
+ """,
506
+ elem_id="feedback_output"
507
+ )
508
+ highlighted_out = gr.Markdown(
509
+ value="""
510
+ <div style="text-align: center; color: #6b7280;">
511
+ <h4>Word-by-Word Analysis</h4>
512
+ <p>Get color-coded feedback below.</p>
513
+ </div>
514
+ """,
515
+ elem_id="highlighted_passage_output"
516
+ )
517
+ gr.Markdown("""
518
+ <div style="background: #f7fafc; border-radius: 16px; padding: 16px 12px 10px 12px; margin: 22px 0 18px 0; box-shadow: 0 2px 6px 0 rgba(60,60,90,0.04);">
519
+ <b>Color code:</b>
520
+ <div style="display: flex; gap: 14px; flex-wrap: wrap; margin-top: 8px;">
521
+ <span style="background: #90EE90; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #155724;">Perfect Match</span>
522
+ <span style="background: #FFE4B5; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #856404;">Substitution</span>
523
+ <span style="background: #FFA0B4; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #721c24;">Skipped Word</span>
524
+ <span style="background: #DDA0DD; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #5f006a;">Extra Word</span>
525
+ </div>
526
+ </div>
527
+ """)
528
+ gr.Markdown("""
529
+ <div style="margin: 14px 0; padding: 14px 22px; background: #f8fafc; border-radius: 14px;">
530
+ <span style="color: #0a58ca; font-weight: 500;">Goals:</span>
531
+ <ul style="margin: 7px 0 0 18px; color: #6b7280;">
532
+ <li>Word accuracy above 90%</li>
533
+ <li>Speak clearly and with confidence</li>
534
+ <li>Practice as much as you like</li>
535
+ </ul>
536
+ </div>
537
+ """)
538
+
539
+ with gr.TabItem("ℹ️ About & How It Works", elem_id="about_tab"):
540
+ gr.Markdown("""
541
+ <div class="section-header">
542
+ <h3>πŸ”§ How ReadRight Works</h3>
543
+ <p>Understanding the technology behind your ReadRight</p>
544
+ </div>
545
+ """)
546
+
547
+ gr.Markdown("""
548
+ ## 🎯 What This Platform Does
549
+
550
+ ReadRight is an AI-powered tool designed to help students improve their reading skills through:
551
+
552
+ - **✨ Personalized Story Generation**: Creates age-appropriate reading passages tailored to your grade level and interests
553
+ - **πŸ”Š Audio Pronunciation Models**: Provides clear audio examples of proper pronunciation
554
+ - **⚑ Real-time Speech Analysis**: Analyzes your reading accuracy and identifies areas for improvement
555
+ - **🎯 Detailed Feedback**: Offers specific pronunciation tips and practice suggestions
556
+
557
+ ## πŸ—οΈ Reading Practice Application Workflow
558
+ """)
559
+
560
+ # Use HTML component for the SVG
561
+ gr.HTML("""
562
+ <div style="width: 100%; overflow-x: auto; padding: 20px 0;">
563
+ <svg width="1400" height="700" xmlns="http://www.w3.org/2000/svg" style="max-width: 100%; height: auto;">
564
+ <!-- Background -->
565
+ <rect width="1400" height="600" fill="#fafafa"/>
566
+
567
+ <!-- Title -->
568
+ <text x="700" y="30" text-anchor="middle" font-size="24" font-weight="bold" fill="#1f2937">Reading Practice Application Workflow</text>
569
+
570
+ <!-- Top Row - Input to Audio -->
571
+ <rect x="100" y="80" width="200" height="100" rx="20" fill="#dbeafe" stroke="#2563eb" stroke-width="3"/>
572
+ <text x="200" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#1e40af">User Input</text>
573
+ <text x="200" y="140" text-anchor="middle" font-size="14" fill="#3730a3">Student Name</text>
574
+ <text x="200" y="160" text-anchor="middle" font-size="14" fill="#3730a3">Grade Level & Topic</text>
575
+
576
+ <!-- Arrow 1 -->
577
+ <path d="M300 130 L380 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
578
+ <text x="340" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">INPUT</text>
579
+
580
+ <rect x="380" y="80" width="200" height="100" rx="20" fill="#dcfce7" stroke="#16a34a" stroke-width="3"/>
581
+ <text x="480" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#15803d">Story Generator</text>
582
+ <text x="480" y="140" text-anchor="middle" font-size="14" fill="#166534">AI creates personalized</text>
583
+ <text x="480" y="160" text-anchor="middle" font-size="14" fill="#166534">reading story</text>
584
+
585
+ <!-- Arrow 2 -->
586
+ <path d="M580 130 L660 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
587
+ <text x="620" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STORY</text>
588
+
589
+ <rect x="660" y="80" width="200" height="100" rx="20" fill="#fef3c7" stroke="#d97706" stroke-width="3"/>
590
+ <text x="760" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#b45309">Audio Synthesis</text>
591
+ <text x="760" y="140" text-anchor="middle" font-size="14" fill="#92400e">Text-to-Speech</text>
592
+ <text x="760" y="160" text-anchor="middle" font-size="14" fill="#92400e">Audio Generation</text>
593
+
594
+ <!-- Arrow 3 -->
595
+ <path d="M860 130 L960 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
596
+ <text x="910" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">AUDIO</text>
597
+
598
+ <rect x="960" y="80" width="200" height="100" rx="20" fill="#f3e8ff" stroke="#9333ea" stroke-width="3"/>
599
+ <text x="1060" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#7c3aed">Text Comparison</text>
600
+ <text x="1060" y="140" text-anchor="middle" font-size="14" fill="#6b21a8">Analysis Engine</text>
601
+ <text x="1060" y="160" text-anchor="middle" font-size="14" fill="#6b21a8">Accuracy Detection</text>
602
+
603
+ <!-- Vertical Flow Arrow (Audio to Student Recording) -->
604
+ <path d="M760 180 L760 250" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
605
+ <text x="790" y="220" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STUDENT LISTENS</text>
606
+
607
+ <!-- Bottom Row - Student Practice to Feedback -->
608
+ <rect x="660" y="250" width="200" height="100" rx="20" fill="#fce7f3" stroke="#ec4899" stroke-width="3"/>
609
+ <text x="760" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#be185d">Student Recording</text>
610
+ <text x="760" y="310" text-anchor="middle" font-size="14" fill="#9d174d">Student reads</text>
611
+ <text x="760" y="330" text-anchor="middle" font-size="14" fill="#9d174d">story aloud</text>
612
+
613
+ <!-- Arrow 4 (Student Recording to Speech Recognition) -->
614
+ <path d="M660 300 L580 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
615
+ <text x="620" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RECORDING</text>
616
+
617
+ <rect x="380" y="250" width="200" height="100" rx="20" fill="#e0e7ff" stroke="#6366f1" stroke-width="3"/>
618
+ <text x="480" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#4338ca">Speech Recognition</text>
619
+ <text x="480" y="310" text-anchor="middle" font-size="14" fill="#3730a3">Speech-to-Text</text>
620
+ <text x="480" y="330" text-anchor="middle" font-size="14" fill="#3730a3">Transcription</text>
621
+
622
+ <!-- Arrow 5 (Speech Recognition to Feedback) -->
623
+ <path d="M380 300 L300 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
624
+ <text x="340" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">TRANSCRIPT</text>
625
+
626
+ <rect x="100" y="250" width="200" height="100" rx="20" fill="#fef2f2" stroke="#ef4444" stroke-width="3"/>
627
+ <text x="200" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#dc2626">Feedback System</text>
628
+ <text x="200" y="310" text-anchor="middle" font-size="14" fill="#b91c1c">Performance Analysis</text>
629
+ <text x="200" y="330" text-anchor="middle" font-size="14" fill="#b91c1c">Improvement Tips</text>
630
+
631
+ <!-- Arrow from Feedback to Report -->
632
+ <path d="M200 350 L200 450" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
633
+ <text x="230" y="400" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RESULTS</text>
634
+
635
+ <!-- Output Box -->
636
+ <rect x="100" y="450" width="200" height="80" rx="20" fill="#f0fdf4" stroke="#22c55e" stroke-width="3"/>
637
+ <text x="200" y="480" text-anchor="middle" font-size="18" font-weight="bold" fill="#16a34a">Student Report</text>
638
+ <text x="200" y="505" text-anchor="middle" font-size="14" fill="#15803d">Reading accuracy</text>
639
+ <text x="200" y="520" text-anchor="middle" font-size="14" fill="#15803d">& improvement areas</text>
640
+
641
+ <!-- Process Flow Indicators -->
642
+ <circle cx="760" cy="400" r="8" fill="#3b82f6"/>
643
+ <text x="780" y="370" font-size="12" font-weight="bold" fill="#3b82f6">ACTIVE LEARNING</text>
644
+ <text x="780" y="385" font-size="10" fill="#3b82f6">Student practices reading</text>
645
+ <text x="780" y="415" font-size="12" font-weight="bold" fill="#3b82f6">AI ASSESSMENT</text>
646
+ <text x="780" y="430" font-size="10" fill="#3b82f6">Real-time analysis & feedback</text>
647
+
648
+ <!-- Arrowhead Definition -->
649
+ <defs>
650
+ <marker id="arrowhead" markerWidth="12" markerHeight="7" refX="10" refY="3.5" orient="auto">
651
+ <polygon points="0 0, 12 3.5, 0 7" fill="#6b7280"/>
652
+ </marker>
653
+ </defs>
654
+ </svg>
655
+ </div>
656
+ """)
657
+
658
+ gr.Markdown("""
659
+ ---
660
+
661
+ ## πŸ”§ Key Components
662
+
663
+ - **User Input (UI Agent)**: Collects student details (name, grade, topic) via an intuitive interface.
664
+ - **Story Generator (LLM Agent)**: Utilizes advanced language models to craft personalized, engaging stories.
665
+ - **Audio Synthesis (TTS Agent)**: Converts text stories into natural-sounding speech for accurate pronunciation guidance.
666
+ - **Student Recording (Recording Agent)**: Captures student readings for analysis.
667
+ - **Speech Recognition (STT Agent)**: Transcribes recorded readings into text for comparison.
668
+ - **Text Comparison (Analysis Agent)**: Analyzes transcription accuracy, comparing student readings to the original text.
669
+ - **Feedback Generation (Feedback Agent)**: Creates detailed feedback reports, highlighting strengths and areas for improvement.
670
+
671
+ """)
672
+
673
+ gr.Markdown("""
674
+ <div style="text-align: center; margin-top: 30px; padding: 20px; background: white; border-radius: 12px; font-size: 0.96em; color: #6b7280;">
675
+ Built for reading practice with modern AI tools.
676
+ </div>
677
+ """)
678
+
679
+ def generate_story_and_setup_ui(name, grade, topic):
680
+ story_text, audio_btn_update, audio_player_update, passage_state = "", gr.update(interactive=False, visible=False), gr.update(value=None, visible=False), ""
681
+ res = generate_story_from_llm(name, grade, topic)
682
+ if res:
683
+ story_text, audio_btn_update, audio_player_update = res
684
+ if story_text and not any(err in story_text.lower() for err in ["error", "blocked", "couldn't", "api key not configured"]):
685
+ passage_state = story_text
686
+ return story_text, audio_btn_update, audio_player_update, passage_state
687
+
688
+ def assess_reading_with_analysis(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
689
+ if not student_audio_path:
690
+ return (
691
+ """
692
+ <div class="status-indicator">
693
+ <p style="margin: 0; font-weight: 500;">🎀 Please record your reading first!</p>
694
+ </div>
695
+ """,
696
+ "🎀 Please record your reading first!",
697
+ ""
698
+ )
699
+ if not original_passage_state:
700
+ return (
701
+ """
702
+ <div class="status-indicator">
703
+ <p style="margin: 0; font-weight: 500;">πŸ“š Please generate a story first in the Story Creator tab.</p>
704
+ </div>
705
+ """,
706
+ "Please generate a story first in the Story Creator tab.",
707
+ ""
708
+ )
709
+
710
+ # Immediate feedback that analysis is starting
711
+ progress(0.05, desc="Analysis starting...")
712
+
713
+ # Start transcription
714
+ progress(0.1, desc="Starting transcription...")
715
+ transcribed_text = speech_to_text_whisper_space(student_audio_path, progress=progress)
716
+
717
+ stt_errors = ["couldn't understand", "had trouble", "service isn't working", "service is busy", "didn't get any recording", "filepath type issue"]
718
+ if any(err in (transcribed_text or "").lower() for err in stt_errors):
719
+ return (
720
+ """
721
+ <div class="status-indicator status-error">
722
+ <p style="margin: 0; font-weight: 500;">❌ Transcription Error</p>
723
+ <p style="margin: 5px 0 0 0; font-size: 13px;">Please try recording again</p>
724
+ </div>
725
+ """,
726
+ transcribed_text,
727
+ ""
728
+ )
729
+
730
+ progress(0.6, desc="Analyzing your reading accuracy...")
731
+ feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
732
+ progress(1.0, desc="Assessment complete!")
733
+
734
+ analysis_msg = """
735
+ <div class="status-indicator status-success">
736
+ <p style="margin: 0; font-weight: 500;">βœ… Analysis Complete!</p>
737
+ <p style="margin: 5px 0 0 0; font-size: 13px;">Head over to the "Analysis & Feedback" tab to see your results! 🎯</p>
738
  </div>
739
  """
740
+ return (analysis_msg, feedback, highlighted_passage)
741
+
742
    def update_recording_status(audio_file):
        """Sync recording-related controls with the microphone component.

        Triggered by stud_audio_in.change. Output order (per the .change
        wiring): recording_status markdown, record_again_btn visibility,
        clear_recording_btn visibility, assess_btn interactivity.
        """
        if audio_file is not None:
            # A recording exists: show the success banner, reveal the
            # record-again/clear buttons and enable analysis.
            return (
                gr.update(value="""
                <div class="status-indicator status-success">
                    <p style="margin: 0; font-weight: 500;">πŸŽ‰ Recording Complete!</p>
                    <p style="margin: 5px 0 0 0; font-size: 12px;">Ready for analysis</p>
                </div>
                """),
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(interactive=True)
            )
        else:
            # Recording cleared: reset the banner, hide the extra buttons
            # and disable analysis until a new take arrives.
            return (
                gr.update(value="""
                <div class="status-indicator">
                    <p style="margin: 0; font-weight: 500;">🎀 Ready to Record</p>
                    <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
                </div>
                """),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(interactive=False)
            )
767
+
768
    def clear_recording():
        """Reset the recording workflow and feedback panels to their initial state.

        Presumably wired to clear_recording_btn.click (wiring is below this
        view -- TODO confirm). The seven outputs appear to map to:
        stud_audio_in (cleared), recording_status, record_again_btn,
        clear_recording_btn, assess_btn, feedback_out, highlighted_out.
        """
        return (
            None,
            gr.update(value="""
            <div class="status-indicator">
                <p style="margin: 0; font-weight: 500;">🎀 Ready to Record</p>
                <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
            </div>
            """),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(interactive=False),
            """
            <div style="text-align: center; color: #6b7280;">
                <h4>Analysis Results</h4>
                <p>Your feedback will appear here.</p>
                <div class="status-indicator">
                    <p style="margin: 0; font-size: 14px;">πŸ’‘ Record yourself reading to get started!</p>
                </div>
            </div>
            """,
            """
            <div style="text-align: center; color: #6b7280;">
                <h4>Word-by-Word Analysis</h4>
                <p>Get color-coded feedback below.</p>
                <div class="status-indicator">
                    <p style="margin: 0; font-size: 14px;">🎀 Complete a reading practice session to see your analysis!</p>
                </div>
            </div>
            """
        )
799
 
800
    def record_again_action():
        """Clear the current take and prompt the student to re-record.

        Wired to record_again_btn.click; outputs (per the wiring below):
        stud_audio_in (cleared), recording_status, record_again_btn,
        clear_recording_btn, assess_btn. Unlike clear_recording(), this
        leaves the feedback panels untouched.
        """
        return (
            None,
            gr.update(value="""
            <div class="status-indicator status-warning">
                <p style="margin: 0; font-weight: 500;">πŸ”„ Ready for Take 2!</p>
                <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to record again</p>
            </div>
            """),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(interactive=False)
        )
813
 
814
    def handle_audio_generation(story_text, progress=gr.Progress(track_tqdm=True)):
        """Generate TTS audio for the current story and update the player UI.

        Returns two gr.update objects: one for the audio player (filepath or
        cleared) and one for a status banner (success/error HTML, or hidden
        when there is no story text to speak).
        """
        if not story_text or not story_text.strip():
            # Nothing to synthesise: clear the player and hide the banner.
            return (
                gr.update(value=None, visible=True),
                gr.update(value="", visible=False)
            )

        # Generate the audio file via the sibling TTS helper.
        audio_filepath = text_to_speech_using_space(story_text, progress)
        print(f"AUDIO HANDLER: Received audio file path: {audio_filepath}")

        if audio_filepath:
            print(f"AUDIO HANDLER: Updating audio component with file: {audio_filepath}")
            success_msg = """
            <div style="background: #f0fdf4; border: 1px solid #22c55e; border-radius: 12px; padding: 12px 20px; margin: 8px 0; text-align: center;">
                <span style="color: #15803d; font-weight: 500;">βœ… Audio ready! You can now listen to your story.</span>
            </div>
            """
            return (
                gr.update(value=audio_filepath, visible=True),
                gr.update(value=success_msg, visible=True)
            )
        else:
            # TTS helper returned None: surface a failure banner.
            print("AUDIO HANDLER: No audio file received, returning None")
            error_msg = """
            <div style="background: #fef2f2; border: 1px solid #ef4444; border-radius: 12px; padding: 12px 20px; margin: 8px 0; text-align: center;">
                <span style="color: #dc2626; font-weight: 500;">❌ Audio generation failed. Please try again.</span>
            </div>
            """
            return (
                gr.update(value=None, visible=True),
                gr.update(value=error_msg, visible=True)
            )
848
+
849
def generate_story_and_audio_automatically(name, grade, topic, progress=gr.Progress(track_tqdm=True)):
    """Create a personalized story and immediately narrate it in one flow.

    Args:
        name: The student's name, woven into the story.
        grade: The student's grade level.
        topic: The requested story topic.
        progress: Gradio progress tracker shared across both phases.

    Returns:
        A triple of (story text, audio-player ``gr.update``, state value) so a
        single click fills the passage box, the audio widget, and the stored
        original-passage state.
    """
    progress(0.0, desc="Starting story creation...")

    # Phase 1: generate the story via the LLM helper.
    story_result = generate_story_from_llm(name, grade, topic, progress)
    if not story_result:
        return "", gr.update(value=None, visible=True), ""

    # The generator may hand back a bare string or a tuple whose first
    # element is the story text.
    if isinstance(story_result, tuple):
        story_text = story_result[0]
    else:
        story_text = story_result

    # NOTE(review): substring heuristic — a legitimate story that happens to
    # contain a word like "error" or "couldn't" would also be treated as a
    # failure here; confirm against the exact failure messages emitted by
    # generate_story_from_llm.
    failure_markers = ("error", "blocked", "couldn't", "api key not configured")
    if not story_text or any(marker in story_text.lower() for marker in failure_markers):
        return story_text, gr.update(value=None, visible=True), ""

    # Phase 2: the story is good — narrate it automatically.
    progress(0.5, desc="Story complete! Now generating audio...")

    try:
        audio_filepath = text_to_speech_using_space(story_text, progress)
    except Exception as e:
        # TTS failure must not discard the story the student already has.
        print(f"AUTO AUDIO ERROR: {e}")
        return story_text, gr.update(value=None, visible=True), story_text

    if audio_filepath:
        print(f"AUTO AUDIO: Successfully generated audio: {audio_filepath}")
        return story_text, gr.update(value=audio_filepath, visible=True), story_text

    print("AUTO AUDIO: Audio generation failed, but story is still available")
    return story_text, gr.update(value=None, visible=True), story_text
+
883
+ # Event handlers with automatic audio generation
884
+ gen_btn.click(
885
+ fn=generate_story_and_audio_automatically,
886
+ inputs=[s_name, s_grade, s_topic],
887
+ outputs=[passage_out, audio_out, original_passage_state],
888
+ show_progress=True
889
  )
890
+
891
+ assess_btn.click(
892
+ fn=assess_reading_with_analysis,
893
+ inputs=[original_passage_state, stud_audio_in],
894
+ outputs=[analysis_status, feedback_out, highlighted_out],
895
+ show_progress=True
896
  )
897
+
898
+ stud_audio_in.change(
899
+ fn=update_recording_status,
900
+ inputs=[stud_audio_in],
901
+ outputs=[recording_status, record_again_btn, clear_recording_btn, assess_btn]
902
+ )
903
+
904
+ record_again_btn.click(
905
+ fn=record_again_action,
906
+ outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn]
907
+ )
908
+
909
+ clear_recording_btn.click(
910
+ fn=clear_recording,
911
+ outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn, feedback_out, highlighted_out]
912
  )
913
 
914
# Launch the application when executed as a script (not on import).
if __name__ == "__main__":
    app.launch(debug=True, share=False)