Update app.py
app.py CHANGED
@@ -1,385 +1,916 @@
 import gradio as gr
 import os
 import difflib
-from gradio_client import Client
 import time
 import google.generativeai as genai

 # --- Configuration & Clients ---

-def
-    """Configures the Google Gemini API with API key from Secrets or environment."""
     api_key = None
     try:
-        api_key = gr.Secrets.get("GOOGLE_API_KEY")
-    except AttributeError
-        api_key = os.environ.get("GOOGLE_API_KEY")
-    except FileNotFoundError:  # gr.Secrets.get can raise this if no secrets file found
         api_key = os.environ.get("GOOGLE_API_KEY")
     if api_key:
         try:
             genai.configure(api_key=api_key)
-            print("Google Gemini API configured successfully.")
             return True
         except Exception as e:
-            print(f"Error configuring Gemini API: {e}")
-            return False
     else:
-        print("WARN: GOOGLE_API_KEY not found

-# Initialize TTS Client (Using ESPnet VITS as an alternative to Bark)
 try:
-    tts_client = Client("
-    print("
-    # --- IMPORTANT: For Debugging VITS API if issues persist ---
-    # print("--- ESPnet VITS TTS API Details (Uncomment to view) ---")
-    # print(tts_client.view_api(all_endpoints=True))
-    # print("----------------------------------------------------")
-    # For a more structured dictionary output:
-    # api_info_tts = tts_client.view_api(return_format="dict")
-    # import json
-    # print(json.dumps(api_info_tts, indent=2))
-    # --- End Debugging Section ---
 except Exception as e:
-    print(f"
     tts_client = None

-# Initialize STT Client for Whisper (abidlabs/whisper-large-v2)
 try:
     whisper_stt_client = Client("abidlabs/whisper-large-v2")
-    # --- For Debugging Whisper API ---
-    # print("--- Whisper STT API Details (Uncomment to view) ---")
-    # print(whisper_stt_client.view_api(all_endpoints=True))
-    # print("-------------------------------------------------")
-except Exception as e:
-    print(f"Fatal: Could not initialize Whisper STT client: {e}. STT will not work.")
-    whisper_stt_client = None

 # --- Helper Functions ---

 try:
     prompt = (
-        f"You are
-        f"The story
-        f"Make the story engaging and positive. Jump right into the story without any introduction like 'Here is a story for you'."
-    )
-    safety_settings = [
-        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-    ]
-    generation_config = genai.types.GenerationConfig(
-        candidate_count=1, max_output_tokens=300, temperature=0.75
-    )
-    response = model.generate_content(
-        prompt, generation_config=generation_config, safety_settings=safety_settings
     )
     if response.candidates and response.candidates[0].content.parts:
         story = response.text
         if response.prompt_feedback and response.prompt_feedback.block_reason:
     else:
         if response.prompt_feedback and response.prompt_feedback.block_reason:
 except Exception as e:

 if not tts_client:
 try:
-        text_to_speak,
     return audio_filepath
 else:
-    if isinstance(audio_filepath, tuple) and len(audio_filepath) > 0 and isinstance(audio_filepath[0], str):
-        return audio_filepath[0]  # Assume audio path is the first element if it's a tuple
-    return "Hmm, the sound from VITS came out a bit funny. 🤔"
 except Exception as e:

 if not whisper_stt_client:
 if not audio_filepath:
-    return
-    return f"Oh no! Whisper had trouble hearing that. Error: {e}"

 def clean_text_for_comparison(text):
     if not isinstance(text, str): return []
-    text = text.lower()
-    text = text.translate(str.maketrans('', '', punctuation_to_remove))
-    return text.split()

 def compare_texts_for_feedback(original_text, student_text):
-    original_words = clean_text_for_comparison(original_text)
-    student_words
-    if not student_words:
-        return "It sounds like you didn't record anything, or maybe it was super quiet! 🤫 Try recording again nice and clear!", ""
     matcher = difflib.SequenceMatcher(None, original_words, student_words, autojunk=False)
-    feedback_lines = []
     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
         if tag == 'equal':
-            if
-            for
     if not feedback_lines:
     else:
-    )

 def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
-    if not student_audio_path:
-    progress(0.
-        "couldn't understand the words", "had trouble hearing that", "listening ears aren't working",
-        "listening ears are super busy", "didn't get any recording"
-    ]
-    if any(err in (transcribed_text or "") for err in error_conditions_stt):
-        return transcribed_text, ""  # Show STT error
-    progress(0.7, desc="🧠 Thinking about the words...")
-    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
-    progress(1.0, desc="✅ Feedback is ready!")
-    return feedback, highlighted_passage

-# --- Gradio Interface ---
 css = """
-body
     border-radius: 20px !important;
 }
 }
-.gr-
 }
 """

 </div>
 """

-with
-            choices=[f"{i}" for i in range(1, 11)],  # Grades 1 to 10
-            value="3"  # Default value
     )
     )
-        fn=
-        inputs=[original_passage_state,
-        outputs=[
     )
     )

 if __name__ == "__main__":
-        print("🚨 GOOGLE_API_KEY not configured for local testing or failed to initialize!")
-        print("Please set it: export GOOGLE_API_KEY='your_key_here'")
-    if not tts_client:
-        print("🚨 ESPnet VITS TTS client (espnet/kan-bayashi_ljspeech_vits) could not be initialized. TTS will not work.")
-    if not whisper_stt_client:
-        print("🚨 Whisper STT client (abidlabs/whisper-large-v2) could not be initialized. STT will not work.")
-    app.launch(debug=True)  # Set share=True for a temporary public link if running locally
from dotenv import load_dotenv
load_dotenv()

import gradio as gr
import os
import difflib
from gradio_client import Client
import time
import google.generativeai as genai

# --- Configuration & Clients ---

def configure_llm_api():
    api_key = None
    try:
        api_key = gr.Secrets.get("GOOGLE_API_KEY")
    except (AttributeError, FileNotFoundError):
        api_key = os.environ.get("GOOGLE_API_KEY")
    if api_key:
        try:
            genai.configure(api_key=api_key)
            return True
        except Exception as e:
            print(f"Error configuring LLM (Gemini) API: {e}"); return False
    else:
        print("WARN: LLM API Key (GOOGLE_API_KEY) not found."); return False
LLM_API_CONFIGURED = configure_llm_api()

# Initialize new TTS client
try:
    tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
    print("✅ Connected to advanced TTS service (Text-To-Speech-Unlimited)")
except Exception as e:
    print(f"❌ Failed to connect to TTS service: {e}")
    tts_client = None

try:
    whisper_stt_client = Client("abidlabs/whisper-large-v2")
except Exception: whisper_stt_client = None

# --- Helper Functions ---
def generate_story_from_llm(name, grade_str, topic, progress=gr.Progress(track_tqdm=True)):
    progress(0.0, desc="Starting story creation...")
    default_passage_val = ""
    default_audio_gen_update = gr.update(interactive=False, visible=False)
    default_audio_player_update = gr.update(value=None, visible=False)
    if not LLM_API_CONFIGURED:
        progress(1.0, desc="Complete")
        return "LLM API key not configured...", default_audio_gen_update, default_audio_player_update
    try:
        if grade_str.startswith("Grade "):
            grade = int(grade_str.replace("Grade ", ""))
        else:
            grade = int(grade_str)
    except ValueError:
        progress(1.0, desc="Complete")
        return "Invalid grade level selected.", default_audio_gen_update, default_audio_player_update
    if grade <= 2: word_target, max_llm_tokens = "around 40-60 words", 100
    elif grade <= 5: word_target, max_llm_tokens = "around 80-100 words", 200
    elif grade <= 8: word_target, max_llm_tokens = "around 100-120 words", 250
    else: word_target, max_llm_tokens = "around 120-150 words", 300

    progress(0.1, desc="Setting up AI story generator...")
    story_text_result = default_passage_val
    audio_gen_btn_update = default_audio_gen_update
    try:
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
        prompt = (
            f"You are an AI assistant that creates engaging short reading passages. "
            f"Generate a story of {word_target} suitable for a student named {name} in Grade {grade}. "
            f"The story topic is: '{topic}'. Use age-appropriate vocabulary for Grade {grade}. Ensure the story is interesting and easy to read aloud. "
            f"Do not include any introductory or concluding phrases like 'Here is a story'."
        )
        safety_settings = [{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in [
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT"
        ]]
        generation_config = genai.types.GenerationConfig(candidate_count=1, max_output_tokens=max_llm_tokens, temperature=0.7)
        progress(0.3, desc="AI is writing your story...")
        response = model.generate_content(prompt, generation_config=generation_config, safety_settings=safety_settings)
        progress(0.8, desc="Polishing your story...")
        if response.candidates and response.candidates[0].content.parts:
            story = response.text
            if response.prompt_feedback and response.prompt_feedback.block_reason:
                story_text_result = f"Story idea for '{topic}' blocked (Reason: {response.prompt_feedback.block_reason}). Try a different topic."
            elif not story.strip():
                story_text_result = f"The LLM couldn't generate a story for '{topic}'. Try another topic or rephrase. ✨"
            else:
                story_text_result = story.strip()
                audio_gen_btn_update = gr.update(interactive=True, visible=True)
        else:
            if response.prompt_feedback and response.prompt_feedback.block_reason:
                story_text_result = f"Story idea for '{topic}' got blocked (Reason: {response.prompt_feedback.block_reason}). Try a different topic."
            else:
                story_text_result = "Hmm, LLM had trouble with that topic. Maybe try another one? 🤔"
        progress(1.0, desc="Story complete!")
        return story_text_result, audio_gen_btn_update, default_audio_player_update
    except Exception as e:
        progress(1.0, desc="Complete")
        return f"Oh no! Error generating story. Details: {e}", default_audio_gen_update, default_audio_player_update

def text_to_speech_using_space(text_to_speak, progress=gr.Progress(track_tqdm=True)):
    global tts_client
    progress(0.0, desc="Preparing voice synthesis...")

    if not text_to_speak or not text_to_speak.strip():
        progress(1.0, desc="Complete")
        return None

    progress(0.1, desc="Initializing audio generation...")

    # Reconnect to TTS client if needed
    if not tts_client:
        progress(0.2, desc="Connecting to advanced voice service...")
        try:
            tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
            progress(0.3, desc="Connected to voice service...")
        except Exception as e:
            print(f"Failed to connect to TTS service: {e}")
            progress(1.0, desc="Complete")
            return None

    if not tts_client:
        progress(1.0, desc="Complete")
        return None

    progress(0.4, desc="🎙️ AI is reading your story aloud...")
    try:
        # Try the correct API configuration with emotion parameter
        api_methods = [
            {"params": [text_to_speak, "alloy", "happy"], "api_name": "/text_to_speech_app"},
            {"params": [text_to_speak, "alloy", "neutral"], "api_name": "/text_to_speech_app"},
            {"params": [text_to_speak, "nova", "neutral"], "api_name": "/text_to_speech_app"},
            {"params": [text_to_speak], "api_name": "/predict"}
        ]

        audio_filepath = None
        for method in api_methods:
            try:
                print(f"Trying TTS with params: {method['params']} and api_name: {method['api_name']}")
                audio_result = tts_client.predict(
                    *method["params"],
                    api_name=method["api_name"]
                )
                print(f"TTS result type: {type(audio_result)}, content: {audio_result}")

                # Extract audio file path from result
                if isinstance(audio_result, tuple) and len(audio_result) > 0:
                    audio_filepath = audio_result[0]
                elif isinstance(audio_result, str) and audio_result.endswith(('.wav', '.mp3', '.flac', '.m4a')):
                    audio_filepath = audio_result
                elif isinstance(audio_result, list) and len(audio_result) > 0:
                    audio_filepath = audio_result[0]

                if audio_filepath:
                    print(f"Successfully generated audio: {audio_filepath}")
                    break

            except Exception as method_error:
                print(f"TTS method failed: {method_error}")
                continue

        if audio_filepath:
            progress(0.9, desc="🎵 Voice generation complete!")
            progress(1.0, desc="Audio ready!")
            print(f"FINAL: Returning audio file path: {audio_filepath}")
            return audio_filepath
        else:
            print("All TTS methods failed, trying to reconnect...")
            raise Exception("All API methods failed")

    except Exception as e:
        print(f"TTS error: {e}")
        # Try to reconnect on error
        try:
            progress(0.6, desc="Reconnecting to voice service...")
            tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
            if tts_client:
                progress(0.8, desc="🎙️ Retrying voice generation...")
                # Try the most basic approach with emotion parameter
                audio_result = tts_client.predict(
                    text_to_speak,
                    "alloy",    # voice
                    "neutral",  # emotion
                    api_name="/text_to_speech_app"
                )
                print(f"Retry result: {type(audio_result)}, {audio_result}")

                audio_filepath = None
                if isinstance(audio_result, tuple) and len(audio_result) > 0:
                    audio_filepath = audio_result[0]
                elif isinstance(audio_result, str) and audio_result.endswith(('.wav', '.mp3', '.flac', '.m4a')):
                    audio_filepath = audio_result
                elif isinstance(audio_result, list) and len(audio_result) > 0:
                    audio_filepath = audio_result[0]

                if audio_filepath:
                    progress(1.0, desc="Audio ready!")
                    print(f"RETRY SUCCESS: Returning audio file path: {audio_filepath}")
                    return audio_filepath

        except Exception as retry_error:
            print(f"TTS retry failed: {retry_error}")
            pass

        progress(1.0, desc="Audio generation failed")
        print("TTS failed completely - returning None")
        return None

def speech_to_text_whisper_space(audio_filepath, progress=gr.Progress(track_tqdm=True), max_retries=3):
    progress(0.1, desc="Sending your reading for transcription...")
    if not whisper_stt_client:
        progress(1.0, desc="Complete")
        return "Speech-to-text service is not available. 🛠️"
    if not audio_filepath:
        progress(1.0, desc="Complete")
        return "No recording received for transcription. 🎤"
    for attempt in range(max_retries):
        try:
            progress(0.2 + (attempt * 0.1), desc=f"Transcribing your voice (Whisper) - Attempt {attempt + 1}...")
            result = whisper_stt_client.predict(audio_filepath, api_name="/predict")
            progress(0.9, desc="Transcription complete.")
            if isinstance(result, tuple) and len(result) > 0:
                transcribed_text = result[0] if result[0] else ""
            elif isinstance(result, list) and len(result) > 0:
                transcribed_text = result[0] if result[0] else ""
            elif isinstance(result, str):
                transcribed_text = result
            else:
                progress(1.0, desc="Complete")
                return "Hmm, STT service returned unexpected format. 🤔"
            progress(1.0, desc="Transcription complete!")
            return transcribed_text if transcribed_text else "No speech detected in the recording. 🤫"
        except Exception:
            continue
    progress(1.0, desc="Complete")
    return "Unexpected error during transcription. Please try again!"
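The module imports `time` but never uses it: the retry loop in `speech_to_text_whisper_space` retries immediately on failure. A minimal sketch of one way to put the import to work, a linear backoff between attempts (hypothetical variant, not part of the committed file):

# Hypothetical variant of the retry body with a linear backoff between attempts.
for attempt in range(max_retries):
    try:
        result = whisper_stt_client.predict(audio_filepath, api_name="/predict")
        break
    except Exception:
        time.sleep(2 * (attempt + 1))  # wait 2 s, then 4 s, then 6 s
else:
    result = None  # every attempt failed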

def clean_text_for_comparison(text):
    if not isinstance(text, str): return []
    text = text.lower(); punctuation_to_remove = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~"
    text = text.translate(str.maketrans('', '', punctuation_to_remove)); return text.split()
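For reference, what the normalizer produces on a made-up sentence: apostrophes are deliberately absent from the removal set, while hyphens and the other listed punctuation are stripped.

print(clean_text_for_comparison("Hello, World! It's story-time."))
# ['hello', 'world', "it's", 'storytime']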

def compare_texts_for_feedback(original_text, student_text):
    original_words, student_words = clean_text_for_comparison(original_text), clean_text_for_comparison(student_text)
    if not student_words: return "It sounds like you didn't record or it was very quiet! 🤫 Try recording again nice and clear!", ""
    matcher = difflib.SequenceMatcher(None, original_words, student_words, autojunk=False)
    feedback_lines, highlighted_parts = [], []
    word_diff_count = 0
    pronunciation_tips = []
    pronunciation_guide = {
        'the': 'thuh or thee', 'through': 'threw', 'though': 'thoh', 'thought': 'thawt',
        'knight': 'night', 'know': 'noh', 'write': 'right', 'wrong': 'rawng', 'what': 'wot',
        'where': 'wair', 'when': 'wen', 'why': 'wy', 'who': 'hoo', 'laugh': 'laff',
        'enough': 'ee-nuff', 'cough': 'koff', 'rough': 'ruff', 'tough': 'tuff', 'magic': 'maj-ik',
        'school': 'skool', 'friend': 'frend', 'said': 'sed', 'says': 'sez', 'once': 'wunts',
        'was': 'wuz', 'were': 'wur', 'you': 'yoo', 'your': 'yor', 'there': 'thair', 'their': 'thair', 'they': 'thay'
    }
    def get_pronunciation_tip(word):
        word_lower = word.lower()
        if word_lower in pronunciation_guide:
            return f"🗣️ Try saying: \"{pronunciation_guide[word_lower]}\""
        elif len(word) > 6:
            syllables = []
            vowels = 'aeiou'
            current_syllable = ''
            for i, char in enumerate(word_lower):
                current_syllable += char
                if char in vowels and i < len(word_lower) - 1:
                    if word_lower[i + 1] not in vowels:
                        syllables.append(current_syllable)
                        current_syllable = ''
            if current_syllable: syllables.append(current_syllable)
            if len(syllables) > 1: return f"Break it down: \"{'-'.join(syllables)}\""
        if word_lower.endswith('tion'): return "🗣️ Words ending in '-tion' sound like 'shun'"
        elif word_lower.endswith('ough'): return "🗣️ '-ough' can be tricky! Listen to the audio again"
        elif 'gh' in word_lower: return "🗣️ 'gh' is often silent or sounds like 'f'"
        elif word_lower.startswith('wr'): return "🗣️ In 'wr-' words, the 'w' is silent"
        elif word_lower.startswith('kn'): return "🗣️ In 'kn-' words, the 'k' is silent"
        return f"🎯 Focus on each sound in \"{word}\""
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        orig_seg_words, stud_seg_words = original_words[i1:i2], student_words[j1:j2]
        orig_seg_text, stud_seg_text = " ".join(orig_seg_words), " ".join(stud_seg_words)
        if tag == 'equal':
            highlighted_parts.append(f'<span style="background: #90EE90; padding: 2px 4px; border-radius: 4px; margin: 1px;">{orig_seg_text}</span>')
        else:
            word_diff_count += max(len(orig_seg_words), len(stud_seg_words))
            if tag == 'replace':
                for orig_word, stud_word in zip(orig_seg_words, stud_seg_words):
                    if orig_word != stud_word:
                        tip = get_pronunciation_tip(orig_word)
                        pronunciation_tips.append(f"**{orig_word.upper()}**: {tip}")
                feedback_lines.append(f"Instead of: \"{orig_seg_text}\", you said: \"{stud_seg_text}\"")
                highlighted_parts.append(f'<span style="background: #FFE4B5; padding: 2px 4px; border-radius: 4px; margin: 1px; text-decoration: line-through;">{orig_seg_text}</span> <span style="background: #FFB6C1; padding: 2px 4px; border-radius: 4px; margin: 1px; font-weight: bold;">{stud_seg_text}</span>')
            elif tag == 'delete':
                for missed_word in orig_seg_words:
                    tip = get_pronunciation_tip(missed_word)
                    pronunciation_tips.append(f"**{missed_word.upper()}** (missed): {tip}")
                feedback_lines.append(f"You missed: \"{orig_seg_text}\"")
                highlighted_parts.append(f'<span style="background: #FFA0B4; padding: 2px 4px; border-radius: 4px; margin: 1px; text-decoration: line-through;">{orig_seg_text}</span> <span style="font-style: italic; color: #666;">(*skipped*)</span>')
            elif tag == 'insert':
                feedback_lines.append(f"You added: \"{stud_seg_text}\" (not in original)")
                highlighted_parts.append(f'<span style="background: #DDA0DD; padding: 2px 4px; border-radius: 4px; margin: 1px; font-style: italic;">(*added:* {stud_seg_text})</span>')
    final_text = " ".join(highlighted_parts)
    if not feedback_lines:
        feedback_html = """
        🎉🥳 **PERFECT READING!** 🥳🎉
        Amazing! You read every single word correctly!
        🏆 **Reading Champion!** 🏆
        """
        return feedback_html, final_text
    else:
        feedback_parts = [
            f"**Reading Progress Report**",
            f"**Words to practice:** {word_diff_count}",
            f"💪 **Keep improving!** Practice makes perfect!",
            "",
            "**What to work on:**"
        ]
        for line in feedback_lines: feedback_parts.append(f"• {line}")
        if pronunciation_tips:
            feedback_parts.extend([
                "",
                "🎤 **Pronunciation Helper**",
                "Here's how to say the tricky words:"
            ])
            for tip in pronunciation_tips[:5]: feedback_parts.append(f"• {tip}")
            feedback_parts.extend([
                "",
                "💡 **Pro tip:** Listen to the story audio again and pay special attention to these words!"
            ])
        feedback_parts.extend([
            "",
            "🎯 **Practice Suggestions**",
            "• 🎧 Listen to the AI reading first",
            "• 🎤 Practice saying difficult words slowly",
            "• Read the story again at your own pace",
            "• Try recording again when you're ready!"
        ])
        feedback_html = "\n".join(feedback_parts)
        return feedback_html, final_text
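The function above hinges on `difflib.SequenceMatcher.get_opcodes()`. For reference, a standalone example on made-up word lists showing all four tags it can emit:

from difflib import SequenceMatcher

orig = "the big cat sat on the mat".split()
read = "the large cat sat on mat quickly".split()
for tag, i1, i2, j1, j2 in SequenceMatcher(None, orig, read, autojunk=False).get_opcodes():
    print(tag, orig[i1:i2], read[j1:j2])
# equal   ['the'] ['the']
# replace ['big'] ['large']
# equal   ['cat', 'sat', 'on'] ['cat', 'sat', 'on']
# delete  ['the'] []
# equal   ['mat'] ['mat']
# insert  [] ['quickly']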

def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
    if not student_audio_path: return "🎤 Please record your reading first!", ""
    if not original_passage_state: return "Hmm, the original story is missing. Please generate a story first.", ""
    transcribed_text = speech_to_text_whisper_space(student_audio_path, progress=progress)
    stt_errors = ["couldn't understand", "had trouble", "service isn't working", "service is busy", "didn't get any recording", "filepath type issue"]
    if any(err in (transcribed_text or "").lower() for err in stt_errors): return transcribed_text, ""
    progress(0.6, desc="Analyzing your reading accuracy..."); feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
    progress(1.0, desc="Assessment complete!"); return feedback, highlighted_passage
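Before the interface code, one debugging aid from the removed version still applies: the old file kept commented-out `view_api()` probes for inspecting a Space's endpoints, and the same inspection is how `api_name` values like `/text_to_speech_app` and `/predict` used above can be verified. A minimal sketch using the Space ID from the new code (the `named_endpoints` key is the structure gradio_client is expected to return, stated here as an assumption):

# Inspect a remote Space's callable endpoints before hard-coding api_name values.
from gradio_client import Client

client = Client("NihalGazi/Text-To-Speech-Unlimited")
print(client.view_api(all_endpoints=True))        # human-readable endpoint summary
api_info = client.view_api(return_format="dict")  # structured form (assumed layout)
print(list(api_info.get("named_endpoints", {}).keys()))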
css = """
body, .gradio-container {
    background: #f9fafb !important;
    font-family: -apple-system, BlinkMacSystemFont, 'San Francisco', 'Segoe UI', 'Roboto', Arial, sans-serif !important;
}
.main-header {
    background: white !important;
    border-radius: 20px !important;
    box-shadow: 0 8px 32px 0 rgba(60,60,90,0.06) !important;
    padding: 36px 20px 24px 20px !important;
    margin-bottom: 28px !important;
    text-align: center;
    border: none !important;
}
.main-header h1 {font-size: 2.2rem !important; font-weight: 700 !important; color: #23232b !important;}
.main-header p {color: #6b7280 !important; font-size: 1.08rem !important; margin-bottom: 8px !important;}
.tech-badge {background: #e0e7ef !important; color: #4f8fff !important; border-radius: 12px !important; padding: 4px 12px !important; font-size: 12px !important; font-weight: 600 !important;}
.gr-block, .gr-panel {background: white !important; border-radius: 18px !important; box-shadow: 0 2px 8px 0 rgba(60,60,90,0.07) !important; border: none !important; padding: 28px 22px !important;}
.section-header {background: transparent !important; border: none !important; padding: 0 !important; margin-bottom: 16px !important;}
.section-header h3 {color: #1e293b !important; font-size: 1.14rem !important; font-weight: 600 !important;}
.section-header p {color: #8691a2 !important; font-size: 13px !important;}

/* Enhanced button styles with click feedback */
.gr-button {
    background: linear-gradient(90deg, #007AFF, #2689ff) !important;
    color: white !important;
    border-radius: 18px !important;
    font-weight: 600 !important;
    border: none !important;
    box-shadow: 0 1px 4px rgba(0, 123, 255, 0.04) !important;
    padding: 9px 22px !important;
    font-size: 16px !important;
    transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
    transform: translateY(0) !important;
}

.gr-button:hover {
    background: linear-gradient(90deg, #2689ff, #007AFF) !important;
    box-shadow: 0 4px 12px rgba(0, 123, 255, 0.15) !important;
    transform: translateY(-1px) !important;
}

.gr-button:active {
    background: linear-gradient(90deg, #0056CC, #1F5FFF) !important;
    box-shadow: 0 1px 3px rgba(0, 123, 255, 0.25) !important;
    transform: translateY(1px) !important;
    transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}

.gr-button[variant="secondary"] {
    background: linear-gradient(90deg, #e0e7ef, #dde5f2) !important;
    color: #2a3140 !important;
    transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
    transform: translateY(0) !important;
}

.gr-button[variant="secondary"]:hover {
    background: linear-gradient(90deg, #dde5f2, #e0e7ef) !important;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08) !important;
    transform: translateY(-1px) !important;
}

.gr-button[variant="secondary"]:active {
    background: linear-gradient(90deg, #d1d9e0, #c9d1db) !important;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15) !important;
    transform: translateY(1px) !important;
    transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}

/* Processing state for buttons */
.gr-button.processing {
    background: linear-gradient(90deg, #94a3b8, #cbd5e1) !important;
    color: #64748b !important;
    cursor: wait !important;
    transform: translateY(0) !important;
    box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

label {color: #374151 !important; font-weight: 600 !important; font-size: 15px !important;}
.gr-textbox, .gr-dropdown {border-radius: 12px !important; border: 1.5px solid #dbeafe !important; background: #f6f8fb !important; font-size: 16px !important; padding: 10px 14px !important;}
.gr-textbox:focus, .gr-dropdown:focus {border-color: #007AFF !important; box-shadow: 0 0 0 2px rgba(0, 122, 255, 0.10) !important; outline: none !important;}
.gr-audio {background: #f9fafb !important; border-radius: 16px !important; border: 1.5px solid #e5e7eb !important; padding: 18px !important;}
.feedback-container {background: #f4f7fa !important; border-radius: 18px !important; padding: 18px 24px !important;}

/* Spinner animation for progress indicators */
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Pulse animation for loading states */
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.7; }
}

.loading-pulse {
    animation: pulse 1.5s ease-in-out infinite;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css, title="ReadRight") as app:
    gr.Markdown("""
    <div class="main-header">
        <h1>ReadRight</h1>
        <p>AI-powered reading practice and pronunciation feedback for students</p>
    </div>
    """)

    original_passage_state = gr.State("")

    with gr.Tabs():
        with gr.TabItem("Practice & Generate", elem_id="main_tab"):
            with gr.Row(equal_height=True):
                with gr.Column(scale=1, variant="panel"):
                    gr.Markdown("""
                    <div class="section-header">
                        <h3>Story & Reading</h3>
                        <p>Enter details, get your story, generate audio, and record yourself, all in one flow.</p>
                    </div>
                    """)
                    s_name = gr.Textbox(label="Your Name", placeholder="Enter your name")
                    s_grade = gr.Dropdown(label="Grade Level", choices=[f"Grade {i}" for i in range(1, 11)], value="Grade 3")
                    s_topic = gr.Textbox(label="💡 Story Topic", placeholder="E.g., space, animals, friendship")
                    gen_btn = gr.Button("✨ Generate Story", variant="primary")
                    passage_out = gr.Textbox(label="Story", lines=8, interactive=False, placeholder="Your story appears here...")
                    audio_out = gr.Audio(label="🎵 Story Audio", type="filepath", visible=True, autoplay=False)
                    gr.Markdown("""
                    <div style="margin: 20px 0 0 0; padding: 10px 20px; background: #f4f7fa; border-radius: 16px;">
                        <b>➡️ Next:</b> Record yourself reading below for feedback.
                    </div>
                    """)
                    stud_audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Your Recording")
                    record_again_btn = gr.Button("Record Again", variant="secondary", size="sm", visible=False)
                    clear_recording_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm", visible=False)
                    assess_btn = gr.Button("Analyze Reading", variant="primary", size="lg", interactive=False)
                    recording_status = gr.Markdown("", elem_id="recording_status")
                    analysis_status = gr.Markdown("", elem_id="analysis_status")
        with gr.TabItem("Analysis & Feedback", elem_id="analysis_tab"):
            gr.Markdown("""
            <div class="section-header">
                <h3>Analysis</h3>
                <p>See your performance and areas to improve</p>
            </div>
            """)
            feedback_out = gr.Markdown(
                value="""
                <div style="text-align: center; color: #6b7280;">
                    <h4>Analysis Results</h4>
                    <p>Your feedback will appear here.</p>
                </div>
                """,
                elem_id="feedback_output"
            )
            highlighted_out = gr.Markdown(
                value="""
                <div style="text-align: center; color: #6b7280;">
                    <h4>Word-by-Word Analysis</h4>
                    <p>Get color-coded feedback below.</p>
                </div>
                """,
                elem_id="highlighted_passage_output"
            )
            gr.Markdown("""
            <div style="background: #f7fafc; border-radius: 16px; padding: 16px 12px 10px 12px; margin: 22px 0 18px 0; box-shadow: 0 2px 6px 0 rgba(60,60,90,0.04);">
                <b>Color code:</b>
                <div style="display: flex; gap: 14px; flex-wrap: wrap; margin-top: 8px;">
                    <span style="background: #90EE90; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #155724;">Perfect Match</span>
                    <span style="background: #FFE4B5; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #856404;">Substitution</span>
                    <span style="background: #FFA0B4; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #721c24;">Skipped Word</span>
                    <span style="background: #DDA0DD; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #5f006a;">Extra Word</span>
                </div>
            </div>
            """)
            gr.Markdown("""
            <div style="margin: 14px 0; padding: 14px 22px; background: #f8fafc; border-radius: 14px;">
                <span style="color: #0a58ca; font-weight: 500;">Goals:</span>
                <ul style="margin: 7px 0 0 18px; color: #6b7280;">
                    <li>Word accuracy above 90%</li>
                    <li>Speak clearly and with confidence</li>
                    <li>Practice as much as you like</li>
                </ul>
            </div>
            """)

        with gr.TabItem("ℹ️ About & How It Works", elem_id="about_tab"):
            gr.Markdown("""
            <div class="section-header">
                <h3>🧠 How ReadRight Works</h3>
                <p>Understanding the technology behind ReadRight</p>
            </div>
            """)

            gr.Markdown("""
            ## 🎯 What This Platform Does

            ReadRight is an AI-powered tool designed to help students improve their reading skills through:

            - **✨ Personalized Story Generation**: Creates age-appropriate reading passages tailored to your grade level and interests
            - **Audio Pronunciation Models**: Provides clear audio examples of proper pronunciation
            - **⚡ Real-time Speech Analysis**: Analyzes your reading accuracy and identifies areas for improvement
            - **🎯 Detailed Feedback**: Offers specific pronunciation tips and practice suggestions

            ## Reading Practice Application Workflow
            """)

            # Use HTML component for the SVG
            gr.HTML("""
            <div style="width: 100%; overflow-x: auto; padding: 20px 0;">
            <svg width="1400" height="700" xmlns="http://www.w3.org/2000/svg" style="max-width: 100%; height: auto;">
                <!-- Background -->
                <rect width="1400" height="600" fill="#fafafa"/>

                <!-- Title -->
                <text x="700" y="30" text-anchor="middle" font-size="24" font-weight="bold" fill="#1f2937">Reading Practice Application Workflow</text>

                <!-- Top Row - Input to Audio -->
                <rect x="100" y="80" width="200" height="100" rx="20" fill="#dbeafe" stroke="#2563eb" stroke-width="3"/>
                <text x="200" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#1e40af">User Input</text>
                <text x="200" y="140" text-anchor="middle" font-size="14" fill="#3730a3">Student Name</text>
                <text x="200" y="160" text-anchor="middle" font-size="14" fill="#3730a3">Grade Level & Topic</text>

                <!-- Arrow 1 -->
                <path d="M300 130 L380 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
                <text x="340" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">INPUT</text>

                <rect x="380" y="80" width="200" height="100" rx="20" fill="#dcfce7" stroke="#16a34a" stroke-width="3"/>
                <text x="480" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#15803d">Story Generator</text>
                <text x="480" y="140" text-anchor="middle" font-size="14" fill="#166534">AI creates personalized</text>
                <text x="480" y="160" text-anchor="middle" font-size="14" fill="#166534">reading story</text>

                <!-- Arrow 2 -->
                <path d="M580 130 L660 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
                <text x="620" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STORY</text>

                <rect x="660" y="80" width="200" height="100" rx="20" fill="#fef3c7" stroke="#d97706" stroke-width="3"/>
                <text x="760" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#b45309">Audio Synthesis</text>
                <text x="760" y="140" text-anchor="middle" font-size="14" fill="#92400e">Text-to-Speech</text>
                <text x="760" y="160" text-anchor="middle" font-size="14" fill="#92400e">Audio Generation</text>

                <!-- Arrow 3 -->
                <path d="M860 130 L960 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
                <text x="910" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">AUDIO</text>

                <rect x="960" y="80" width="200" height="100" rx="20" fill="#f3e8ff" stroke="#9333ea" stroke-width="3"/>
                <text x="1060" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#7c3aed">Text Comparison</text>
                <text x="1060" y="140" text-anchor="middle" font-size="14" fill="#6b21a8">Analysis Engine</text>
                <text x="1060" y="160" text-anchor="middle" font-size="14" fill="#6b21a8">Accuracy Detection</text>

                <!-- Vertical Flow Arrow (Audio to Student Recording) -->
                <path d="M760 180 L760 250" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
                <text x="790" y="220" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STUDENT LISTENS</text>

                <!-- Bottom Row - Student Practice to Feedback -->
                <rect x="660" y="250" width="200" height="100" rx="20" fill="#fce7f3" stroke="#ec4899" stroke-width="3"/>
                <text x="760" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#be185d">Student Recording</text>
                <text x="760" y="310" text-anchor="middle" font-size="14" fill="#9d174d">Student reads</text>
                <text x="760" y="330" text-anchor="middle" font-size="14" fill="#9d174d">story aloud</text>

                <!-- Arrow 4 (Student Recording to Speech Recognition) -->
                <path d="M660 300 L580 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
                <text x="620" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RECORDING</text>

                <rect x="380" y="250" width="200" height="100" rx="20" fill="#e0e7ff" stroke="#6366f1" stroke-width="3"/>
                <text x="480" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#4338ca">Speech Recognition</text>
                <text x="480" y="310" text-anchor="middle" font-size="14" fill="#3730a3">Speech-to-Text</text>
                <text x="480" y="330" text-anchor="middle" font-size="14" fill="#3730a3">Transcription</text>

                <!-- Arrow 5 (Speech Recognition to Feedback) -->
                <path d="M380 300 L300 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
                <text x="340" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">TRANSCRIPT</text>

                <rect x="100" y="250" width="200" height="100" rx="20" fill="#fef2f2" stroke="#ef4444" stroke-width="3"/>
                <text x="200" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#dc2626">Feedback System</text>
                <text x="200" y="310" text-anchor="middle" font-size="14" fill="#b91c1c">Performance Analysis</text>
                <text x="200" y="330" text-anchor="middle" font-size="14" fill="#b91c1c">Improvement Tips</text>

                <!-- Arrow from Feedback to Report -->
                <path d="M200 350 L200 450" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
                <text x="230" y="400" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RESULTS</text>

                <!-- Output Box -->
                <rect x="100" y="450" width="200" height="80" rx="20" fill="#f0fdf4" stroke="#22c55e" stroke-width="3"/>
                <text x="200" y="480" text-anchor="middle" font-size="18" font-weight="bold" fill="#16a34a">Student Report</text>
                <text x="200" y="505" text-anchor="middle" font-size="14" fill="#15803d">Reading accuracy</text>
                <text x="200" y="520" text-anchor="middle" font-size="14" fill="#15803d">& improvement areas</text>

                <!-- Process Flow Indicators -->
                <circle cx="760" cy="400" r="8" fill="#3b82f6"/>
                <text x="780" y="370" font-size="12" font-weight="bold" fill="#3b82f6">ACTIVE LEARNING</text>
                <text x="780" y="385" font-size="10" fill="#3b82f6">Student practices reading</text>
                <text x="780" y="415" font-size="12" font-weight="bold" fill="#3b82f6">AI ASSESSMENT</text>
                <text x="780" y="430" font-size="10" fill="#3b82f6">Real-time analysis & feedback</text>

                <!-- Arrowhead Definition -->
                <defs>
                    <marker id="arrowhead" markerWidth="12" markerHeight="7" refX="10" refY="3.5" orient="auto">
                        <polygon points="0 0, 12 3.5, 0 7" fill="#6b7280"/>
                    </marker>
                </defs>
            </svg>
            </div>
            """)

            gr.Markdown("""
            ---

            ## 🔧 Key Components

            - **User Input (UI Agent)**: Collects student details (name, grade, topic) via an intuitive interface.
            - **Story Generator (LLM Agent)**: Utilizes advanced language models to craft personalized, engaging stories.
            - **Audio Synthesis (TTS Agent)**: Converts text stories into natural-sounding speech for accurate pronunciation guidance.
            - **Student Recording (Recording Agent)**: Captures student readings for analysis.
            - **Speech Recognition (STT Agent)**: Transcribes recorded readings into text for comparison.
            - **Text Comparison (Analysis Agent)**: Analyzes transcription accuracy, comparing student readings to the original text.
            - **Feedback Generation (Feedback Agent)**: Creates detailed feedback reports, highlighting strengths and areas for improvement.
            """)

            gr.Markdown("""
            <div style="text-align: center; margin-top: 30px; padding: 20px; background: white; border-radius: 12px; font-size: 0.96em; color: #6b7280;">
            Built for reading practice with modern AI tools.
            </div>
            """)

    def generate_story_and_setup_ui(name, grade, topic):
        story_text, audio_btn_update, audio_player_update, passage_state = "", gr.update(interactive=False, visible=False), gr.update(value=None, visible=False), ""
        res = generate_story_from_llm(name, grade, topic)
        if res:
            story_text, audio_btn_update, audio_player_update = res
            if story_text and not any(err in story_text.lower() for err in ["error", "blocked", "couldn't", "api key not configured"]):
                passage_state = story_text
        return story_text, audio_btn_update, audio_player_update, passage_state

    def assess_reading_with_analysis(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
        if not student_audio_path:
            return (
                """
                <div class="status-indicator">
                    <p style="margin: 0; font-weight: 500;">🎤 Please record your reading first!</p>
                </div>
                """,
                "🎤 Please record your reading first!",
                ""
            )
        if not original_passage_state:
            return (
                """
                <div class="status-indicator">
                    <p style="margin: 0; font-weight: 500;">Please generate a story first in the Story Creator tab.</p>
                </div>
                """,
                "Please generate a story first in the Story Creator tab.",
                ""
            )

        # Immediate feedback that analysis is starting
        progress(0.05, desc="Analysis starting...")

        # Start transcription
        progress(0.1, desc="Starting transcription...")
        transcribed_text = speech_to_text_whisper_space(student_audio_path, progress=progress)

        stt_errors = ["couldn't understand", "had trouble", "service isn't working", "service is busy", "didn't get any recording", "filepath type issue"]
        if any(err in (transcribed_text or "").lower() for err in stt_errors):
            return (
                """
                <div class="status-indicator status-error">
                    <p style="margin: 0; font-weight: 500;">❌ Transcription Error</p>
                    <p style="margin: 5px 0 0 0; font-size: 13px;">Please try recording again</p>
                </div>
                """,
                transcribed_text,
                ""
            )

        progress(0.6, desc="Analyzing your reading accuracy...")
        feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
        progress(1.0, desc="Assessment complete!")

        analysis_msg = """
        <div class="status-indicator status-success">
            <p style="margin: 0; font-weight: 500;">✅ Analysis Complete!</p>
            <p style="margin: 5px 0 0 0; font-size: 13px;">Head over to the "Analysis & Feedback" tab to see your results! 🎯</p>
        </div>
        """
        return (analysis_msg, feedback, highlighted_passage)

    def update_recording_status(audio_file):
        if audio_file is not None:
            return (
                gr.update(value="""
                <div class="status-indicator status-success">
                    <p style="margin: 0; font-weight: 500;">Recording Complete!</p>
                    <p style="margin: 5px 0 0 0; font-size: 12px;">Ready for analysis</p>
                </div>
                """),
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(interactive=True)
            )
        else:
            return (
                gr.update(value="""
                <div class="status-indicator">
                    <p style="margin: 0; font-weight: 500;">🎤 Ready to Record</p>
                    <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
                </div>
                """),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(interactive=False)
            )

    def clear_recording():
        return (
            None,
            gr.update(value="""
            <div class="status-indicator">
                <p style="margin: 0; font-weight: 500;">🎤 Ready to Record</p>
                <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
            </div>
            """),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(interactive=False),
            """
            <div style="text-align: center; color: #6b7280;">
                <h4>Analysis Results</h4>
                <p>Your feedback will appear here.</p>
                <div class="status-indicator">
                    <p style="margin: 0; font-size: 14px;">💡 Record yourself reading to get started!</p>
                </div>
            </div>
            """,
            """
            <div style="text-align: center; color: #6b7280;">
                <h4>Word-by-Word Analysis</h4>
                <p>Get color-coded feedback below.</p>
                <div class="status-indicator">
                    <p style="margin: 0; font-size: 14px;">🎤 Complete a reading practice session to see your analysis!</p>
                </div>
            </div>
            """
        )

    def record_again_action():
        return (
            None,
            gr.update(value="""
            <div class="status-indicator status-warning">
                <p style="margin: 0; font-weight: 500;">Ready for Take 2!</p>
                <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to record again</p>
            </div>
            """),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(interactive=False)
        )

    def handle_audio_generation(story_text, progress=gr.Progress(track_tqdm=True)):
        """Handle audio generation with visual progress indicator"""
        if not story_text or not story_text.strip():
            return (
                gr.update(value=None, visible=True),
                gr.update(value="", visible=False)
            )

        # Generate the audio file
        audio_filepath = text_to_speech_using_space(story_text, progress)
        print(f"AUDIO HANDLER: Received audio file path: {audio_filepath}")

        if audio_filepath:
            print(f"AUDIO HANDLER: Updating audio component with file: {audio_filepath}")
            success_msg = """
            <div style="background: #f0fdf4; border: 1px solid #22c55e; border-radius: 12px; padding: 12px 20px; margin: 8px 0; text-align: center;">
                <span style="color: #15803d; font-weight: 500;">✅ Audio ready! You can now listen to your story.</span>
            </div>
            """
            return (
                gr.update(value=audio_filepath, visible=True),
                gr.update(value=success_msg, visible=True)
            )
        else:
            print("AUDIO HANDLER: No audio file received, returning None")
            error_msg = """
            <div style="background: #fef2f2; border: 1px solid #ef4444; border-radius: 12px; padding: 12px 20px; margin: 8px 0; text-align: center;">
                <span style="color: #dc2626; font-weight: 500;">❌ Audio generation failed. Please try again.</span>
            </div>
            """
            return (
                gr.update(value=None, visible=True),
                gr.update(value=error_msg, visible=True)
            )

    def generate_story_and_audio_automatically(name, grade, topic, progress=gr.Progress(track_tqdm=True)):
        """Generate story and automatically create audio in one seamless flow"""
        progress(0.0, desc="Starting story creation...")

        # First generate the story using the existing function
        story_result = generate_story_from_llm(name, grade, topic, progress)
        if not story_result:
            return "", gr.update(value=None, visible=True), ""

        # Extract story text from the result tuple
        story_text = story_result[0] if isinstance(story_result, tuple) else story_result

        # Check if story generation was successful
        if not story_text or any(err in story_text.lower() for err in ["error", "blocked", "couldn't", "api key not configured"]):
            return story_text, gr.update(value=None, visible=True), ""

        # Story generated successfully, now automatically generate audio
        progress(0.5, desc="Story complete! Now generating audio...")

        try:
            # Generate audio automatically
            audio_filepath = text_to_speech_using_space(story_text, progress)

            if audio_filepath:
                print(f"AUTO AUDIO: Successfully generated audio: {audio_filepath}")
                return story_text, gr.update(value=audio_filepath, visible=True), story_text
            else:
                print("AUTO AUDIO: Audio generation failed, but story is still available")
                return story_text, gr.update(value=None, visible=True), story_text

        except Exception as e:
            print(f"AUTO AUDIO ERROR: {e}")
            return story_text, gr.update(value=None, visible=True), story_text

    # Event handlers with automatic audio generation
    gen_btn.click(
        fn=generate_story_and_audio_automatically,
        inputs=[s_name, s_grade, s_topic],
        outputs=[passage_out, audio_out, original_passage_state],
        show_progress=True
    )

    assess_btn.click(
        fn=assess_reading_with_analysis,
        inputs=[original_passage_state, stud_audio_in],
        outputs=[analysis_status, feedback_out, highlighted_out],
        show_progress=True
    )

    stud_audio_in.change(
        fn=update_recording_status,
        inputs=[stud_audio_in],
        outputs=[recording_status, record_again_btn, clear_recording_btn, assess_btn]
    )

    record_again_btn.click(
        fn=record_again_action,
        outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn]
    )

    clear_recording_btn.click(
        fn=clear_recording,
        outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn, feedback_out, highlighted_out]
    )

# Launch the application
if __name__ == "__main__":
    app.launch(debug=True, share=False)
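For a quick smoke test without the UI, the same pipeline can be chained directly from the functions above. A minimal sketch, assuming the LLM key and both Space clients initialized, that the `gr.Progress` defaults are tolerated outside a Gradio event, and a hypothetical recording file:

story, _btn, _player = generate_story_from_llm("Asha", "Grade 3", "space travel")
reference_audio = text_to_speech_using_space(story)             # the model's reading of the story
transcript = speech_to_text_whisper_space("student_take1.wav")  # hypothetical student recording
feedback, highlighted = compare_texts_for_feedback(story, transcript)
print(feedback)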