ParulPandey commited on
Commit
f1bbb15
Β·
verified Β·
1 Parent(s): 254209b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +845 -314
app.py CHANGED
@@ -1,385 +1,916 @@
 
 
 
1
  import gradio as gr
2
  import os
3
  import difflib
4
- from gradio_client import Client, file as gradio_file # Renamed to avoid conflict
5
  import time
6
  import google.generativeai as genai
7
 
8
  # --- Configuration & Clients ---
9
 
10
- def configure_gemini_api():
11
- """Configures the Google Gemini API with API key from Secrets or environment."""
12
  api_key = None
13
  try:
14
- api_key = gr.Secrets.get("GOOGLE_API_KEY") # For Hugging Face Spaces
15
- except AttributeError: # Running locally, gr.Secrets not available
16
- api_key = os.environ.get("GOOGLE_API_KEY")
17
- except FileNotFoundError: # gr.Secrets.get can raise this if no secrets file found
18
  api_key = os.environ.get("GOOGLE_API_KEY")
19
-
20
  if api_key:
21
  try:
22
  genai.configure(api_key=api_key)
23
- print("Google Gemini API configured successfully.")
24
  return True
25
  except Exception as e:
26
- print(f"Error configuring Gemini API: {e}")
27
- return False
28
  else:
29
- print("WARN: GOOGLE_API_KEY not found in Gradio Secrets or environment. Story generation with Gemini will be disabled.")
30
- return False
31
 
32
- GEMINI_API_CONFIGURED = configure_gemini_api()
33
-
34
- # Initialize TTS Client (Using ESPnet VITS as an alternative to Bark)
35
  try:
36
- tts_client = Client("espnet/kan-bayashi_ljspeech_vits")
37
- print("ESPnet VITS TTS client initialized successfully.")
38
- # --- IMPORTANT: For Debugging VITS API if issues persist ---
39
- # print("--- ESPnet VITS TTS API Details (Uncomment to view) ---")
40
- # print(tts_client.view_api(all_endpoints=True))
41
- # print("----------------------------------------------------")
42
- # For a more structured dictionary output:
43
- # api_info_tts = tts_client.view_api(return_format="dict")
44
- # import json
45
- # print(json.dumps(api_info_tts, indent=2))
46
- # --- End Debugging Section ---
47
  except Exception as e:
48
- print(f"Fatal: Could not initialize ESPnet VITS TTS client: {e}. TTS will not work.")
49
  tts_client = None
50
 
51
- # Initialize STT Client for Whisper (abidlabs/whisper-large-v2)
52
  try:
53
  whisper_stt_client = Client("abidlabs/whisper-large-v2")
54
- print("Whisper STT client initialized successfully.")
55
- # --- For Debugging Whisper API ---
56
- # print("--- Whisper STT API Details (Uncomment to view) ---")
57
- # print(whisper_stt_client.view_api(all_endpoints=True))
58
- # print("-------------------------------------------------")
59
- except Exception as e:
60
- print(f"Fatal: Could not initialize Whisper STT client: {e}. STT will not work.")
61
- whisper_stt_client = None
62
 
63
  # --- Helper Functions ---
64
-
65
- def generate_story_with_gemini(name, grade, topic):
66
- if not GEMINI_API_CONFIGURED:
67
- return "Google Gemini API key not configured. Story generation is disabled. πŸ”‘"
 
 
 
 
68
  try:
69
- model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest") # Fast and capable
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  prompt = (
71
- f"You are a super friendly and imaginative storyteller for kids. "
72
- f"Please write an exciting and fun short story (around 100-120 words) for a student named {name} who is in Grade {grade}. "
73
- f"The story must be about '{topic}'. "
74
- f"Use simple words and sentences that a Grade {grade} student can easily read aloud and understand. "
75
- f"Make the story engaging and positive. Jump right into the story without any introduction like 'Here is a story for you'."
76
- )
77
- safety_settings = [
78
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
79
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
80
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
81
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
82
- ]
83
- generation_config = genai.types.GenerationConfig(
84
- candidate_count=1, max_output_tokens=300, temperature=0.75
85
- )
86
- response = model.generate_content(
87
- prompt, generation_config=generation_config, safety_settings=safety_settings
88
  )
 
 
 
 
 
 
 
 
 
 
89
  if response.candidates and response.candidates[0].content.parts:
90
  story = response.text
91
  if response.prompt_feedback and response.prompt_feedback.block_reason:
92
- return f"Oh dear! My story idea for '{topic}' was a bit too wild and got blocked (Reason: {response.prompt_feedback.block_reason}). Let's try a different topic! 😊"
93
- if not story.strip():
94
- return f"Hmm, Gemini gave me a blank page for '{topic}'. Let's try a different topic or try again! ✨"
95
- return story.strip()
 
 
96
  else:
97
  if response.prompt_feedback and response.prompt_feedback.block_reason:
98
- return f"Oh dear! My story idea for '{topic}' was a bit too wild and got blocked (Reason: {response.prompt_feedback.block_reason}). Let's try a different topic! 😊"
99
- print(f"Gemini API response issue: {response}")
100
- return f"Hmm, Gemini's story magic seems to be on a little break for '{topic}'. Maybe try another topic? πŸ€”"
 
 
101
  except Exception as e:
102
- print(f"Error generating story with Gemini: {e}")
103
- if "API_KEY_INVALID" in str(e).lower() or "api key not valid" in str(e).lower():
104
- return "Oops! The Google Gemini API key seems to be having a problem. Please tell the grown-ups to check it! πŸ”‘"
105
- return f"Oh no! 😟 I had a little trouble dreaming up a story with Gemini. Error: {e}"
106
-
107
- def text_to_speech_vits(text_to_speak):
 
 
 
 
 
 
 
 
108
  if not tts_client:
109
- return "The VITS sound machine isn't working right now. πŸ› οΈ Please tell the grown-ups!"
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  try:
111
- # Parameters for espnet/kan-bayashi_ljspeech_vits.
112
- # YOU MUST VERIFY these with tts_client.view_api() if TTS fails.
113
- # The fn_index (or api_name) and the order/names of parameters are critical.
114
- job = tts_client.submit(
115
- text_to_speak, # text (str)
116
- "EN", # lang (str) - e.g., "EN" for English in this model
117
- 0, # speaker_id (int | float) - usually 0 for LJSpeech default
118
- 0.667, # noise_scale (float) - variance of Z
119
- 0.8, # noise_scale_w (float) - variance of Z in duration
120
- 1.0, # length_scale (float) - controls speed
121
- fn_index=0 # ASSUMPTION: TTS is the first function (index 0).
122
- # If view_api() shows a different fn_index or an api_name like "/predict", use that.
123
- )
124
- # VITS is generally faster than Bark, but network can add delays
125
- audio_filepath = job.result(timeout=90)
126
-
127
- # This space typically returns just the audio filepath directly.
128
- if isinstance(audio_filepath, str) and audio_filepath.endswith(('.wav', '.mp3', '.flac')):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  return audio_filepath
130
  else:
131
- # Sometimes the result might be a tuple, e.g., (filepath, samplerate)
132
- # Check the actual output structure from view_api() or by printing audio_filepath
133
- print(f"Unexpected VITS TTS result format: {audio_filepath}")
134
- if isinstance(audio_filepath, tuple) and len(audio_filepath) > 0 and isinstance(audio_filepath[0], str):
135
- return audio_filepath[0] # Assume audio path is the first element if it's a tuple
136
- return "Hmm, the sound from VITS came out a bit funny. πŸ€”"
137
  except Exception as e:
138
- print(f"Error with VITS TTS (espnet/kan-bayashi_ljspeech_vits): {e}")
139
- if "Queue full" in str(e).lower() or "too much pending traffic" in str(e).lower():
140
- return "The VITS sound machine is busy! Please try again in a moment. πŸ•’"
141
- # Provide more specific error if submit call itself failed due to wrong params
142
- if "expected" in str(e).lower() and ("argument" in str(e).lower() or "parameter" in str(e).lower()):
143
- return f"VITS TTS had a hiccup with parameters. (Details: {e}). Please check view_api() output."
144
- return f"Oh dear, VITS couldn't make the sound. πŸ”‡ Error: {e}"
145
-
146
-
147
- def speech_to_text_whisper_space(audio_filepath):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  if not whisper_stt_client:
149
- return "The Whisper listening ears aren't working right now. πŸ› οΈ Please tell the grown-ups!"
 
150
  if not audio_filepath:
151
- return "Oops! I didn't get any recording to listen to. 🎀"
152
- try:
153
- # API for abidlabs/whisper-large-v2 usually takes audio, task, language.
154
- job = whisper_stt_client.submit(
155
- gradio_file(audio_filepath), # Use gradio_client.file to handle the upload
156
- "transcribe", # task
157
- "English", # language (can be None for auto-detect)
158
- api_name="/predict" # This is common for abidlabs/whisper spaces
159
- )
160
- result_dict = job.result(timeout=120) # Wait up to 2 minutes
161
-
162
- if isinstance(result_dict, dict) and 'text' in result_dict:
163
- return result_dict['text']
164
- elif isinstance(result_dict, str): # Fallback if it's simpler and returns text directly
165
- return result_dict
166
- else:
167
- print(f"Unexpected Whisper STT result format: {result_dict}")
168
- return "Hmm, I couldn't quite understand the words from Whisper. πŸ€”"
169
- except Exception as e:
170
- print(f"Error transcribing audio with Whisper Space: {e}")
171
- if "Queue full" in str(e).lower() or "too much pending traffic" in str(e).lower():
172
- return "The Whisper listening ears are super busy! 인기폭발! ΠΎΡ‡Π΅Ρ€Π΅Π΄ΡŒ! Please try again in a bit. πŸ•’"
173
- return f"Oh no! Whisper had trouble hearing that. πŸ™‰ Error: {e}"
174
 
175
  def clean_text_for_comparison(text):
176
  if not isinstance(text, str): return []
177
- text = text.lower()
178
- punctuation_to_remove = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~" # Keeps apostrophes for contractions
179
- text = text.translate(str.maketrans('', '', punctuation_to_remove))
180
- return text.split()
181
 
182
  def compare_texts_for_feedback(original_text, student_text):
183
- original_words = clean_text_for_comparison(original_text)
184
- student_words = clean_text_for_comparison(student_text)
185
-
186
- if not student_words:
187
- return "It sounds like you didn't record anything, or maybe it was super quiet! 🀫 Try recording again nice and clear!", ""
188
-
189
  matcher = difflib.SequenceMatcher(None, original_words, student_words, autojunk=False)
190
- feedback_lines = []
191
- highlighted_passage_parts = []
192
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  for tag, i1, i2, j1, j2 in matcher.get_opcodes():
194
- original_segment = original_words[i1:i2]
195
- student_segment = student_words[j1:j2]
196
-
197
  if tag == 'equal':
198
- highlighted_passage_parts.append(" ".join(original_segment))
199
- elif tag == 'replace':
200
- # Try to highlight word by word if segments are same length for better visual
201
- if len(original_segment) == len(student_segment):
202
- for i in range(len(original_segment)):
203
- o_word = original_segment[i]
204
- s_word = student_segment[i]
205
- feedback_lines.append(f"- You said: \"*{s_word}*\" instead of: \"**{o_word}**\"")
206
- highlighted_passage_parts.append(f"~~{o_word}~~ **{s_word}**")
207
- else: # General replacement if segment lengths differ
208
- feedback_lines.append(f"- Instead of: \"**{' '.join(original_segment)}**\", you said: \"*{' '.join(student_segment)}*\"")
209
- highlighted_passage_parts.append(f"~~{' '.join(original_segment)}~~ **{' '.join(student_segment)}**")
210
- elif tag == 'delete': # Student skipped words from original
211
- feedback_lines.append(f"- You missed: \"**{' '.join(original_segment)}**\"")
212
- highlighted_passage_parts.append(f"~~{' '.join(original_segment)}~~ (*skipped*)")
213
- elif tag == 'insert': # Student added words not in original
214
- feedback_lines.append(f"- You added: \"*{' '.join(student_segment)}*\" (which wasn't in the story)")
215
- highlighted_passage_parts.append(f"(*added:* **{' '.join(student_segment)}**)")
216
-
217
- final_highlighted_text = " ".join(highlighted_passage_parts)
218
-
219
  if not feedback_lines:
220
- return "πŸŽ‰πŸ₯³ WOOHOO! Amazing reading! You got all the words spot on! πŸ₯³πŸŽ‰", final_highlighted_text
 
 
 
 
 
221
  else:
222
- feedback_summary = "Great try! Here are a few words to practice to make it even better:\n" + "\n".join(feedback_lines)
223
- return feedback_summary, final_highlighted_text
224
-
225
- # --- Gradio UI Functions ---
226
- def generate_story_and_audio_for_ui(name, grade, topic, progress=gr.Progress(track_tqdm=True)):
227
- if not name or not grade or not topic:
228
- return "Oops! Please tell me your name, grade, and a fun topic first! 😊", None, gr.update(visible=False), ""
229
-
230
- progress(0.1, desc="πŸ“– Asking Gemini to dream up a cool story for you...")
231
- story_text = generate_story_with_gemini(name, grade, topic)
232
- gemini_error_keywords = ["Gemini API key not configured", "Oh no!", "Oops!", "Hmm,"]
233
- if any(keyword in story_text for keyword in gemini_error_keywords) or not story_text.strip() :
234
- return story_text, None, gr.update(visible=False), story_text # Keep recording area hidden
235
-
236
- progress(0.5, desc="🎧 Warming up the VITS sound machine... (this should be quicker!)")
237
- tts_audio_path = text_to_speech_vits(story_text) # Use VITS TTS
238
- error_conditions_tts = [
239
- "couldn't make the sound", "sound came out a bit funny", "sound machine isn't working",
240
- "sound machine is busy", "VITS had a hiccup" # Check for VITS specific errors
241
- ]
242
- if any(err in (tts_audio_path or "") for err in error_conditions_tts):
243
- return story_text, tts_audio_path, gr.update(visible=False), story_text # Keep recording hidden
244
-
245
- progress(1.0, desc="βœ… Story and sound are ready! Let's go!")
246
- return (
247
- story_text,
248
- tts_audio_path,
249
- gr.update(visible=True), # Show recording_assessment_area
250
- story_text # Pass story_text to gr.State
251
- )
252
 
253
  def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
254
- if not student_audio_path:
255
- return "🎀 Whoops! Did you forget to record your awesome reading? Try again!", ""
256
- if not original_passage_state: # Should not happen if UI flow is correct
257
- return "Hmm, I lost the story! 😟 Please generate a new story first.", ""
258
-
259
- progress(0.2, desc="πŸ‘‚ Whisper is listening carefully to your recording...")
260
- transcribed_text = speech_to_text_whisper_space(student_audio_path)
261
- error_conditions_stt = [
262
- "couldn't understand the words", "had trouble hearing that", "listening ears aren't working",
263
- "listening ears are super busy", "didn't get any recording"
264
- ]
265
- if any(err in (transcribed_text or "") for err in error_conditions_stt):
266
- return transcribed_text, "" # Show STT error
267
-
268
- progress(0.7, desc="🧠 Thinking about the words...")
269
- feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
270
- progress(1.0, desc="⭐ Feedback is ready!")
271
- return feedback, highlighted_passage
272
-
273
- # --- Gradio Interface ---
274
  css = """
275
- body { font-family: 'Comic Sans MS', 'Chalkboard SE', 'Comic Neue', cursive; background-color: #F0F8FF; } /* AliceBlue background */
276
- .gr-button {
277
- background-color: #FF69B4 !important; /* HotPink */
278
- color: white !important;
 
 
279
  border-radius: 20px !important;
280
- font-weight: bold !important;
281
- border: 2px solid #FF1493 !important; /* DeepPink border */
282
- box-shadow: 0px 3px 5px rgba(0,0,0,0.2) !important;
 
 
283
  }
284
- .gr-button:hover { background-color: #FF1493 !important; } /* DeepPink on hover */
285
- .gr-panel {
286
- border-radius: 15px !important;
287
- box-shadow: 5px 5px 15px rgba(0,0,0,0.1) !important;
288
- background-color: #FFFACD !important; /* LemonChiffon panel background */
289
- border: 2px dashed #FFD700 !important; /* Gold dashed border */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  }
291
- label, .gr-checkbox-label { color: #4B0082 !important; font-weight: bold !important; } /* Indigo */
292
- .gr-textbox, .gr-dropdown { border-radius: 10px !important; border: 1px solid #DDA0DD !important; } /* Plum border for inputs */
293
- #student_audio_input audio { background-color: #E6E6FA; border-radius: 10px; } /* Lavender for audio player */
294
- #feedback_output, #highlighted_passage_output {
295
- background-color: #FFFFE0; /* LightYellow */
296
- padding: 15px;
297
- border-radius: 10px;
298
- border: 1px solid #FAFAD2; /* LightGoldenrodYellow */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  }
300
  """
301
 
302
- # Using a theme that allows CSS to take more precedence
303
- with gr.Blocks(theme=gr.themes.Base(), css=css) as app: # theme=gr.themes.Soft() or gr.themes.Base()
304
- gr.Markdown(
305
- """
306
- <div style="text-align: center; padding: 20px 0;">
307
- <h1 style="color: #FF6347; font-size: 3em; text-shadow: 2px 2px #D3D3D3;">πŸŒˆπŸ¦„βœ¨ AI Reading Buddy βœ¨πŸ¦„πŸŒˆ</h1>
308
- <p style="font-size: 1.3em; color: #483D8B;">Let's read a super fun story from Gemini and practice our words!</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  </div>
310
  """
311
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
- original_passage_state = gr.State("") # To store the generated story
 
 
 
 
 
 
 
 
 
 
 
 
314
 
315
- with gr.Row():
316
- with gr.Column(scale=1):
317
- gr.Markdown("### <span style='color:#DB7093;'>✏️ Tell Me About You!</span>")
318
- student_name_input = gr.Textbox(label="πŸ‘‘ Your Awesome Name:", placeholder="E.g., Princess Lily")
319
- student_grade_input = gr.Dropdown(
320
- label="πŸ§‘β€πŸŽ“ Your Grade:",
321
- choices=[f"{i}" for i in range(1, 11)], # Grades 1 to 10
322
- value="3" # Default value
323
  )
324
- topic_input = gr.Textbox(label="πŸš€ Story Topic Idea:", placeholder="E.g., brave little astronaut")
325
- generate_button = gr.Button(value="🎈 Get My Gemini Story!")
326
-
327
- with gr.Column(scale=2):
328
- gr.Markdown("### <span style='color:#DB7093;'>πŸ“– Your Special Story (from Gemini AI):</span>")
329
- passage_output = gr.Textbox(label="Read this aloud:", lines=10, interactive=False)
330
- gr.Markdown("### <span style='color:#DB7093;'>πŸ”Š Listen to the Story:</span>")
331
- audio_output = gr.Audio(label="Hear how it sounds (with VITS TTS)", type="filepath") # Label updated for VITS
332
-
333
- gr.Markdown("<hr style='border:1px dashed #FFB6C1;'>") # LightPink dashed separator
334
-
335
- with gr.Row(visible=False) as recording_assessment_area: # Initially hidden
336
- with gr.Column(scale=1):
337
- gr.Markdown("### <span style='color:#32CD32;'>🀩 Your Turn to Shine! 🀩</span>")
338
- student_audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎀 Record yourself reading the story! Press the mic, then stop.", elem_id="student_audio_input")
339
- assess_button = gr.Button(value="🧐 Check My Reading!", elem_id="assess_button")
340
-
341
- with gr.Column(scale=2):
342
- gr.Markdown("### <span style='color:#32CD32;'>πŸ’‘ Word Detective Feedback:</span>")
343
- feedback_output = gr.Markdown(value="Your amazing feedback will pop up here! ✨", elem_id="feedback_output")
344
- highlighted_passage_output = gr.Markdown(value="See your reading journey here! πŸ—ΊοΈ", elem_id="highlighted_passage_output")
345
-
346
-
347
- generate_button.click(
348
- fn=generate_story_and_audio_for_ui,
349
- inputs=[student_name_input, student_grade_input, topic_input],
350
- outputs=[
351
- passage_output,
352
- audio_output,
353
- recording_assessment_area, # Directly control visibility of the row
354
- original_passage_state
355
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  )
357
-
358
- assess_button.click(
359
- fn=assess_student_reading_ui,
360
- inputs=[original_passage_state, student_audio_input],
361
- outputs=[feedback_output, highlighted_passage_output]
 
362
  )
363
-
364
- gr.Markdown(
365
- """
366
- ---
367
- <div style="text-align: center; font-size: 0.9em; color: #555;">
368
- Built with ❀️ for the Agentic Demo Track Hackathon! Tag: <code>agent-demo-track</code>
369
- <br>Stories by Google Gemini, voices by ESPnet VITS @ HF, and listening by Whisper @ HF.
370
- </div>
371
- """
 
 
 
 
 
 
372
  )
373
 
374
- # --- Launching the App ---
375
  if __name__ == "__main__":
376
- if not GEMINI_API_CONFIGURED:
377
- print("🚨 GOOGLE_API_KEY not configured for local testing or failed to initialize!")
378
- print("Please set it: export GOOGLE_API_KEY='your_key_here'")
379
-
380
- if not tts_client:
381
- print("🚨 ESPnet VITS TTS client (espnet/kan-bayashi_ljspeech_vits) could not be initialized. TTS will not work.")
382
- if not whisper_stt_client:
383
- print("🚨 Whisper STT client (abidlabs/whisper-large-v2) could not be initialized. STT will not work.")
384
-
385
- app.launch(debug=True) # Set share=True for a temporary public link if running locally
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
  import gradio as gr
5
  import os
6
  import difflib
7
+ from gradio_client import Client
8
  import time
9
  import google.generativeai as genai
10
 
11
  # --- Configuration & Clients ---
12
 
13
+ def configure_llm_api():
 
14
  api_key = None
15
  try:
16
+ api_key = gr.Secrets.get("GOOGLE_API_KEY")
17
+ except (AttributeError, FileNotFoundError):
 
 
18
  api_key = os.environ.get("GOOGLE_API_KEY")
 
19
  if api_key:
20
  try:
21
  genai.configure(api_key=api_key)
 
22
  return True
23
  except Exception as e:
24
+ print(f"Error configuring LLM (Gemini) API: {e}"); return False
 
25
  else:
26
+ print("WARN: LLM API Key (GOOGLE_API_KEY) not found."); return False
27
+ LLM_API_CONFIGURED = configure_llm_api()
28
 
29
+ # Initialize new TTS client
 
 
30
  try:
31
+ tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
32
+ print("βœ… Connected to advanced TTS service (Text-To-Speech-Unlimited)")
 
 
 
 
 
 
 
 
 
33
  except Exception as e:
34
+ print(f"❌ Failed to connect to TTS service: {e}")
35
  tts_client = None
36
 
 
37
  try:
38
  whisper_stt_client = Client("abidlabs/whisper-large-v2")
39
+ except Exception: whisper_stt_client = None
 
 
 
 
 
 
 
40
 
41
  # --- Helper Functions ---
42
+ def generate_story_from_llm(name, grade_str, topic, progress=gr.Progress(track_tqdm=True)):
43
+ progress(0.0, desc="Starting story creation...")
44
+ default_passage_val = ""
45
+ default_audio_gen_update = gr.update(interactive=False, visible=False)
46
+ default_audio_player_update = gr.update(value=None, visible=False)
47
+ if not LLM_API_CONFIGURED:
48
+ progress(1.0, desc="Complete")
49
+ return "LLM API key not configured...", default_audio_gen_update, default_audio_player_update
50
  try:
51
+ if grade_str.startswith("Grade "):
52
+ grade = int(grade_str.replace("Grade ", ""))
53
+ else:
54
+ grade = int(grade_str)
55
+ except ValueError:
56
+ progress(1.0, desc="Complete")
57
+ return "Invalid grade level selected.", default_audio_gen_update, default_audio_player_update
58
+ if grade <= 2: word_target, max_llm_tokens = "around 40-60 words", 100
59
+ elif grade <= 5: word_target, max_llm_tokens = "around 80-100 words", 200
60
+ elif grade <= 8: word_target, max_llm_tokens = "around 100-120 words", 250
61
+ else: word_target, max_llm_tokens = "around 120-150 words", 300
62
+
63
+ progress(0.1, desc="Setting up AI story generator...")
64
+ story_text_result = default_passage_val
65
+ audio_gen_btn_update = default_audio_gen_update
66
+ try:
67
+ model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
68
  prompt = (
69
+ f"You are an AI assistant that creates engaging short reading passages. "
70
+ f"Generate a story of {word_target} suitable for a student named {name} in Grade {grade}. "
71
+ f"The story topic is: '{topic}'. Use age-appropriate vocabulary for Grade {grade}. Ensure the story is interesting and easy to read aloud. "
72
+ f"Do not include any introductory or concluding phrases like 'Here is a story'."
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  )
74
+ safety_settings = [{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in [
75
+ "HARM_CATEGORY_HARASSMENT",
76
+ "HARM_CATEGORY_HATE_SPEECH",
77
+ "HARM_CATEGORY_SEXUALLY_EXPLICIT",
78
+ "HARM_CATEGORY_DANGEROUS_CONTENT"
79
+ ]]
80
+ generation_config = genai.types.GenerationConfig(candidate_count=1, max_output_tokens=max_llm_tokens, temperature=0.7)
81
+ progress(0.3, desc="AI is writing your story...")
82
+ response = model.generate_content(prompt, generation_config=generation_config, safety_settings=safety_settings)
83
+ progress(0.8, desc="Polishing your story...")
84
  if response.candidates and response.candidates[0].content.parts:
85
  story = response.text
86
  if response.prompt_feedback and response.prompt_feedback.block_reason:
87
+ story_text_result = f"Story idea for '{topic}' blocked (Reason: {response.prompt_feedback.block_reason}). Try a different topic. 😊"
88
+ elif not story.strip():
89
+ story_text_result = f"The LLM couldn't generate a story for '{topic}'. Try another topic or rephrase. ✨"
90
+ else:
91
+ story_text_result = story.strip()
92
+ audio_gen_btn_update = gr.update(interactive=True, visible=True)
93
  else:
94
  if response.prompt_feedback and response.prompt_feedback.block_reason:
95
+ story_text_result = f"Story idea for '{topic}' got blocked (Reason: {response.prompt_feedback.block_reason}). Try a different topic. 😊"
96
+ else:
97
+ story_text_result = "Hmm, LLM had trouble with that topic. Maybe try another one? πŸ€”"
98
+ progress(1.0, desc="Story complete!")
99
+ return story_text_result, audio_gen_btn_update, default_audio_player_update
100
  except Exception as e:
101
+ progress(1.0, desc="Complete")
102
+ return f"Oh no! 😟 Error generating story. Details: {e}", default_audio_gen_update, default_audio_player_update
103
+
104
+ def text_to_speech_using_space(text_to_speak, progress=gr.Progress(track_tqdm=True)):
105
+ global tts_client
106
+ progress(0.0, desc="πŸ”Š Preparing voice synthesis...")
107
+
108
+ if not text_to_speak or not text_to_speak.strip():
109
+ progress(1.0, desc="Complete")
110
+ return None
111
+
112
+ progress(0.1, desc="πŸ”Š Initializing audio generation...")
113
+
114
+ # Reconnect to TTS client if needed
115
  if not tts_client:
116
+ progress(0.2, desc="πŸ”— Connecting to advanced voice service...")
117
+ try:
118
+ tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
119
+ progress(0.3, desc="πŸ”— Connected to voice service...")
120
+ except Exception as e:
121
+ print(f"Failed to connect to TTS service: {e}")
122
+ progress(1.0, desc="Complete")
123
+ return None
124
+
125
+ if not tts_client:
126
+ progress(1.0, desc="Complete")
127
+ return None
128
+
129
+ progress(0.4, desc="πŸŽ™οΈ AI is reading your story aloud...")
130
  try:
131
+ # Try the correct API configuration with emotion parameter
132
+ api_methods = [
133
+ {"params": [text_to_speak, "alloy", "happy"], "api_name": "/text_to_speech_app"},
134
+ {"params": [text_to_speak, "alloy", "neutral"], "api_name": "/text_to_speech_app"},
135
+ {"params": [text_to_speak, "nova", "neutral"], "api_name": "/text_to_speech_app"},
136
+ {"params": [text_to_speak], "api_name": "/predict"}
137
+ ]
138
+
139
+ audio_filepath = None
140
+ for method in api_methods:
141
+ try:
142
+ print(f"Trying TTS with params: {method['params']} and api_name: {method['api_name']}")
143
+ audio_result = tts_client.predict(
144
+ *method["params"],
145
+ api_name=method["api_name"]
146
+ )
147
+ print(f"TTS result type: {type(audio_result)}, content: {audio_result}")
148
+
149
+ # Extract audio file path from result
150
+ if isinstance(audio_result, tuple) and len(audio_result) > 0:
151
+ audio_filepath = audio_result[0]
152
+ elif isinstance(audio_result, str) and audio_result.endswith(('.wav', '.mp3', '.flac', '.m4a')):
153
+ audio_filepath = audio_result
154
+ elif isinstance(audio_result, list) and len(audio_result) > 0:
155
+ audio_filepath = audio_result[0]
156
+
157
+ if audio_filepath:
158
+ print(f"Successfully generated audio: {audio_filepath}")
159
+ break
160
+
161
+ except Exception as method_error:
162
+ print(f"TTS method failed: {method_error}")
163
+ continue
164
+
165
+ if audio_filepath:
166
+ progress(0.9, desc="🎡 Voice generation complete!")
167
+ progress(1.0, desc="πŸ”Š Audio ready!")
168
+ print(f"FINAL: Returning audio file path: {audio_filepath}")
169
  return audio_filepath
170
  else:
171
+ print("All TTS methods failed, trying to reconnect...")
172
+ raise Exception("All API methods failed")
173
+
 
 
 
174
  except Exception as e:
175
+ print(f"TTS error: {e}")
176
+ # Try to reconnect on error
177
+ try:
178
+ progress(0.6, desc="πŸ”„ Reconnecting to voice service...")
179
+ tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
180
+ if tts_client:
181
+ progress(0.8, desc="πŸŽ™οΈ Retrying voice generation...")
182
+ # Try the most basic approach with emotion parameter
183
+ audio_result = tts_client.predict(
184
+ text_to_speak,
185
+ "alloy", # voice
186
+ "neutral", # emotion
187
+ api_name="/text_to_speech_app"
188
+ )
189
+ print(f"Retry result: {type(audio_result)}, {audio_result}")
190
+
191
+ audio_filepath = None
192
+ if isinstance(audio_result, tuple) and len(audio_result) > 0:
193
+ audio_filepath = audio_result[0]
194
+ elif isinstance(audio_result, str) and audio_result.endswith(('.wav', '.mp3', '.flac', '.m4a')):
195
+ audio_filepath = audio_result
196
+ elif isinstance(audio_result, list) and len(audio_result) > 0:
197
+ audio_filepath = audio_result[0]
198
+
199
+ if audio_filepath:
200
+ progress(1.0, desc="πŸ”Š Audio ready!")
201
+ print(f"RETRY SUCCESS: Returning audio file path: {audio_filepath}")
202
+ return audio_filepath
203
+
204
+ except Exception as retry_error:
205
+ print(f"TTS retry failed: {retry_error}")
206
+ pass
207
+
208
+ progress(1.0, desc="Audio generation failed")
209
+ print("TTS failed completely - returning None")
210
+ return None
211
+
212
def speech_to_text_whisper_space(audio_filepath, progress=gr.Progress(track_tqdm=True), max_retries=3):
    """Transcribe a student's recording via the hosted Whisper Space.

    Args:
        audio_filepath: Path to the recorded audio file (from gr.Audio).
        progress: Gradio progress tracker used for UI status updates.
        max_retries: Number of attempts against the remote STT endpoint.

    Returns:
        The transcript string on success, otherwise a human-readable error
        message. Callers detect error messages by substring matching, so
        these strings must stay stable.
    """
    progress(0.1, desc="Sending your reading for transcription...")
    if not whisper_stt_client:
        progress(1.0, desc="Complete")
        return "Speech-to-text service is not available. πŸ› οΈ"
    if not audio_filepath:
        progress(1.0, desc="Complete")
        return "No recording received for transcription. 🎀"
    for attempt in range(max_retries):
        try:
            progress(0.2 + (attempt * 0.1), desc=f"Transcribing your voice (Whisper) - Attempt {attempt + 1}...")
            result = whisper_stt_client.predict(audio_filepath, api_name="/predict")
            progress(0.9, desc="Transcription complete.")
            # The Space may return a bare string or a (text, ...) tuple/list.
            if isinstance(result, (tuple, list)) and len(result) > 0:
                transcribed_text = result[0] if result[0] else ""
            elif isinstance(result, str):
                transcribed_text = result
            else:
                progress(1.0, desc="Complete")
                return "Hmm, STT service returned unexpected format. πŸ€”"
            progress(1.0, desc="Transcription complete!")
            return transcribed_text if transcribed_text else "No speech detected in the recording. 🀫"
        except Exception as stt_error:
            # BUG FIX: failures were previously swallowed silently
            # (`except Exception: continue`), making remote-service problems
            # impossible to diagnose. Log each attempt (matching this file's
            # print-based logging style) and back off briefly before retrying.
            print(f"STT attempt {attempt + 1}/{max_retries} failed: {stt_error}")
            if attempt < max_retries - 1:
                time.sleep(1)
    progress(1.0, desc="Complete")
    return "Unexpected error during transcription. Please try again! πŸ”„"
 
240
 
241
def clean_text_for_comparison(text):
    """Normalise text into a list of lowercase words for diffing.

    Lowercases the input, strips punctuation (apostrophes are deliberately
    kept so contractions like "don't" survive intact), and splits on
    whitespace. Non-string input yields an empty list.
    """
    if not isinstance(text, str):
        return []
    strip_chars = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~"
    normalised = text.lower().translate(str.maketrans('', '', strip_chars))
    return normalised.split()
 
 
245
 
246
def compare_texts_for_feedback(original_text, student_text):
    """Diff the original passage against the student's transcript.

    Both strings are normalised via clean_text_for_comparison() and aligned
    word-by-word with difflib.SequenceMatcher.

    Returns:
        (feedback_markdown, highlighted_html) where feedback_markdown is a
        celebration message for a perfect read or a progress report with
        pronunciation tips, and highlighted_html is the passage with each
        diff segment wrapped in a colour-coded <span>.
    """
    original_words, student_words = clean_text_for_comparison(original_text), clean_text_for_comparison(student_text)
    # No recognisable words in the transcript -> nothing to compare.
    if not student_words: return "It sounds like you didn't record or it was very quiet! 🀫 Try recording again nice and clear!", ""
    # autojunk=False keeps very frequent words (e.g. "the") significant in the diff.
    matcher = difflib.SequenceMatcher(None, original_words, student_words, autojunk=False)
    feedback_lines, highlighted_parts = [], []
    word_diff_count = 0  # running total of mismatched words across all segments
    pronunciation_tips = []
    # Curated phonetic respellings for common tricky English words.
    pronunciation_guide = {
        'the': 'thuh or thee', 'through': 'threw', 'though': 'thoh', 'thought': 'thawt',
        'knight': 'night', 'know': 'noh', 'write': 'right', 'wrong': 'rawng', 'what': 'wot',
        'where': 'wair', 'when': 'wen', 'why': 'wy', 'who': 'hoo', 'laugh': 'laff',
        'enough': 'ee-nuff', 'cough': 'koff', 'rough': 'ruff', 'tough': 'tuff', 'magic': 'maj-ik',
        'school': 'skool', 'friend': 'frend', 'said': 'sed', 'says': 'sez', 'once': 'wunts',
        'was': 'wuz', 'were': 'wur', 'you': 'yoo', 'your': 'yor', 'there': 'thair', 'their': 'thair', 'they': 'thay'
    }
    def get_pronunciation_tip(word):
        # Produce a kid-friendly hint for pronouncing `word`.
        word_lower = word.lower()
        if word_lower in pronunciation_guide:
            return f"πŸ—£οΈ Try saying: \"{pronunciation_guide[word_lower]}\""
        elif len(word) > 6:
            # Naive syllable split: break after a vowel that is followed by a
            # consonant. Good enough for a reading hint, not real linguistics.
            syllables = []
            vowels = 'aeiou'
            current_syllable = ''
            for i, char in enumerate(word_lower):
                current_syllable += char
                if char in vowels and i < len(word_lower) - 1:
                    if word_lower[i + 1] not in vowels:
                        syllables.append(current_syllable)
                        current_syllable = ''
            if current_syllable: syllables.append(current_syllable)
            if len(syllables) > 1: return f"πŸ”€ Break it down: \"{'-'.join(syllables)}\""
        # Spelling-pattern fallbacks for common English quirks.
        if word_lower.endswith('tion'): return "πŸ—£οΈ Words ending in '-tion' sound like 'shun'"
        elif word_lower.endswith('ough'): return "πŸ—£οΈ '-ough' can be tricky! Listen to the audio again"
        elif 'gh' in word_lower: return "πŸ—£οΈ 'gh' is often silent or sounds like 'f'"
        elif word_lower.startswith('wr'): return "πŸ—£οΈ In 'wr-' words, the 'w' is silent"
        elif word_lower.startswith('kn'): return "πŸ—£οΈ In 'kn-' words, the 'k' is silent"
        return f"🎯 Focus on each sound in \"{word}\""
    # Walk the diff opcodes; build per-segment feedback plus colour-coded HTML.
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        orig_seg_words, stud_seg_words = original_words[i1:i2], student_words[j1:j2]
        orig_seg_text, stud_seg_text = " ".join(orig_seg_words), " ".join(stud_seg_words)
        if tag == 'equal':
            # Correctly read words: green highlight.
            highlighted_parts.append(f'<span style="background: #90EE90; padding: 2px 4px; border-radius: 4px; margin: 1px;">{orig_seg_text}</span>')
        else:
            word_diff_count += max(len(orig_seg_words), len(stud_seg_words))
            if tag == 'replace':
                # Substituted words: a tip for each original word that differs.
                # NOTE(review): zip() stops at the shorter segment, so in an
                # unequal-length replace the trailing words get no tip -- confirm intended.
                for orig_word, stud_word in zip(orig_seg_words, stud_seg_words):
                    if orig_word != stud_word:
                        tip = get_pronunciation_tip(orig_word)
                        pronunciation_tips.append(f"**{orig_word.upper()}**: {tip}")
                feedback_lines.append(f"πŸ”„ Instead of: \"{orig_seg_text}\", you said: \"{stud_seg_text}\"")
                highlighted_parts.append(f'<span style="background: #FFE4B5; padding: 2px 4px; border-radius: 4px; margin: 1px; text-decoration: line-through;">{orig_seg_text}</span> <span style="background: #FFB6C1; padding: 2px 4px; border-radius: 4px; margin: 1px; font-weight: bold;">{stud_seg_text}</span>')
            elif tag == 'delete':
                # Words present in the passage but missing from the reading.
                for missed_word in orig_seg_words:
                    tip = get_pronunciation_tip(missed_word)
                    pronunciation_tips.append(f"**{missed_word.upper()}** (missed): {tip}")
                feedback_lines.append(f"⏭️ You missed: \"{orig_seg_text}\"")
                highlighted_parts.append(f'<span style="background: #FFA0B4; padding: 2px 4px; border-radius: 4px; margin: 1px; text-decoration: line-through;">{orig_seg_text}</span> <span style="font-style: italic; color: #666;">(*skipped*)</span>')
            elif tag == 'insert':
                # Words the student added that are not in the passage.
                feedback_lines.append(f"βž• You added: \"{stud_seg_text}\" (not in original)")
                highlighted_parts.append(f'<span style="background: #DDA0DD; padding: 2px 4px; border-radius: 4px; margin: 1px; font-style: italic;">(*added:* {stud_seg_text})</span>')
    final_text = " ".join(highlighted_parts)
    if not feedback_lines:
        # Every segment matched: celebrate a perfect reading.
        feedback_html = """
        πŸŽ‰πŸ₯³ **PERFECT READING!** πŸ₯³πŸŽ‰
        Amazing! You read every single word correctly! 🌟
        πŸ† **Reading Champion!** πŸ†
        """
        return feedback_html, final_text
    else:
        # Assemble the progress report: summary, issues, tips, suggestions.
        feedback_parts = [
            f"πŸ“ˆ **Reading Progress Report**",
            f"πŸ“Š **Words to practice:** {word_diff_count}",
            f"πŸ’ͺ **Keep improving!** Practice makes perfect!",
            "",
            "πŸ” **What to work on:**"
        ]
        for line in feedback_lines: feedback_parts.append(f"β€’ {line}")
        if pronunciation_tips:
            feedback_parts.extend([
                "",
                "🎀 **Pronunciation Helper**",
                "Here's how to say the tricky words:"
            ])
            # Cap at five tips so the report stays digestible.
            for tip in pronunciation_tips[:5]: feedback_parts.append(f"β€’ {tip}")
            feedback_parts.extend([
                "",
                "πŸ’‘ **Pro tip:** Listen to the story audio again and pay special attention to these words!"
            ])
        feedback_parts.extend([
            "",
            "🎯 **Practice Suggestions**",
            "β€’ 🎧 Listen to the AI reading first",
            "β€’ πŸ”€ Practice saying difficult words slowly",
            "β€’ πŸ“– Read the story again at your own pace",
            "β€’ πŸ”„ Try recording again when you're ready!"
        ])
        feedback_html = "\n".join(feedback_parts)
        return feedback_html, final_text
 
344
 
345
def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
    """Transcribe the student's recording and compare it to the passage.

    Args:
        original_passage_state: The story text the student was asked to read.
        student_audio_path: Filepath of the student's microphone recording.
        progress: Gradio progress tracker for UI status updates.

    Returns:
        (feedback_markdown, highlighted_passage_html). When the STT step
        returns a service/error message instead of a transcript, that message
        is surfaced directly and no comparison is attempted.
    """
    if not student_audio_path:
        return "🎀 Please record your reading first!", ""
    if not original_passage_state:
        return "Hmm, the original story is missing. 😟 Please generate a story first.", ""
    transcribed_text = speech_to_text_whisper_space(student_audio_path, progress=progress)
    # BUG FIX: the original substring list did not match any of the error
    # messages speech_to_text_whisper_space actually returns, so STT failures
    # were diffed against the passage as if they were the student's reading.
    # The real messages' substrings are appended (old entries kept for safety).
    stt_errors = [
        "couldn't understand", "had trouble", "service isn't working",
        "service is busy", "didn't get any recording", "filepath type issue",
        "not available", "no recording received", "unexpected format",
        "unexpected error during transcription", "no speech detected",
    ]
    if any(err in (transcribed_text or "").lower() for err in stt_errors):
        return transcribed_text, ""
    progress(0.6, desc="Analyzing your reading accuracy...")
    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
    progress(1.0, desc="Assessment complete!")
    return feedback, highlighted_passage
353
+
 
 
 
 
 
 
 
 
 
 
 
 
354
# Custom stylesheet passed to gr.Blocks(css=...) below: light neutral look,
# button hover/active/processing states, and loading animations for status text.
css = """
body, .gradio-container {
    background: #f9fafb !important;
    font-family: -apple-system, BlinkMacSystemFont, 'San Francisco', 'Segoe UI', 'Roboto', Arial, sans-serif !important;
}
.main-header {
    background: white !important;
    border-radius: 20px !important;
    box-shadow: 0 8px 32px 0 rgba(60,60,90,0.06) !important;
    padding: 36px 20px 24px 20px !important;
    margin-bottom: 28px !important;
    text-align: center;
    border: none !important;
}
.main-header h1 {font-size: 2.2rem !important; font-weight: 700 !important; color: #23232b !important;}
.main-header p {color: #6b7280 !important; font-size: 1.08rem !important; margin-bottom: 8px !important;}
.tech-badge {background: #e0e7ef !important; color: #4f8fff !important; border-radius: 12px !important; padding: 4px 12px !important; font-size: 12px !important; font-weight: 600 !important;}
.gr-block, .gr-panel {background: white !important; border-radius: 18px !important; box-shadow: 0 2px 8px 0 rgba(60,60,90,0.07) !important; border: none !important; padding: 28px 22px !important;}
.section-header {background: transparent !important; border: none !important; padding: 0 !important; margin-bottom: 16px !important;}
.section-header h3 {color: #1e293b !important; font-size: 1.14rem !important; font-weight: 600 !important;}
.section-header p {color: #8691a2 !important; font-size: 13px !important;}

/* Enhanced button styles with click feedback */
.gr-button {
    background: linear-gradient(90deg, #007AFF, #2689ff) !important;
    color: white !important;
    border-radius: 18px !important;
    font-weight: 600 !important;
    border: none !important;
    box-shadow: 0 1px 4px rgba(0, 123, 255, 0.04) !important;
    padding: 9px 22px !important;
    font-size: 16px !important;
    transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
    transform: translateY(0) !important;
}

.gr-button:hover {
    background: linear-gradient(90deg, #2689ff, #007AFF) !important;
    box-shadow: 0 4px 12px rgba(0, 123, 255, 0.15) !important;
    transform: translateY(-1px) !important;
}

.gr-button:active {
    background: linear-gradient(90deg, #0056CC, #1F5FFF) !important;
    box-shadow: 0 1px 3px rgba(0, 123, 255, 0.25) !important;
    transform: translateY(1px) !important;
    transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}

.gr-button[variant="secondary"] {
    background: linear-gradient(90deg, #e0e7ef, #dde5f2) !important;
    color: #2a3140 !important;
    transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
    transform: translateY(0) !important;
}

.gr-button[variant="secondary"]:hover {
    background: linear-gradient(90deg, #dde5f2, #e0e7ef) !important;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08) !important;
    transform: translateY(-1px) !important;
}

.gr-button[variant="secondary"]:active {
    background: linear-gradient(90deg, #d1d9e0, #c9d1db) !important;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15) !important;
    transform: translateY(1px) !important;
    transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}

/* Processing state for buttons */
.gr-button.processing {
    background: linear-gradient(90deg, #94a3b8, #cbd5e1) !important;
    color: #64748b !important;
    cursor: wait !important;
    transform: translateY(0) !important;
    box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

label {color: #374151 !important; font-weight: 600 !important; font-size: 15px !important;}
.gr-textbox, .gr-dropdown {border-radius: 12px !important; border: 1.5px solid #dbeafe !important; background: #f6f8fb !important; font-size: 16px !important; padding: 10px 14px !important;}
.gr-textbox:focus, .gr-dropdown:focus {border-color: #007AFF !important; box-shadow: 0 0 0 2px rgba(0, 122, 255, 0.10) !important; outline: none !important;}
.gr-audio {background: #f9fafb !important; border-radius: 16px !important; border: 1.5px solid #e5e7eb !important; padding: 18px !important;}
.feedback-container {background: #f4f7fa !important; border-radius: 18px !important; padding: 18px 24px !important;}

/* Spinner animation for progress indicators */
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Pulse animation for loading states */
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.7; }
}

.loading-pulse {
    animation: pulse 1.5s ease-in-out infinite;
}
"""
454
 
455
+ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="ReadRight") as app:
456
+ gr.Markdown("""
457
+ <div class="main-header">
458
+ <h1>πŸ“š ReadRight</h1>
459
+ <p>AI-powered reading practice and pronunciation feedback for students</p>
460
+ </div>
461
+ """)
462
+
463
+ original_passage_state = gr.State("")
464
+
465
+ with gr.Tabs():
466
+ with gr.TabItem("πŸ“– Practice & Generate", elem_id="main_tab"):
467
+ with gr.Row(equal_height=True):
468
+ with gr.Column(scale=1, variant="panel"):
469
+ gr.Markdown("""
470
+ <div class="section-header">
471
+ <h3>πŸ“ Story & Reading</h3>
472
+ <p>Enter details, get your story, generate audio, and record yourselfβ€”all in one flow.</p>
473
+ </div>
474
+ """)
475
+ s_name = gr.Textbox(label="πŸ‘€ Your Name", placeholder="Enter your name")
476
+ s_grade = gr.Dropdown(label="πŸŽ“ Grade Level", choices=[f"Grade {i}" for i in range(1, 11)], value="Grade 3")
477
+ s_topic = gr.Textbox(label="πŸ’‘ Story Topic", placeholder="E.g., space, animals, friendship")
478
+ gen_btn = gr.Button("✨ Generate Story", variant="primary")
479
+ passage_out = gr.Textbox(label="πŸ“– Story", lines=8, interactive=False, placeholder="Your story appears here...")
480
+ audio_out = gr.Audio(label="🎡 Story Audio", type="filepath", visible=True, autoplay=False)
481
+ gr.Markdown("""
482
+ <div style="margin: 20px 0 0 0; padding: 10px 20px; background: #f4f7fa; border-radius: 16px;">
483
+ <b>➑️ Next:</b> Record yourself reading below for feedback.
484
+ </div>
485
+ """)
486
+ stud_audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎀 Your Recording")
487
+ record_again_btn = gr.Button("πŸ”„ Record Again", variant="secondary", size="sm", visible=False)
488
+ clear_recording_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary", size="sm", visible=False)
489
+ assess_btn = gr.Button("πŸ” Analyze Reading", variant="primary", size="lg", interactive=False)
490
+ recording_status = gr.Markdown("", elem_id="recording_status")
491
+ analysis_status = gr.Markdown("", elem_id="analysis_status")
492
+ with gr.TabItem("πŸ“Š Analysis & Feedback", elem_id="analysis_tab"):
493
+ gr.Markdown("""
494
+ <div class="section-header">
495
+ <h3>πŸ“ˆ Analysis</h3>
496
+ <p>See your performance and areas to improve</p>
497
+ </div>
498
+ """)
499
+ feedback_out = gr.Markdown(
500
+ value="""
501
+ <div style="text-align: center; color: #6b7280;">
502
+ <h4>Analysis Results</h4>
503
+ <p>Your feedback will appear here.</p>
504
+ </div>
505
+ """,
506
+ elem_id="feedback_output"
507
+ )
508
+ highlighted_out = gr.Markdown(
509
+ value="""
510
+ <div style="text-align: center; color: #6b7280;">
511
+ <h4>Word-by-Word Analysis</h4>
512
+ <p>Get color-coded feedback below.</p>
513
+ </div>
514
+ """,
515
+ elem_id="highlighted_passage_output"
516
+ )
517
+ gr.Markdown("""
518
+ <div style="background: #f7fafc; border-radius: 16px; padding: 16px 12px 10px 12px; margin: 22px 0 18px 0; box-shadow: 0 2px 6px 0 rgba(60,60,90,0.04);">
519
+ <b>Color code:</b>
520
+ <div style="display: flex; gap: 14px; flex-wrap: wrap; margin-top: 8px;">
521
+ <span style="background: #90EE90; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #155724;">Perfect Match</span>
522
+ <span style="background: #FFE4B5; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #856404;">Substitution</span>
523
+ <span style="background: #FFA0B4; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #721c24;">Skipped Word</span>
524
+ <span style="background: #DDA0DD; padding: 5px 14px; border-radius: 12px; font-size: 13px; font-weight: 500; color: #5f006a;">Extra Word</span>
525
+ </div>
526
+ </div>
527
+ """)
528
+ gr.Markdown("""
529
+ <div style="margin: 14px 0; padding: 14px 22px; background: #f8fafc; border-radius: 14px;">
530
+ <span style="color: #0a58ca; font-weight: 500;">Goals:</span>
531
+ <ul style="margin: 7px 0 0 18px; color: #6b7280;">
532
+ <li>Word accuracy above 90%</li>
533
+ <li>Speak clearly and with confidence</li>
534
+ <li>Practice as much as you like</li>
535
+ </ul>
536
+ </div>
537
+ """)
538
+
539
+ with gr.TabItem("ℹ️ About & How It Works", elem_id="about_tab"):
540
+ gr.Markdown("""
541
+ <div class="section-header">
542
+ <h3>πŸ”§ How ReadRight Works</h3>
543
+ <p>Understanding the technology behind your ReadRight</p>
544
+ </div>
545
+ """)
546
+
547
+ gr.Markdown("""
548
+ ## 🎯 What This Platform Does
549
+
550
+ ReadRight is an AI-powered tool designed to help students improve their reading skills through:
551
+
552
+ - **✨ Personalized Story Generation**: Creates age-appropriate reading passages tailored to your grade level and interests
553
+ - **πŸ”Š Audio Pronunciation Models**: Provides clear audio examples of proper pronunciation
554
+ - **⚑ Real-time Speech Analysis**: Analyzes your reading accuracy and identifies areas for improvement
555
+ - **🎯 Detailed Feedback**: Offers specific pronunciation tips and practice suggestions
556
+
557
+ ## πŸ—οΈ Reading Practice Application Workflow
558
+ """)
559
+
560
+ # Use HTML component for the SVG
561
+ gr.HTML("""
562
+ <div style="width: 100%; overflow-x: auto; padding: 20px 0;">
563
+ <svg width="1400" height="700" xmlns="http://www.w3.org/2000/svg" style="max-width: 100%; height: auto;">
564
+ <!-- Background -->
565
+ <rect width="1400" height="600" fill="#fafafa"/>
566
+
567
+ <!-- Title -->
568
+ <text x="700" y="30" text-anchor="middle" font-size="24" font-weight="bold" fill="#1f2937">Reading Practice Application Workflow</text>
569
+
570
+ <!-- Top Row - Input to Audio -->
571
+ <rect x="100" y="80" width="200" height="100" rx="20" fill="#dbeafe" stroke="#2563eb" stroke-width="3"/>
572
+ <text x="200" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#1e40af">User Input</text>
573
+ <text x="200" y="140" text-anchor="middle" font-size="14" fill="#3730a3">Student Name</text>
574
+ <text x="200" y="160" text-anchor="middle" font-size="14" fill="#3730a3">Grade Level & Topic</text>
575
+
576
+ <!-- Arrow 1 -->
577
+ <path d="M300 130 L380 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
578
+ <text x="340" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">INPUT</text>
579
+
580
+ <rect x="380" y="80" width="200" height="100" rx="20" fill="#dcfce7" stroke="#16a34a" stroke-width="3"/>
581
+ <text x="480" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#15803d">Story Generator</text>
582
+ <text x="480" y="140" text-anchor="middle" font-size="14" fill="#166534">AI creates personalized</text>
583
+ <text x="480" y="160" text-anchor="middle" font-size="14" fill="#166534">reading story</text>
584
+
585
+ <!-- Arrow 2 -->
586
+ <path d="M580 130 L660 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
587
+ <text x="620" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STORY</text>
588
+
589
+ <rect x="660" y="80" width="200" height="100" rx="20" fill="#fef3c7" stroke="#d97706" stroke-width="3"/>
590
+ <text x="760" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#b45309">Audio Synthesis</text>
591
+ <text x="760" y="140" text-anchor="middle" font-size="14" fill="#92400e">Text-to-Speech</text>
592
+ <text x="760" y="160" text-anchor="middle" font-size="14" fill="#92400e">Audio Generation</text>
593
+
594
+ <!-- Arrow 3 -->
595
+ <path d="M860 130 L960 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
596
+ <text x="910" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">AUDIO</text>
597
+
598
+ <rect x="960" y="80" width="200" height="100" rx="20" fill="#f3e8ff" stroke="#9333ea" stroke-width="3"/>
599
+ <text x="1060" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#7c3aed">Text Comparison</text>
600
+ <text x="1060" y="140" text-anchor="middle" font-size="14" fill="#6b21a8">Analysis Engine</text>
601
+ <text x="1060" y="160" text-anchor="middle" font-size="14" fill="#6b21a8">Accuracy Detection</text>
602
+
603
+ <!-- Vertical Flow Arrow (Audio to Student Recording) -->
604
+ <path d="M760 180 L760 250" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
605
+ <text x="790" y="220" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STUDENT LISTENS</text>
606
+
607
+ <!-- Bottom Row - Student Practice to Feedback -->
608
+ <rect x="660" y="250" width="200" height="100" rx="20" fill="#fce7f3" stroke="#ec4899" stroke-width="3"/>
609
+ <text x="760" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#be185d">Student Recording</text>
610
+ <text x="760" y="310" text-anchor="middle" font-size="14" fill="#9d174d">Student reads</text>
611
+ <text x="760" y="330" text-anchor="middle" font-size="14" fill="#9d174d">story aloud</text>
612
+
613
+ <!-- Arrow 4 (Student Recording to Speech Recognition) -->
614
+ <path d="M660 300 L580 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
615
+ <text x="620" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RECORDING</text>
616
+
617
+ <rect x="380" y="250" width="200" height="100" rx="20" fill="#e0e7ff" stroke="#6366f1" stroke-width="3"/>
618
+ <text x="480" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#4338ca">Speech Recognition</text>
619
+ <text x="480" y="310" text-anchor="middle" font-size="14" fill="#3730a3">Speech-to-Text</text>
620
+ <text x="480" y="330" text-anchor="middle" font-size="14" fill="#3730a3">Transcription</text>
621
+
622
+ <!-- Arrow 5 (Speech Recognition to Feedback) -->
623
+ <path d="M380 300 L300 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
624
+ <text x="340" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">TRANSCRIPT</text>
625
+
626
+ <rect x="100" y="250" width="200" height="100" rx="20" fill="#fef2f2" stroke="#ef4444" stroke-width="3"/>
627
+ <text x="200" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#dc2626">Feedback System</text>
628
+ <text x="200" y="310" text-anchor="middle" font-size="14" fill="#b91c1c">Performance Analysis</text>
629
+ <text x="200" y="330" text-anchor="middle" font-size="14" fill="#b91c1c">Improvement Tips</text>
630
+
631
+ <!-- Arrow from Feedback to Report -->
632
+ <path d="M200 350 L200 450" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
633
+ <text x="230" y="400" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RESULTS</text>
634
+
635
+ <!-- Output Box -->
636
+ <rect x="100" y="450" width="200" height="80" rx="20" fill="#f0fdf4" stroke="#22c55e" stroke-width="3"/>
637
+ <text x="200" y="480" text-anchor="middle" font-size="18" font-weight="bold" fill="#16a34a">Student Report</text>
638
+ <text x="200" y="505" text-anchor="middle" font-size="14" fill="#15803d">Reading accuracy</text>
639
+ <text x="200" y="520" text-anchor="middle" font-size="14" fill="#15803d">& improvement areas</text>
640
+
641
+ <!-- Process Flow Indicators -->
642
+ <circle cx="760" cy="400" r="8" fill="#3b82f6"/>
643
+ <text x="780" y="370" font-size="12" font-weight="bold" fill="#3b82f6">ACTIVE LEARNING</text>
644
+ <text x="780" y="385" font-size="10" fill="#3b82f6">Student practices reading</text>
645
+ <text x="780" y="415" font-size="12" font-weight="bold" fill="#3b82f6">AI ASSESSMENT</text>
646
+ <text x="780" y="430" font-size="10" fill="#3b82f6">Real-time analysis & feedback</text>
647
+
648
+ <!-- Arrowhead Definition -->
649
+ <defs>
650
+ <marker id="arrowhead" markerWidth="12" markerHeight="7" refX="10" refY="3.5" orient="auto">
651
+ <polygon points="0 0, 12 3.5, 0 7" fill="#6b7280"/>
652
+ </marker>
653
+ </defs>
654
+ </svg>
655
+ </div>
656
+ """)
657
+
658
+ gr.Markdown("""
659
+ ---
660
+
661
+ ## πŸ”§ Key Components
662
+
663
+ - **User Input (UI Agent)**: Collects student details (name, grade, topic) via an intuitive interface.
664
+ - **Story Generator (LLM Agent)**: Utilizes advanced language models to craft personalized, engaging stories.
665
+ - **Audio Synthesis (TTS Agent)**: Converts text stories into natural-sounding speech for accurate pronunciation guidance.
666
+ - **Student Recording (Recording Agent)**: Captures student readings for analysis.
667
+ - **Speech Recognition (STT Agent)**: Transcribes recorded readings into text for comparison.
668
+ - **Text Comparison (Analysis Agent)**: Analyzes transcription accuracy, comparing student readings to the original text.
669
+ - **Feedback Generation (Feedback Agent)**: Creates detailed feedback reports, highlighting strengths and areas for improvement.
670
+
671
+ """)
672
+
673
+ gr.Markdown("""
674
+ <div style="text-align: center; margin-top: 30px; padding: 20px; background: white; border-radius: 12px; font-size: 0.96em; color: #6b7280;">
675
+ Built for reading practice with modern AI tools.
676
+ </div>
677
+ """)
678
+
679
+ def generate_story_and_setup_ui(name, grade, topic):
680
+ story_text, audio_btn_update, audio_player_update, passage_state = "", gr.update(interactive=False, visible=False), gr.update(value=None, visible=False), ""
681
+ res = generate_story_from_llm(name, grade, topic)
682
+ if res:
683
+ story_text, audio_btn_update, audio_player_update = res
684
+ if story_text and not any(err in story_text.lower() for err in ["error", "blocked", "couldn't", "api key not configured"]):
685
+ passage_state = story_text
686
+ return story_text, audio_btn_update, audio_player_update, passage_state
687
+
688
+ def assess_reading_with_analysis(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
689
+ if not student_audio_path:
690
+ return (
691
+ """
692
+ <div class="status-indicator">
693
+ <p style="margin: 0; font-weight: 500;">🎀 Please record your reading first!</p>
694
+ </div>
695
+ """,
696
+ "🎀 Please record your reading first!",
697
+ ""
698
+ )
699
+ if not original_passage_state:
700
+ return (
701
+ """
702
+ <div class="status-indicator">
703
+ <p style="margin: 0; font-weight: 500;">πŸ“š Please generate a story first in the Story Creator tab.</p>
704
+ </div>
705
+ """,
706
+ "Please generate a story first in the Story Creator tab.",
707
+ ""
708
+ )
709
+
710
+ # Immediate feedback that analysis is starting
711
+ progress(0.05, desc="Analysis starting...")
712
+
713
+ # Start transcription
714
+ progress(0.1, desc="Starting transcription...")
715
+ transcribed_text = speech_to_text_whisper_space(student_audio_path, progress=progress)
716
+
717
+ stt_errors = ["couldn't understand", "had trouble", "service isn't working", "service is busy", "didn't get any recording", "filepath type issue"]
718
+ if any(err in (transcribed_text or "").lower() for err in stt_errors):
719
+ return (
720
+ """
721
+ <div class="status-indicator status-error">
722
+ <p style="margin: 0; font-weight: 500;">❌ Transcription Error</p>
723
+ <p style="margin: 5px 0 0 0; font-size: 13px;">Please try recording again</p>
724
+ </div>
725
+ """,
726
+ transcribed_text,
727
+ ""
728
+ )
729
+
730
+ progress(0.6, desc="Analyzing your reading accuracy...")
731
+ feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
732
+ progress(1.0, desc="Assessment complete!")
733
+
734
+ analysis_msg = """
735
+ <div class="status-indicator status-success">
736
+ <p style="margin: 0; font-weight: 500;">βœ… Analysis Complete!</p>
737
+ <p style="margin: 5px 0 0 0; font-size: 13px;">Head over to the "Analysis & Feedback" tab to see your results! 🎯</p>
738
  </div>
739
  """
740
+ return (analysis_msg, feedback, highlighted_passage)
741
+
742
    def update_recording_status(audio_file):
        """Sync recording-related controls with the microphone component.

        Triggered by stud_audio_in.change. Output order (per the .change
        wiring): recording_status markdown, record_again_btn visibility,
        clear_recording_btn visibility, assess_btn interactivity.
        """
        if audio_file is not None:
            # A recording exists: show the success banner, reveal the
            # record-again/clear buttons and enable analysis.
            return (
                gr.update(value="""
                <div class="status-indicator status-success">
                    <p style="margin: 0; font-weight: 500;">πŸŽ‰ Recording Complete!</p>
                    <p style="margin: 5px 0 0 0; font-size: 12px;">Ready for analysis</p>
                </div>
                """),
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(interactive=True)
            )
        else:
            # Recording cleared: reset the banner, hide the extra buttons
            # and disable analysis until a new take arrives.
            return (
                gr.update(value="""
                <div class="status-indicator">
                    <p style="margin: 0; font-weight: 500;">🎀 Ready to Record</p>
                    <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
                </div>
                """),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(interactive=False)
            )
767
+
768
    def clear_recording():
        """Reset the recording workflow and feedback panels to their initial state.

        Presumably wired to clear_recording_btn.click (wiring is below this
        view -- TODO confirm). The seven outputs appear to map to:
        stud_audio_in (cleared), recording_status, record_again_btn,
        clear_recording_btn, assess_btn, feedback_out, highlighted_out.
        """
        return (
            None,
            gr.update(value="""
            <div class="status-indicator">
                <p style="margin: 0; font-weight: 500;">🎀 Ready to Record</p>
                <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
            </div>
            """),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(interactive=False),
            """
            <div style="text-align: center; color: #6b7280;">
                <h4>Analysis Results</h4>
                <p>Your feedback will appear here.</p>
                <div class="status-indicator">
                    <p style="margin: 0; font-size: 14px;">πŸ’‘ Record yourself reading to get started!</p>
                </div>
            </div>
            """,
            """
            <div style="text-align: center; color: #6b7280;">
                <h4>Word-by-Word Analysis</h4>
                <p>Get color-coded feedback below.</p>
                <div class="status-indicator">
                    <p style="margin: 0; font-size: 14px;">🎀 Complete a reading practice session to see your analysis!</p>
                </div>
            </div>
            """
        )
799
 
800
    def record_again_action():
        """Clear the current take and prompt the student to re-record.

        Wired to record_again_btn.click; outputs (per the wiring below):
        stud_audio_in (cleared), recording_status, record_again_btn,
        clear_recording_btn, assess_btn. Unlike clear_recording(), this
        leaves the feedback panels untouched.
        """
        return (
            None,
            gr.update(value="""
            <div class="status-indicator status-warning">
                <p style="margin: 0; font-weight: 500;">πŸ”„ Ready for Take 2!</p>
                <p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to record again</p>
            </div>
            """),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(interactive=False)
        )
813
 
814
    def handle_audio_generation(story_text, progress=gr.Progress(track_tqdm=True)):
        """Generate TTS audio for the current story and update the player UI.

        Returns two gr.update objects: one for the audio player (filepath or
        cleared) and one for a status banner (success/error HTML, or hidden
        when there is no story text to speak).
        """
        if not story_text or not story_text.strip():
            # Nothing to synthesise: clear the player and hide the banner.
            return (
                gr.update(value=None, visible=True),
                gr.update(value="", visible=False)
            )

        # Generate the audio file via the sibling TTS helper.
        audio_filepath = text_to_speech_using_space(story_text, progress)
        print(f"AUDIO HANDLER: Received audio file path: {audio_filepath}")

        if audio_filepath:
            print(f"AUDIO HANDLER: Updating audio component with file: {audio_filepath}")
            success_msg = """
            <div style="background: #f0fdf4; border: 1px solid #22c55e; border-radius: 12px; padding: 12px 20px; margin: 8px 0; text-align: center;">
                <span style="color: #15803d; font-weight: 500;">βœ… Audio ready! You can now listen to your story.</span>
            </div>
            """
            return (
                gr.update(value=audio_filepath, visible=True),
                gr.update(value=success_msg, visible=True)
            )
        else:
            # TTS helper returned None: surface a failure banner.
            print("AUDIO HANDLER: No audio file received, returning None")
            error_msg = """
            <div style="background: #fef2f2; border: 1px solid #ef4444; border-radius: 12px; padding: 12px 20px; margin: 8px 0; text-align: center;">
                <span style="color: #dc2626; font-weight: 500;">❌ Audio generation failed. Please try again.</span>
            </div>
            """
            return (
                gr.update(value=None, visible=True),
                gr.update(value=error_msg, visible=True)
            )
848
+
849
def generate_story_and_audio_automatically(name, grade, topic, progress=gr.Progress(track_tqdm=True)):
    """Create a personalized story and immediately narrate it in one flow.

    Args:
        name: The student's name, woven into the story.
        grade: The student's grade level.
        topic: The requested story topic.
        progress: Gradio progress tracker shared across both phases.

    Returns:
        A triple of (story text, audio-player ``gr.update``, state value) so a
        single click fills the passage box, the audio widget, and the stored
        original-passage state.
    """
    progress(0.0, desc="Starting story creation...")

    # Phase 1: generate the story via the LLM helper.
    story_result = generate_story_from_llm(name, grade, topic, progress)
    if not story_result:
        return "", gr.update(value=None, visible=True), ""

    # The generator may hand back a bare string or a tuple whose first
    # element is the story text.
    if isinstance(story_result, tuple):
        story_text = story_result[0]
    else:
        story_text = story_result

    # NOTE(review): substring heuristic — a legitimate story that happens to
    # contain a word like "error" or "couldn't" would also be treated as a
    # failure here; confirm against the exact failure messages emitted by
    # generate_story_from_llm.
    failure_markers = ("error", "blocked", "couldn't", "api key not configured")
    if not story_text or any(marker in story_text.lower() for marker in failure_markers):
        return story_text, gr.update(value=None, visible=True), ""

    # Phase 2: the story is good — narrate it automatically.
    progress(0.5, desc="Story complete! Now generating audio...")

    try:
        audio_filepath = text_to_speech_using_space(story_text, progress)
    except Exception as e:
        # TTS failure must not discard the story the student already has.
        print(f"AUTO AUDIO ERROR: {e}")
        return story_text, gr.update(value=None, visible=True), story_text

    if audio_filepath:
        print(f"AUTO AUDIO: Successfully generated audio: {audio_filepath}")
        return story_text, gr.update(value=audio_filepath, visible=True), story_text

    print("AUTO AUDIO: Audio generation failed, but story is still available")
    return story_text, gr.update(value=None, visible=True), story_text
+
883
+ # Event handlers with automatic audio generation
884
+ gen_btn.click(
885
+ fn=generate_story_and_audio_automatically,
886
+ inputs=[s_name, s_grade, s_topic],
887
+ outputs=[passage_out, audio_out, original_passage_state],
888
+ show_progress=True
889
  )
890
+
891
+ assess_btn.click(
892
+ fn=assess_reading_with_analysis,
893
+ inputs=[original_passage_state, stud_audio_in],
894
+ outputs=[analysis_status, feedback_out, highlighted_out],
895
+ show_progress=True
896
  )
897
+
898
+ stud_audio_in.change(
899
+ fn=update_recording_status,
900
+ inputs=[stud_audio_in],
901
+ outputs=[recording_status, record_again_btn, clear_recording_btn, assess_btn]
902
+ )
903
+
904
+ record_again_btn.click(
905
+ fn=record_again_action,
906
+ outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn]
907
+ )
908
+
909
+ clear_recording_btn.click(
910
+ fn=clear_recording,
911
+ outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn, feedback_out, highlighted_out]
912
  )
913
 
914
# Launch the application when executed as a script (not on import).
if __name__ == "__main__":
    app.launch(debug=True, share=False)