Spaces:

kaiku03
/

gemma3n-2b-it-challenge-demo

Sleeping

App Files Community

kaiku03 committed on Aug 2

Commit

ea0cc1f

1 Parent(s): e69488e

update(4)

Browse files

Files changed (1) hide show

app.py +27 -3

app.py CHANGED Viewed

@@ -15,12 +15,34 @@ agent = NewsReporterAgent()
 # --- 2. Define Gradio Logic Handlers ---
 # These functions orchestrate the agent's actions based on UI events.
-def run_initial_generation(audio_path, image_path):
     """Handles the first step: processing inputs and generating the initial report."""
-    if not audio_path and not image_path:
         return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
-    state = AgentState(audio_path=audio_path,
                        image_path=image_path,
                        news_report=[])
@@ -34,6 +56,8 @@ def run_initial_generation(audio_path, image_path):
     return latest_report, state, gr.update(visible=True), "", transcribed_text, image_description
 def run_revision(feedback, current_state):
     """Handles the revision step based on user feedback."""
     if not feedback or not feedback.strip():

 # --- 2. Define Gradio Logic Handlers ---
 # These functions orchestrate the agent's actions based on UI events.
+def run_initial_generation(audio_data, image_path):
     """Handles the first step: processing inputs and generating the initial report."""
+    temp_audio_path = None
+    # --- Start of New Logic ---
+    # Robustly handle different types of audio input from the gr.Audio component
+    if isinstance(audio_data, str) and os.path.exists(audio_data):
+        # Case 1: Input is a filepath string (from Examples or a direct path).
+        temp_audio_path = audio_data
+    elif isinstance(audio_data, tuple):
+        # Case 2: Input is a (sample_rate, numpy_array) tuple from microphone or upload.
+        sample_rate, waveform = audio_data
+        # Ensure the recording is not empty before saving
+        if waveform is not None and waveform.size > 0:
+            os.makedirs("temp_audio", exist_ok=True)
+            temp_audio_path = "temp_audio/recorded_audio.wav"
+            # Save the numpy array as a WAV file
+            wavfile.write(temp_audio_path, rate=sample_rate, data=waveform)
+    # --- End of New Logic ---
+    if not temp_audio_path and not image_path:
         return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
+    # The agent now receives a reliable file path every time
+    state = AgentState(audio_path=temp_audio_path,
                        image_path=image_path,
                        news_report=[])
     return latest_report, state, gr.update(visible=True), "", transcribed_text, image_description
 def run_revision(feedback, current_state):
     """Handles the revision step based on user feedback."""
     if not feedback or not feedback.strip():