kaiku03 committed on
Commit
ea0cc1f
·
1 Parent(s): e69488e
Files changed (1) hide show
  1. app.py +27 -3
app.py CHANGED
@@ -15,12 +15,34 @@ agent = NewsReporterAgent()
15
  # --- 2. Define Gradio Logic Handlers ---
16
  # These functions orchestrate the agent's actions based on UI events.
17
 
18
- def run_initial_generation(audio_path, image_path):
19
  """Handles the first step: processing inputs and generating the initial report."""
20
- if not audio_path and not image_path:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
22
 
23
- state = AgentState(audio_path=audio_path,
 
24
  image_path=image_path,
25
  news_report=[])
26
 
@@ -34,6 +56,8 @@ def run_initial_generation(audio_path, image_path):
34
 
35
  return latest_report, state, gr.update(visible=True), "", transcribed_text, image_description
36
 
 
 
37
  def run_revision(feedback, current_state):
38
  """Handles the revision step based on user feedback."""
39
  if not feedback or not feedback.strip():
 
15
  # --- 2. Define Gradio Logic Handlers ---
16
  # These functions orchestrate the agent's actions based on UI events.
17
 
18
+ def run_initial_generation(audio_data, image_path):
19
  """Handles the first step: processing inputs and generating the initial report."""
20
+
21
+ temp_audio_path = None
22
+
23
+ # --- Start of New Logic ---
24
+ # Robustly handle different types of audio input from the gr.Audio component
25
+ if isinstance(audio_data, str) and os.path.exists(audio_data):
26
+ # Case 1: Input is a filepath string (from Examples or a direct path).
27
+ temp_audio_path = audio_data
28
+
29
+ elif isinstance(audio_data, tuple):
30
+ # Case 2: Input is a (sample_rate, numpy_array) tuple from microphone or upload.
31
+ sample_rate, waveform = audio_data
32
+
33
+ # Ensure the recording is not empty before saving
34
+ if waveform is not None and waveform.size > 0:
35
+ os.makedirs("temp_audio", exist_ok=True)
36
+ temp_audio_path = "temp_audio/recorded_audio.wav"
37
+ # Save the numpy array as a WAV file
38
+ wavfile.write(temp_audio_path, rate=sample_rate, data=waveform)
39
+ # --- End of New Logic ---
40
+
41
+ if not temp_audio_path and not image_path:
42
  return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
43
 
44
+ # The agent receives a saved audio file path, or None when only an image was provided
45
+ state = AgentState(audio_path=temp_audio_path,
46
  image_path=image_path,
47
  news_report=[])
48
 
 
56
 
57
  return latest_report, state, gr.update(visible=True), "", transcribed_text, image_description
58
 
59
+
60
+
61
  def run_revision(feedback, current_state):
62
  """Handles the revision step based on user feedback."""
63
  if not feedback or not feedback.strip():