Tonic committed on
Commit
4dc9e5f
1 Parent(s): 2202fe0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -1
app.py CHANGED
@@ -11,6 +11,7 @@ import json
11
  import dotenv
12
  from scipy.io.wavfile import write
13
  import PIL
 
14
  from openai import OpenAI
15
  import time
16
  from PIL import Image
@@ -79,6 +80,22 @@ def evaluate_hallucination(input1, input2):
79
 
80
  return label
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def process_speech(input_language, audio_input):
83
  """
84
  processing sound using seamless_m4t
@@ -333,7 +350,8 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
333
 
334
  # Process audio input
335
  elif audio_input is not None:
336
- audio_text = process_speech(input_language, audio_input)
 
337
  combined_text += "\n\n**Audio Input:**\n" + audio_text
338
 
339
  # Process text input
 
11
  import dotenv
12
  from scipy.io.wavfile import write
13
  import PIL
14
+ import soundfile as sf
15
  from openai import OpenAI
16
  import time
17
  from PIL import Image
 
80
 
81
  return label
82
 
83
def save_audio(audio_input, output_dir="saved_audio"):
    """
    Write an audio clip to a WAV file and return the path of that file.

    Args:
        audio_input: A ``(sample_rate, audio_data)`` pair; it is unpacked in
            that order and handed to ``sf.write``.
            NOTE(review): presumably the tuple produced by the UI's audio
            component, with ``audio_data`` as a NumPy array -- confirm
            against the caller.
        output_dir: Directory that receives the file. It is created
            (including missing parents) when it does not exist yet.

    Returns:
        The path of the written file, ``<output_dir>/audio_<timestamp>.wav``.
    """
    # exist_ok=True replaces the separate exists() check, which could fail
    # if another request created the directory between check and creation.
    os.makedirs(output_dir, exist_ok=True)

    # Extract sample rate and audio data
    sample_rate, audio_data = audio_input

    # Use a nanosecond timestamp for the file name: a whole-second timestamp
    # gives the same name to two clips saved within the same second, so the
    # later clip would silently overwrite the earlier one.
    file_name = f"audio_{time.time_ns()}.wav"
    file_path = os.path.join(output_dir, file_name)

    # Save the audio file
    sf.write(file_path, audio_data, sample_rate)

    return file_path
98
+
99
  def process_speech(input_language, audio_input):
100
  """
101
  processing sound using seamless_m4t
 
350
 
351
  # Process audio input
352
  elif audio_input is not None:
353
+ audio_file_path = save_audio(audio_input)
354
+ audio_text = process_speech(input_language, audio_file_path)
355
  combined_text += "\n\n**Audio Input:**\n" + audio_text
356
 
357
  # Process text input