Baghdad99 committed on
Commit
ea3653e
1 Parent(s): 776c3e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  import numpy as np
4
  from pydub import AudioSegment
 
5
 
6
  # Load the pipeline for speech recognition and translation
7
  pipe = pipeline(
@@ -12,18 +13,19 @@ pipe = pipeline(
12
  translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-to-english-text")
13
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
14
 
15
- def translate_speech(audio_data_tuple):
16
  print(f"Type of audio: {type(audio_data_tuple)}, Value of audio: {audio_data_tuple}") # Debug line
17
-
18
- # Extract the audio data from the tuple
19
- sample_rate, audio_data = audio_data_tuple
20
-
21
- # Print the shape and type of the audio data
22
- print(f"Audio data type: {type(audio_data)}, Audio data shape: {audio_data.shape}")
 
23
 
24
  # Normalize the audio data to the range [-1, 1]
25
  audio_data_normalized = audio_data / np.iinfo(audio_data.dtype).max
26
-
27
  # Convert the normalized audio data to float64
28
  audio_data_float64 = audio_data_normalized.astype(np.float64)
29
 
@@ -82,7 +84,7 @@ def translate_speech(audio_data_tuple):
82
  # Define the Gradio interface
83
  iface = gr.Interface(
84
  fn=translate_speech,
85
- inputs=gr.inputs.Audio(source="microphone"), # Change this line
86
  outputs=gr.outputs.Audio(type="numpy"),
87
  title="Hausa to English Translation",
88
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
 
2
  from transformers import pipeline, AutoTokenizer
3
  import numpy as np
4
  from pydub import AudioSegment
5
+ import librosa
6
 
7
  # Load the pipeline for speech recognition and translation
8
  pipe = pipeline(
 
13
  translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-to-english-text")
14
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
15
 
16
+ def translate_speech(audio_input):
17
  print(f"Type of audio: {type(audio_data_tuple)}, Value of audio: {audio_data_tuple}") # Debug line
18
+ # Check if the input is a tuple (recorded audio) or a string (uploaded file)
19
+ if isinstance(audio_input, tuple):
20
+ # Extract the audio data from the tuple
21
+ sample_rate, audio_data = audio_input
22
+ else:
23
+ # Load the audio file as a floating point time series
24
+ audio_data, sample_rate = librosa.load(audio_input, sr=None)
25
 
26
  # Normalize the audio data to the range [-1, 1]
27
  audio_data_normalized = audio_data / np.iinfo(audio_data.dtype).max
28
+
29
  # Convert the normalized audio data to float64
30
  audio_data_float64 = audio_data_normalized.astype(np.float64)
31
 
 
84
  # Define the Gradio interface
85
  iface = gr.Interface(
86
  fn=translate_speech,
87
+ inputs=gr.inputs.Audio(source="microphone", type="file"), # Change this line
88
  outputs=gr.outputs.Audio(type="numpy"),
89
  title="Hausa to English Translation",
90
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."