Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,10 @@ tts_tokenizer = AutoTokenizer.from_pretrained("Baghdad99/english_voice_tts")
|
|
14 |
tts_model = AutoModelForTextToWaveform.from_pretrained("Baghdad99/english_voice_tts")
|
15 |
|
16 |
def translate_speech(speech):
|
|
|
|
|
|
|
|
|
17 |
# Transcribe the speech to text
|
18 |
inputs = asr_processor(speech, return_tensors="pt", padding=True)
|
19 |
logits = asr_model(inputs.input_values).logits
|
|
|
14 |
tts_model = AutoModelForTextToWaveform.from_pretrained("Baghdad99/english_voice_tts")
|
15 |
|
16 |
def translate_speech(speech):
|
17 |
+
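# Down-mix multi-channel audio to mono by averaging over axis 0 (assumes a channels-first array, i.e. shape (channels, samples) — TODO confirm against the audio source)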
|
18 |
+
if len(speech.shape) > 1:
|
19 |
+
speech = speech.mean(axis=0)
|
20 |
+
|
21 |
# Transcribe the speech to text
|
22 |
inputs = asr_processor(speech, return_tensors="pt", padding=True)
|
23 |
logits = asr_model(inputs.input_values).logits
|