Baghdad99 committed on
Commit
88de73c
·
1 Parent(s): 6ade673

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -17,17 +17,15 @@ def translate_speech(audio_input):
17
  # Load the audio file as a floating point time series
18
  audio_data, sample_rate = librosa.load(audio_input, sr=None)
19
 
20
- # Prepare the input dictionary
21
- input_dict = pipe.tokenizer(audio_data, return_tensors="pt", padding=True)
22
 
23
- # Use the speech recognition model to get the logits
24
- logits = pipe.model(input_dict.input_values.to("cuda")).logits
25
-
26
- # Get the predicted IDs
27
- pred_ids = torch.argmax(logits, dim=-1)[0]
28
-
29
- # Decode the predicted IDs to get the transcription
30
- transcription = pipe.tokenizer.decode(pred_ids)
31
 
32
  # Use the translation pipeline to translate the transcription
33
  translated_text = translator(transcription, return_tensors="pt")
@@ -58,6 +56,7 @@ def translate_speech(audio_input):
58
 
59
  return 16000, synthesised_speech
60
 
 
61
  # Define the Gradio interface
62
  iface = gr.Interface(
63
  fn=translate_speech,
 
17
  # Load the audio file as a floating point time series
18
  audio_data, sample_rate = librosa.load(audio_input, sr=None)
19
 
20
+ # Use the speech recognition pipeline to transcribe the audio
21
+ output = pipe(audio_data)
22
 
23
+ # Check if the output contains 'text'
24
+ if 'text' in output:
25
+ transcription = output["text"]
26
+ else:
27
+ print("The output does not contain 'text'")
28
+ return
 
 
29
 
30
  # Use the translation pipeline to translate the transcription
31
  translated_text = translator(transcription, return_tensors="pt")
 
56
 
57
  return 16000, synthesised_speech
58
 
59
+
60
  # Define the Gradio interface
61
  iface = gr.Interface(
62
  fn=translate_speech,