Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -27,7 +27,7 @@ tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
|
27 |
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
28 |
|
29 |
# Function to convert audio to text using ASR
|
30 |
-
def
|
31 |
if audio_filepath is None:
|
32 |
raise gr.Error("Please provide some input audio.")
|
33 |
|
@@ -44,9 +44,9 @@ def transcribe(audio_filepath):
|
|
44 |
duration = len(data) / SAMPLE_RATE
|
45 |
manifest_data = {
|
46 |
"audio_filepath": converted_audio_filepath,
|
47 |
-
"taskname":
|
48 |
"source_lang": "en",
|
49 |
-
"target_lang": "en",
|
50 |
"pnc": "no",
|
51 |
"answer": "predict",
|
52 |
"duration": str(duration),
|
@@ -56,9 +56,9 @@ def transcribe(audio_filepath):
|
|
56 |
fout.write(json.dumps(manifest_data))
|
57 |
|
58 |
if duration < 40:
|
59 |
-
|
60 |
else:
|
61 |
-
|
62 |
frame_asr,
|
63 |
canary_model.cfg.preprocessor,
|
64 |
model_stride_in_secs,
|
@@ -66,7 +66,7 @@ def transcribe(audio_filepath):
|
|
66 |
manifest=manifest_filepath,
|
67 |
)[0].text
|
68 |
|
69 |
-
return
|
70 |
|
71 |
# Function to convert text to speech using TTS
|
72 |
def gen_speech(text):
|
@@ -81,9 +81,10 @@ def gen_speech(text):
|
|
81 |
|
82 |
# Root function for Gradio interface
|
83 |
def start_process(audio_filepath):
|
84 |
-
transcription =
|
85 |
print("Done transcribing")
|
86 |
-
translation = "
|
|
|
87 |
audio_output_filepath = gen_speech(transcription)
|
88 |
print("Done speaking")
|
89 |
return transcription, translation, audio_output_filepath
|
|
|
27 |
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
28 |
|
29 |
# Function to convert audio to text using ASR
|
30 |
+
def gen_text(audio_filepath, action):
|
31 |
if audio_filepath is None:
|
32 |
raise gr.Error("Please provide some input audio.")
|
33 |
|
|
|
44 |
duration = len(data) / SAMPLE_RATE
|
45 |
manifest_data = {
|
46 |
"audio_filepath": converted_audio_filepath,
|
47 |
+
"taskname": action,
|
48 |
"source_lang": "en",
|
49 |
+
"target_lang": "en" if action=="asr" else "fr",
|
50 |
"pnc": "no",
|
51 |
"answer": "predict",
|
52 |
"duration": str(duration),
|
|
|
56 |
fout.write(json.dumps(manifest_data))
|
57 |
|
58 |
if duration < 40:
|
59 |
+
predicted_text = canary_model.transcribe(manifest_filepath)[0]
|
60 |
else:
|
61 |
+
predicted_text = get_buffered_pred_feat_multitaskAED(
|
62 |
frame_asr,
|
63 |
canary_model.cfg.preprocessor,
|
64 |
model_stride_in_secs,
|
|
|
66 |
manifest=manifest_filepath,
|
67 |
)[0].text
|
68 |
|
69 |
+
return predicted_text
|
70 |
|
71 |
# Function to convert text to speech using TTS
|
72 |
def gen_speech(text):
|
|
|
81 |
|
82 |
# Root function for Gradio interface
|
83 |
def start_process(audio_filepath):
|
84 |
+
transcription = gen_text(audio_filepath, "asr")
|
85 |
print("Done transcribing")
|
86 |
+
translation = gen_text(audio_filepath, "ast")
|
87 |
+
print("Done translation")
|
88 |
audio_output_filepath = gen_speech(transcription)
|
89 |
print("Done speaking")
|
90 |
return transcription, translation, audio_output_filepath
|