Spaces:

oza75
/

bambara-mt

Running on Zero

App Files Community

Aboubacar OUATTARA - kaira committed on Apr 22

Commit

0249b26

•

1 Parent(s): ace3461

add audio files

Browse files

Files changed (1) hide show

app.py +15 -40

app.py CHANGED Viewed

@@ -30,7 +30,7 @@ tts = BambaraTTS(tts_model)
 @spaces.GPU
 def translate_to_bambara(text, src_lang):
     translation = translator(text, src_lang=src_lang, tgt_lang="bam_Latn")
-    return translation[0]['translation_text']
 # Function to convert text to speech
@@ -133,46 +133,21 @@ def _fn(
     bambara_text = translate_to_bambara(text, source_lang)
     # Step 2: Convert the translated text to speech with reference audio
-    if reference_audio is not None:
-        audio_array, sampling_rate = text_to_speech(bambara_text, reference_audio)
-    else:
-        audio_array, sampling_rate = text_to_speech(bambara_text)
-    # Step 3: Enhance the audio
-    denoised_audio, enhanced_audio = enhance_speech(
-        audio_array,
-        sampling_rate,
-        solver,
-        nfe,
-        prior_temp,
-        denoise_before_enhancement
-    )
-    print("Audio Array Shape:", audio_array.shape)
-    print("Sample Rate:", sampling_rate)
-    print("Audio Array Dtype:", audio_array.dtype)
-    print("Max Value in Audio Array:", torch.max(audio_array))
-    print("Min Value in Audio Array:", torch.min(audio_array))
-    print("Sampling rate type: ", type(sampling_rate))
-    print("Denoised sampling rate type: ", type(denoised_audio[0]))
-    print("Enhanced sampling rate type: ", type(enhanced_audio[0]))
-    import resource
-    # Get the soft and hard limits for the number of open file descriptors
-    soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
-    print('Soft limit for RLIMIT_NOFILE:', soft_limit)
-    print('Hard limit for RLIMIT_NOFILE:', hard_limit)
-    print('Is CUDA available:', torch.cuda.is_available())
-    print('CUDA version:', torch.version.cuda)
-    print('CuDNN version:', torch.backends.cudnn.version())
-    def check_tensor(tensor):
-        print('Contains NaN:', torch.isnan(tensor).any())
-        print('Contains Inf:', torch.isinf(tensor).any())
-    # Use this function to check your audio tensor
-    check_tensor(audio_array)
     # Return all outputs
     return (
         bambara_text,

 @spaces.GPU
 def translate_to_bambara(text, src_lang):
     translation = translator(text, src_lang=src_lang, tgt_lang="bam_Latn")
+    return str(translation[0]['translation_text'])
 # Function to convert text to speech
     bambara_text = translate_to_bambara(text, source_lang)
     # Step 2: Convert the translated text to speech with reference audio
+    # if reference_audio is not None:
+    #     audio_array, sampling_rate = text_to_speech(bambara_text, reference_audio)
+    # else:
+    #     audio_array, sampling_rate = text_to_speech(bambara_text)
+    #
+    # # Step 3: Enhance the audio
+    # denoised_audio, enhanced_audio = enhance_speech(
+    #     audio_array,
+    #     sampling_rate,
+    #     solver,
+    #     nfe,
+    #     prior_temp,
+    #     denoise_before_enhancement
+    # )
     # Return all outputs
     return (
         bambara_text,