Spaces:

hbui
/

Text_to_Speech

Running

hbui committed on Mar 13

Commit

4cccb66

•

1 Parent(s): 7b9c7c2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,31 +1,25 @@
-from transformers.utils import logging
-logging.set_verbosity_error()
 from transformers import pipeline
 import gradio as gr
-import os
-import soundfile as sf
-import numpy as np
-import tempfile
-def launch(input_text):
-    try:
-        # Assuming `narrator` function returns a numpy array with audio data and a sampling rate.
-        narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
-        out = narrator(input_text)
-        audio_data, samplerate = np.array(out["audio"][0]), 22050  # Example: 22050 Hz as common sampling rate
-        # Directly return the audio data and sampling rate.
-        return audio_data, samplerate
-    except Exception as e:
-        print(f"An error occurred: {e}")
-        return None, None
-# Create the Gradio interface with the correct audio output handling.
-iface = gr.Interface(fn=launch, inputs="text", outputs=gr.Audio(type="numpy", label="Your Audio"))
-# Launch the Gradio app
-iface.launch()

 from transformers import pipeline
 import gradio as gr
+# Initialize the text-to-speech pipeline with a model from Hugging Face's Model Hub
+model_name = "kakao-enterprise/vits-ljs"
+text_to_speech_pipeline = pipeline("text-to-speech", model=model_name)
+def generate_speech(text):
+    # Generate speech from the input text
+    out = text_to_speech_pipeline(text)
+    # The output is a list of tensors, convert to numpy array
+    audio_data = out[0]["array"]
+    return audio_data, 22050  # Return audio data and sampling rate
+# Create the Gradio interface
+interface = gr.Interface(fn=generate_speech,
+                         inputs=gr.Textbox(lines=2, placeholder="Type something here..."),
+                         outputs=gr.Audio(type="numpy", label="Generated Speech"),
+                         title="Text-to-Speech with Hugging Face",
+                         description="Enter text to generate speech using a model from Hugging Face's Model Hub.")
+# Launch the app
+interface.launch()