Spaces:

unijoh
/

metaambod

Runtime error

unijoh committed on Jun 14, 2024

Commit

ba0da04

verified ·

1 Parent(s): 0455ecb

Update tts.py

Files changed (1) hide show

tts.py CHANGED Viewed

@@ -1,22 +1,39 @@
-import numpy as np
-import torchaudio
 import logging
 # Set up logging
 logging.basicConfig(level=logging.DEBUG)
 def synthesize_speech(text):
     try:
-        # Generate a simple sine wave for testing
-        sr = 16000
-        t = np.linspace(0, 1, sr)
-        waveform = 0.5 * np.sin(2 * np.pi * 440 * t).astype(np.float32)
-        # Save the sine wave to a file
-        file_path = "/tmp/output.wav"
-        torchaudio.save(file_path, torch.tensor(waveform).unsqueeze(0), sr)
-        logging.info(f"Test audio file saved successfully at {file_path}.")
-        return file_path
     except Exception as e:
-        logging.error(f"Error during test audio generation: {e}")
         return None

+import torch
+from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor
 import logging
+import numpy as np
+import soundfile as sf
 # Set up logging
 logging.basicConfig(level=logging.DEBUG)
+MODEL_ID = "microsoft/speecht5_tts"
+# Try to load the model and processor
+try:
+    processor = SpeechT5Processor.from_pretrained(MODEL_ID)
+    model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)
+    logging.info("Model and processor loaded successfully.")
+except Exception as e:
+    logging.error(f"Error loading model or processor: {e}")
 def synthesize_speech(text):
     try:
+        inputs = processor(text, return_tensors="pt")
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model.to(device)
+        inputs = inputs.to(device)
+        with torch.no_grad():
+            speech = model.generate(**inputs)
+        logging.info("Speech generated successfully.")
+        # Decode the generated speech and save to an audio file
+        waveform = speech.cpu().numpy().flatten()
+        # Convert waveform to audio format that Gradio can handle
+        sf.write("output.wav", waveform, 16000)
+        return "output.wav"
     except Exception as e:
+        logging.error(f"Error during speech synthesis: {e}")
         return None