Spaces:

unijoh
/

metaambod

Runtime error

File size: 1,273 Bytes

1180f3c
047c567
58f6d57
a90990e
 
58f6d57
 
 
1180f3c
b20a6cf
1180f3c
58f6d57
 
 
 
 
 
 
1180f3c
58f6d57
 
 
 
 
 
1180f3c
58f6d57
 
a90990e
58f6d57
a90990e
 
7bbbabd
 
a90990e
 
58f6d57

import torch
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor
import logging
import numpy as np
import soundfile as sf

# Set up logging (DEBUG so that library-level diagnostics are visible too).
logging.basicConfig(level=logging.DEBUG)

# Hub identifier of the text-to-speech checkpoint loaded below.
MODEL_ID = "microsoft/speecht5_tts"

# Bind both names before attempting the load so that a failure leaves them
# as None instead of undefined; otherwise the first later use would raise an
# unrelated NameError that hides the real cause.
processor = None
model = None

# Try to load the model and processor. Loading is best-effort: a failure is
# logged (with traceback) rather than aborting the import of this module.
try:
    processor = SpeechT5Processor.from_pretrained(MODEL_ID)
    model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)
except Exception as e:
    # Keep the pair consistent: never expose a processor without its model.
    processor = None
    model = None
    logging.exception("Error loading model or processor: %s", e)
else:
    logging.info("Model and processor loaded successfully.")

# Hub identifier of the HiFi-GAN vocoder that pairs with the SpeechT5 TTS model.
_VOCODER_ID = "microsoft/speecht5_hifigan"

# Lazily created vocoder instance, shared by all calls (see _get_vocoder).
_vocoder = None


def _get_vocoder():
    """Return the HiFi-GAN vocoder, loading it on first use and caching it."""
    global _vocoder
    if _vocoder is None:
        # Imported here so the module's import block does not need to change;
        # transformers is already a dependency of this file.
        from transformers import SpeechT5HifiGan

        _vocoder = SpeechT5HifiGan.from_pretrained(_VOCODER_ID)
    return _vocoder


def synthesize_speech(text, speaker_embeddings=None, output_path="output.wav"):
    """Synthesize ``text`` to a WAV file and return the file's path.

    Args:
        text: The string to speak. Empty, whitespace-only or non-string
            input is rejected.
        speaker_embeddings: Optional speaker x-vector, either 1-D or with a
            leading batch dimension. When omitted, a zero vector of size
            ``model.config.speaker_embedding_dim`` is used; this is only a
            neutral placeholder, so pass a real x-vector for a natural voice.
        output_path: Where the WAV file is written. The default keeps the
            original fixed name, which concurrent calls will overwrite.

    Returns:
        ``output_path`` on success, or ``None`` if the input was rejected or
        any step failed (the failure is logged with its traceback).
    """
    if not isinstance(text, str) or not text.strip():
        logging.error("Error during speech synthesis: input text is empty.")
        return None

    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        vocoder = _get_vocoder().to(device)

        # The processor only recognises text passed via the ``text`` keyword.
        inputs = processor(text=text, return_tensors="pt").to(device)

        if speaker_embeddings is None:
            logging.warning(
                "No speaker embeddings supplied; using a zero vector."
            )
            speaker_embeddings = torch.zeros(
                (1, model.config.speaker_embedding_dim), device=device
            )
        else:
            speaker_embeddings = torch.as_tensor(
                speaker_embeddings, dtype=torch.float32, device=device
            )
            if speaker_embeddings.dim() == 1:
                # Add the batch dimension the model expects.
                speaker_embeddings = speaker_embeddings.unsqueeze(0)

        with torch.no_grad():
            # Without a vocoder the model returns a mel spectrogram, not a
            # waveform; supplying the vocoder yields playable audio samples.
            speech = model.generate_speech(
                inputs["input_ids"],
                speaker_embeddings=speaker_embeddings,
                vocoder=vocoder,
            )

        logging.info("Speech generated successfully.")

        # Move to host memory and save as a mono WAV file that Gradio can
        # serve. 16 kHz is the rate the original code wrote at.
        waveform = speech.cpu().numpy().flatten()
        sf.write(output_path, waveform, 16000)
        return output_path
    except Exception as e:
        logging.exception("Error during speech synthesis: %s", e)
        return None