# app.py -- Hugging Face Space file by gitgato
# Commit 8ae49e6 ("Update app.py", verified) | 2.05 kB | no virus detected
import torch
from transformers import pipeline
from datasets import load_dataset
from transformers import AutoModel
from transformers import pipeline, SpeechT5Processor, SpeechT5HifiGan, SpeechT5ForTextToSpeech
import numpy as np
import gradio as gr
# Set up the automatic speech recognition pipeline (Whisper base checkpoint).
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
)
# Load model directly
# Speech -> translated text.
def translate(audio):
    """Run the speech recognition pipeline on ``audio`` with the "translate" task.

    Args:
        audio: Input accepted by the module-level ``pipe`` pipeline; it is
            forwarded unchanged.

    Returns:
        The ``"text"`` entry of the pipeline output.
    """
    # Generation options are passed together through ``generate_kwargs``;
    # giving ``max_new_tokens`` as a direct call argument is deprecated in
    # recent Transformers releases.
    outputs = pipe(
        audio,
        generate_kwargs={"task": "translate", "max_new_tokens": 256},
    )
    return outputs["text"]
# Load the SpeechT5 processor, the text-to-speech model and the vocoder.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
# The text-to-speech class is required here: AutoModel resolves to the bare
# SpeechT5 encoder-decoder, which has no speech-generation method.
# NOTE(review): assumes "gitgato/mabama" is a SpeechT5 TTS checkpoint -- confirm.
model = SpeechT5ForTextToSpeech.from_pretrained("gitgato/mabama")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Load the speaker embedding: the "xvector" field of the first dataset row,
# with a leading batch dimension added.
embeddings_dataset = load_dataset("ovieyra21/mabama-v5", split="train")
speaker_embeddings = torch.tensor(embeddings_dataset[0]["xvector"]).unsqueeze(0)


# Text -> speech waveform.
def synthesise(text):
    """Synthesise speech for ``text`` using the module-level model and vocoder.

    Args:
        text: Text to be spoken.

    Returns:
        The generated waveform as a NumPy array.
    """
    inputs = processor(text=text, return_tensors="pt")
    # ``generate_speech`` takes the speaker embeddings as its second argument
    # and, when a vocoder is supplied, returns the waveform rather than a
    # spectrogram.
    speech = model.generate_speech(
        inputs["input_ids"],
        speaker_embeddings,
        vocoder=vocoder,
    )
    # Detach and move to host memory so the NumPy conversion cannot fail on a
    # tensor that tracks gradients or lives on another device.
    return speech.detach().cpu().numpy()
# Output audio sample format: 16-bit signed integers.
target_dtype = np.int16
# Largest value representable in target_dtype; used as the scale factor when
# converting the synthesised waveform to integers.
max_range = np.iinfo(target_dtype).max
# Speech -> translated speech (entry point wired into the Gradio interface).
def speech_to_speech_translation(audio):
    """Translate spoken ``audio`` to text and synthesise that text as speech.

    Args:
        audio: Input audio, forwarded unchanged to ``translate``.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``sampling_rate`` is fixed
        at 16000 (presumably the vocoder's output rate -- confirm) and
        ``samples`` is an array of ``target_dtype`` integers.
    """
    translated_text = translate(audio)
    synthesised_speech = synthesise(translated_text)
    # Clip before scaling so samples slightly outside [-1, 1] saturate instead
    # of wrapping around when cast to a 16-bit integer.
    synthesised_speech = np.clip(synthesised_speech, -1.0, 1.0)
    synthesised_speech = (synthesised_speech * max_range).astype(target_dtype)
    return 16000, synthesised_speech
# Gradio interface: microphone input in, synthesised speech out.
demo = gr.Interface(
    fn=speech_to_speech_translation,
    # With the ``sources=`` API the only valid ``type`` values are "numpy" and
    # "filepath"; "file" is rejected. A file path is handed to the speech
    # recognition pipeline as-is.
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Input Audio"),
    outputs=gr.Audio(label="Generated Speech", type="numpy"),
    title="Speech-to-Speech Translation",
    description="Translate speech input to synthesized speech output.",
)

# Launch the interface.
demo.launch(debug=True)