# translate / audio.py
# Hugo Rodrigues
# initial commit
# 357cae7
# %%
import time
from IPython.display import Audio
import numpy as np
from scipy.io.wavfile import write
from IPython.display import Audio
import torch
# from transformers import pipeline
from transformers import SeamlessM4Tv2Model
from transformers import AutoProcessor
# Checkpoint used for both the processor and the model.
# A previously tried alternative was "facebook/hf-seamless-m4t-medium".
model_name = "facebook/seamless-m4t-v2-large"

# Target the first CUDA device when one is available, otherwise the CPU.
# (A float16-on-GPU / float32-on-CPU dtype switch was sketched here before
# but is not applied; the model keeps its default dtype.)
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# The processor and the model weights are both resolved from the same
# checkpoint name.
processor = AutoProcessor.from_pretrained(model_name)
model = SeamlessM4Tv2Model.from_pretrained(model_name)

# The return value of .to() is not used; the module is expected to be moved
# in place.
model.to(device)
# Timestamp taken right before the input is prepared.
# NOTE(review): nothing in the visible code reads start_time back.
start_time = time.time()

# Source and target language codes handed to the processor and to generate().
src_lang, tgt_lang = "eng", "por"

# NOTE(review): "beautifull" is misspelled; left untouched because this exact
# string is what gets fed to the model.
text_to_translate = "My life is a beautifull thing"

# Encode the sentence as PyTorch tensors and move them to the model's device.
text_inputs = processor(
    text=text_to_translate,
    src_lang=src_lang,
    return_tensors="pt",
).to(device)

# Disabled text-to-text variant (no speech synthesis), kept for reference:
# output_tokens = model.generate(
#     **text_inputs, tgt_lang=tgt_lang, generate_speech=False)
# translated_text_from_text = processor.decode(
#     output_tokens[0].tolist()[0], skip_special_tokens=True)
# %%
# Dump the encoded inputs for inspection before running the model.
print(text_inputs)
# %%
# Run generation for the target language, take the first element of the
# result, bring it back to host memory as a NumPy array and drop any
# size-1 dimensions.
# NOTE(review): element 0 is presumably the waveform tensor -- confirm
# against the transformers documentation for generate().
audio_array_from_text = (
    model.generate(**text_inputs, tgt_lang=tgt_lang)[0]
    .cpu()
    .numpy()
    .squeeze()
)
# %%
# Dump the raw sample values for inspection.
print(audio_array_from_text)
# %%
# Imported locally so this cell is self-contained; IPython is already a
# dependency of this file (see the Audio import at the top).
from IPython.display import display

# Wrap the generated samples in an IPython audio widget, using the sampling
# rate recorded in the model configuration.
a = Audio(audio_array_from_text, rate=model.config.sampling_rate)

# print(a) only emitted the object's repr ("<IPython.lib.display.Audio
# object>") and never rendered the player. display() asks the front end for
# the rich representation, and falls back to printing the repr when run
# outside an IPython/Jupyter session.
display(a)
# %%