Spaces:
Runtime error
Runtime error
File size: 2,998 Bytes
f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 d347764 f146c37 f805e49 f146c37 f805e49 c737803 d347764 f146c37 d347764 f805e49 d347764 c737803 f146c37 c737803 f146c37 c737803 d347764 f146c37 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# -*- coding: utf-8 -*-
"""ML_task3.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1DfK6fjkAd9RjVx3MUGfDtAOulvEenk0E
"""
# NOTE: the notebook shell magics that used to live here
# (`!pip install gradio`, `!pip install datasets`, `!pip install transformers`,
# `!pip -q install sentencepiece`) were removed: `!cmd` is IPython-only syntax
# and is a SyntaxError when this file is executed as a plain Python script.
# Install the packages outside the script instead (e.g. list gradio, datasets,
# transformers, sentencepiece and torch in requirements.txt).
import gradio as gr
import numpy as np
import torch
from datasets import load_dataset
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
    VitsModel,
    VitsTokenizer,
    WhisperProcessor,
    pipeline,
)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Speech recognition
asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="voidful/wav2vec2-xlsr-multilingual-56",
    device=device,
)

# NOTE(review): `processor` is not referenced by any function visible in this
# file; it is kept so the module-level name stays available -- confirm whether
# it can be dropped to avoid loading it at start-up.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

# Text translation is done in two hops: source language -> English -> Russian.
translator_en = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
translator_ru = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ru")

# Russian text-to-speech model and its tokenizer.
model = VitsModel.from_pretrained("facebook/mms-tts-rus")
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-rus")
def translator_mul_ru(text):
    """Translate ``text`` into Russian by pivoting through English.

    The input is passed to ``translator_en`` first; the resulting English
    text is then passed to ``translator_ru``.

    Args:
        text: Source text handed to the first translation pipeline.

    Returns:
        The ``translation_text`` field of the first result produced by
        ``translator_ru``.
    """
    english_text = translator_en(text)[0]['translation_text']
    russian_result = translator_ru(english_text)
    return russian_result[0]['translation_text']
def translate(audio):
    """Run the ASR pipeline on ``audio`` and return the recognised text.

    Args:
        audio: Audio input forwarded unchanged to ``asr_pipe``.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    # NOTE(review): `task="translate"` and `max_new_tokens` look like options
    # for a generative (Whisper-style) model, while `asr_pipe` is created from
    # a wav2vec2 checkpoint -- confirm whether they have any effect here.
    asr_options = {
        "max_new_tokens": 256,
        "generate_kwargs": {"task": "translate"},
    }
    result = asr_pipe(audio, **asr_options)
    return result["text"]
def synthesise(text):
    """Translate ``text`` to Russian and synthesise it as speech.

    Args:
        text: Text passed through ``translator_mul_ru`` before synthesis.

    Returns:
        The ``"waveform"`` output of the TTS model, moved to the CPU.
    """
    russian_text = translator_mul_ru(text)
    token_ids = tokenizer(russian_text, return_tensors="pt")["input_ids"]

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        tts_output = model(token_ids)

    return tts_output["waveform"].cpu()
def speech_to_speech_translation(audio):
    """Convert input speech into synthesised Russian speech.

    The audio is transcribed with ``translate``, the text is translated and
    voiced by ``synthesise``, and the resulting waveform is converted to
    16-bit PCM for the Gradio ``Audio`` output component.

    Args:
        audio: Audio input as delivered by the Gradio ``Audio`` component
            (configured with ``type="filepath"`` in this file). May be
            ``None`` when the user submits without providing audio.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``sampling_rate`` is
        ``16000`` and ``samples`` is an ``int16`` NumPy array taken from the
        first item along the waveform's leading axis.

    Raises:
        gr.Error: If ``audio`` is ``None``.
    """
    # Fail with a readable UI message instead of an opaque pipeline error.
    if audio is None:
        raise gr.Error("No audio provided. Please record or upload a clip first.")

    translated_text = translate(audio)
    print(translated_text)

    waveform = synthesise(translated_text).numpy()
    # Clamp to [-1, 1] before scaling: any sample outside that range would
    # otherwise wrap around when cast to int16 and produce loud clicks.
    pcm = (np.clip(waveform, -1.0, 1.0) * 32767).astype(np.int16)

    # NOTE(review): 16000 is assumed to match the TTS model's output rate
    # (model.config.sampling_rate) -- confirm.
    return 16000, pcm[0]
# Text shown at the top of each tab.
title = "Speech-to-Speech Translation"
description = """
* Выбранная ASR модель - https://huggingface.co/voidful/wav2vec2-xlsr-multilingual-56
* Перевод текста на русский с помощью модели https://huggingface.co/Helsinki-NLP/opus-mt-mul-en
* Синтез речи на русском языке с помощью модели https://huggingface.co/facebook/mms-tts-rus
"""

demo = gr.Blocks()


def _build_interface(source):
    """Create one translation interface whose audio input uses ``source``."""
    return gr.Interface(
        fn=speech_to_speech_translation,
        inputs=gr.Audio(sources=source, type="filepath"),
        outputs=gr.Audio(label="Generated Speech", type="numpy"),
        title=title,
        description=description,
    )


# One tab records from the microphone, the other accepts an uploaded file.
mic_translate = _build_interface("microphone")
file_translate = _build_interface("upload")

with demo:
    gr.TabbedInterface([mic_translate, file_translate], ["Microphone", "File"])

demo.launch()
|