|
import gradio as gr |
|
import torchaudio |
|
from transformers import pipeline |
|
|
|
|
|
# Speech-to-text pipeline: wav2vec2 model fine-tuned for Arabic ASR.
# Both models are loaded once at import time so requests reuse them.
asr_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-arabic")

# Arabic -> English machine-translation pipeline (Helsinki-NLP OPUS-MT).
translation_model = pipeline("translation", model="Helsinki-NLP/opus-mt-ar-en")
|
|
|
def process_audio(audio_path):
    """Transcribe an Arabic audio file and translate the transcript to English.

    Args:
        audio_path: Filesystem path to the uploaded audio file, or ``None``
            when no file was provided (Gradio passes ``None`` for an empty
            audio input).

    Returns:
        The English translation of the recognized Arabic speech, or a short
        error message string when no audio was supplied.
    """
    # Gradio delivers None when the user submits without uploading a file;
    # guard early so the ASR pipeline is never called with an invalid path.
    if audio_path is None:
        return "No audio file provided. Please upload an Arabic audio file."

    # Speech recognition: the ASR pipeline returns a dict with the
    # transcript under the "text" key.
    arabic_text = asr_model(audio_path)["text"]

    # Translation: the pipeline returns a list of result dicts; take the
    # translated string from the first (and only) entry.
    return translation_model(arabic_text)[0]["translation_text"]
|
|
|
|
|
# Gradio UI: a single audio-file input mapped to a plain-text output.
# NOTE(review): the original title/description claimed "voice cloning" and
# "maintaining the singer's voice", but the pipeline only produces text —
# the copy below describes what the app actually does.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs="text",
    title="Arabic to English Speech Translation",
    description="Upload an Arabic audio file to transcribe it and translate the transcript to English text.",
)
|
|
|
|
|
# Launch the Gradio web app only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":

    iface.launch()