import streamlit as st
import librosa
from transformers import pipeline

# Load the pipelines: Cantonese ASR, Cantonese-to-Chinese translation, and Chinese TTS
asr_pipe = pipeline("automatic-speech-recognition", model="alvanlii/whisper-small-cantonese")
translation_pipe = pipeline("translation", model="raptorkwok/cantonese-chinese-translation")
tts_pipe = pipeline("text-to-speech", model="myshell-ai/MeloTTS-Chinese")
# Streamlit UI
st.title("Cantonese to Chinese Translator")
st.write("Upload your Cantonese audio file (WAV format) below.")

# File upload
uploaded_file = st.file_uploader("Choose a WAV file", type="wav")
if uploaded_file is not None:
    # Load the audio file, resampled to the 16 kHz rate Whisper expects
    audio, sr = librosa.load(uploaded_file, sr=16000)

    # Recognize Cantonese speech; the ASR pipeline takes a raw waveform plus its sampling rate
    result = asr_pipe({"sampling_rate": sr, "raw": audio})
    cantonese_text = result["text"]
    st.write(f"Cantonese Text: {cantonese_text}")

    # Translate Cantonese to written Chinese
    chinese_text = translation_pipe(cantonese_text)[0]["translation_text"]
    st.write(f"Chinese Text: {chinese_text}")

    # Convert the Chinese text to speech
    tts_output = tts_pipe(chinese_text)

    # Play back the synthesized Chinese audio
    st.audio(tts_output["audio"], sample_rate=tts_output["sampling_rate"])
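If the installed Streamlit version does not accept a raw NumPy array with a sample_rate argument in st.audio, a minimal alternative is to encode the waveform to WAV bytes first. The sketch below assumes the soundfile package is available; audio_to_wav_bytes is an illustrative helper name, not part of the app or the libraries above.

import io
import soundfile as sf

def audio_to_wav_bytes(waveform, sampling_rate):
    # Illustrative helper (assumption, not from the app above):
    # encode a mono NumPy waveform as in-memory WAV bytes
    buf = io.BytesIO()
    sf.write(buf, waveform.squeeze(), sampling_rate, format="WAV")
    return buf.getvalue()

# Possible usage inside the upload branch:
# st.audio(audio_to_wav_bytes(tts_output["audio"], tts_output["sampling_rate"]), format="audio/wav")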