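# app.py — uploaded by huimanho; commit cd7ce5f ("Update app.py"), verified; 1.22 kB.
# (Header reconstructed from the hosting page's file view: raw / history / blame links omitted.)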
import streamlit as st
import numpy as np
import torch
from transformers import pipeline
import librosa
# Load the pipelines.
# Streamlit re-executes this script from the top on every user interaction,
# so the models are built through a cached loader: each (task, model) pair is
# constructed once per server process and reused on later reruns.
@st.cache_resource
def _load_pipeline(task: str, model: str):
    """Return the transformers pipeline for *task* backed by *model*.

    The result is cached by ``st.cache_resource``, keyed on the arguments,
    so repeated calls with the same pair return the same pipeline object
    instead of reloading the model weights.
    """
    return pipeline(task, model=model)


asr_pipe = _load_pipeline("automatic-speech-recognition", "alvanlii/whisper-small-cantonese")
translation_pipe = _load_pipeline("translation", "raptorkwok/cantonese-chinese-translation")
tts_pipe = _load_pipeline("text-to-speech", "myshell-ai/MeloTTS-Chinese")
# Streamlit UI: upload a WAV file, transcribe it, translate the transcript,
# then synthesize and play back the translated text.
st.title("Cantonese to Chinese Translator")
st.write("Upload your Cantonese audio file (WAV format) below.")

# File upload; the value stays None until the user selects a file.
uploaded_file = st.file_uploader("Choose a WAV file", type="wav")

if uploaded_file is not None:
    # Load the audio file, resampled to 16 kHz.
    audio, sr = librosa.load(uploaded_file, sr=16000)

    # Recognize Cantonese speech. The samples are passed as a NumPy array
    # together with their sampling rate; the ASR pipeline documents
    # str / bytes / ndarray / dict inputs, not torch tensors.
    result = asr_pipe({"raw": audio, "sampling_rate": sr})
    cantonese_text = result["text"]
    st.write(f"Cantonese Text: {cantonese_text}")

    # Translate Cantonese to Chinese (first candidate of the returned list).
    chinese_text = translation_pipe(cantonese_text)[0]["translation_text"]
    st.write(f"Chinese Text: {chinese_text}")

    # Convert Chinese text to speech.
    tts_output = tts_pipe(chinese_text)

    # Play back the Chinese output. The waveform is a raw sample array, so
    # Streamlit needs the sampling rate to encode it for the audio player.
    st.audio(
        tts_output["audio"],
        format="audio/wav",
        sample_rate=tts_output["sampling_rate"],
    )