# Hugging Face file-viewer header captured with the source (not program code):
# Brasd99's picture | Bug fix | c8c664f | raw | history blame | 2.91 kB
from TTS.api import TTS
from bs4 import BeautifulSoup
import requests
import streamlit as st
import tempfile
import os
import json
import datetime
# Load the application settings once, when the script starts.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale default encoding.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Settings used by the rest of the script; a missing key raises KeyError here.
APP_NAME = config['APP_NAME']
APP_LOGO = config['APP_LOGO']
APP_DESCRIPTION = config['APP_DESCRIPTION']
LANGUAGES_URL = config['LANGUAGES_URL']
def contains_only_ascii(input_string):
    """Return True when every character of ``input_string`` is ASCII.

    A character counts as ASCII when its code point is below 128. An empty
    string has no offending character and therefore yields True.
    """
    for character in input_string:
        if ord(character) >= 128:
            # First non-ASCII character settles the answer.
            return False
    return True
def get_iso_languages():
    """Download the language listing and map language names to ISO codes.

    Fetches ``LANGUAGES_URL`` and reads the text of every ``<p>`` element
    after the first one. An entry is kept only when its text splits into
    exactly two whitespace-separated tokens, taken as ``iso_code`` followed
    by ``language_name``, and the name is pure ASCII. Names that contain
    whitespace split into more than two tokens and are therefore skipped.

    Returns:
        dict: language name -> ISO code. If a name occurs more than once,
        the last occurrence wins.

    Raises:
        requests.exceptions.RequestException: on connection failure, on
            timeout, or when the server answers with an HTTP error status.
    """
    # Bound the wait so an unresponsive server cannot hang the app forever.
    response = requests.get(LANGUAGES_URL, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty or meaningless language list.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    iso_language_dict = {}
    # Skipping the first <p> which contains the header
    for p_tag in soup.find_all('p')[1:]:
        parts = p_tag.get_text().split()
        if len(parts) != 2:
            continue
        iso_code, language_name = parts
        if contains_only_ascii(language_name):
            iso_language_dict[language_name] = iso_code
    return iso_language_dict
def create_temp_file(input_wav):
    """Copy an uploaded audio stream into a named temporary file.

    Args:
        input_wav: Object whose ``read()`` returns the audio content as bytes.

    Returns:
        The still-open ``tempfile.NamedTemporaryFile`` holding the data. It is
        created with ``delete=False``, so the caller is responsible for
        closing and deleting it (see ``remove_temp_file``).
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_file.write(input_wav.read())
    # The caller hands ``temp_file.name`` to code that opens the path on its
    # own; flush so the complete content is on disk rather than sitting in
    # this handle's write buffer.
    temp_file.flush()
    return temp_file
def remove_temp_file(temp_file):
    """Close ``temp_file`` and delete the file stored at its ``name`` path."""
    temp_file.close()
    # os.unlink is the same operation as os.remove.
    os.unlink(temp_file.name)
def update_progress(percent, text):
    """Report progress to the user.

    Passes ``percent`` to the module-level ``progress_bar`` and ``text`` to
    the module-level ``status_text`` placeholder. Both globals must already
    exist when this function is called.
    """
    progress_bar.progress(percent)
    status_text.text(text)
# Language picker data: display name -> ISO code, plus the list of names.
iso_languages = get_iso_languages()
languages = list(iso_languages)

# Page header: browser tab title, heading, logo and description.
st.set_page_config(page_title=APP_NAME)
st.title(APP_NAME)
st.image(APP_LOGO, use_column_width=True)
st.markdown(APP_DESCRIPTION)

# User inputs: target language, text to speak, reference voice recording.
language = st.selectbox('Select a language', languages)
prompt = st.text_input('Enter your prompt')
input_wav = st.file_uploader("Upload a WAV file", type=["wav"])
# Generate speech once a reference recording has been uploaded.
# No separate "missing upload" branch is needed: this block only runs when a
# file is present, so such a check could never trigger here.
if input_wav:
    if not prompt:
        st.error('Please write prompt')
    else:
        # Widgets read by update_progress() through module globals.
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Timestamped output name, e.g. recording_2024-01-31_235959.wav
        current_datetime = datetime.datetime.now()
        formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H%M%S")
        output_filename = f"recording_{formatted_datetime}.wav"

        # The TTS call takes the speaker sample as a file path, so the upload
        # is copied to a temporary file first.
        temp_file = create_temp_file(input_wav)
        try:
            iso_code = iso_languages[language]
            print(f'Language: {language}, prompt: {prompt}')

            update_progress(0, 'Loading TTS model...')
            api = TTS(f"tts_models/{iso_code}/fairseq/vits")

            update_progress(50, 'Generating audio...')
            api.tts_with_vc_to_file(
                prompt,
                speaker_wav=temp_file.name,
                file_path=output_filename
            )
        finally:
            # Delete the uploaded copy even when model loading or synthesis
            # fails, so failed runs do not leave temporary files behind.
            remove_temp_file(temp_file)

        # Read the result through a context manager so the handle is closed.
        with open(output_filename, 'rb') as audio_file:
            audio_bytes = audio_file.read()

        update_progress(100, 'Audio generated successfully!')
        st.audio(audio_bytes, format='audio/wav')
        st.download_button('Download WAV', data=audio_bytes, file_name='output.wav')