Spaces:
Running
Running
from TTS.api import TTS | |
from bs4 import BeautifulSoup | |
import requests | |
import streamlit as st | |
import tempfile | |
import os | |
import json | |
import datetime | |
# Load the application settings once, when the script starts. The file is
# decoded as UTF-8 explicitly so that non-ASCII text in the settings does not
# depend on the platform's default locale encoding.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Required settings; a missing key raises KeyError here, at startup.
APP_NAME = config['APP_NAME']
APP_LOGO = config['APP_LOGO']
APP_DESCRIPTION = config['APP_DESCRIPTION']
LANGUAGES_URL = config['LANGUAGES_URL']
def contains_only_ascii(input_string):
    """Return True when every character of *input_string* is 7-bit ASCII.

    An empty string counts as ASCII-only.
    """
    for character in input_string:
        # Code points 0..127 make up the ASCII range.
        if ord(character) > 127:
            return False
    return True
def get_iso_languages(timeout=30):
    """Scrape the languages page into a ``{language_name: iso_code}`` dict.

    Downloads ``LANGUAGES_URL`` and inspects every ``<p>`` element after the
    first one. A paragraph is used only when its text splits on whitespace
    into exactly two tokens, read as ``(iso_code, language_name)``.
    Paragraphs with any other token count (for example multi-word language
    names) are skipped, and so are names containing non-ASCII characters.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        dict mapping each accepted language name to its ISO code.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(LANGUAGES_URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the language list.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    iso_language_dict = {}
    # Skipping the first <p> which contains the header
    for p_tag in soup.find_all('p')[1:]:
        parts = p_tag.get_text().split()
        if len(parts) != 2:
            continue
        iso_code, language_name = parts
        if contains_only_ascii(language_name):
            iso_language_dict[language_name] = iso_code
    return iso_language_dict
def create_temp_file(input_wav):
    """Copy an uploaded audio stream into a named temporary file.

    Args:
        input_wav: Binary file-like object exposing ``read()``.

    Returns:
        The still-open temporary file object. It was created with
        ``delete=False``, so the caller is responsible for closing and
        deleting it (see ``remove_temp_file``).

    Raises:
        Exception: Whatever ``read()``/``write()`` raised; the partially
            written temporary file is removed before re-raising.
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        temp_file.write(input_wav.read())
        # The file is handed on by *name* and reopened by other code, so the
        # buffered bytes must reach the OS first; otherwise the reader can
        # see a truncated or empty file.
        temp_file.flush()
    except Exception:
        # Do not leave an orphaned file behind when the copy fails.
        temp_file.close()
        os.remove(temp_file.name)
        raise
    return temp_file
def remove_temp_file(temp_file):
    """Close *temp_file* and delete the file it names from disk.

    Intended for files created with ``delete=False``, which are not removed
    automatically when closed.
    """
    temp_file.close()
    path_on_disk = temp_file.name
    os.remove(path_on_disk)
def update_progress(percent, text):
    """Show generation progress in the UI.

    Relies on the module-level ``progress_bar`` and ``status_text`` widgets,
    which must have been created before this is called.

    Args:
        percent: Value forwarded to ``progress_bar.progress``.
        text: Status message forwarded to ``status_text.text``.
    """
    # Bar first, then the caption, so both reflect the same step.
    progress_bar.progress(percent)
    status_text.text(text)
# Language data for the picker: language name -> ISO code, fetched when the
# script runs.
iso_languages = get_iso_languages()
languages = list(iso_languages)

# Page header. set_page_config is issued before any other Streamlit element.
st.set_page_config(page_title=APP_NAME)
st.title(APP_NAME)
st.image(APP_LOGO, use_column_width=True)
st.markdown(APP_DESCRIPTION)

# User inputs: target language, text to speak, and a reference recording.
language = st.selectbox('Select a language', languages)
prompt = st.text_input('Enter your prompt')
input_wav = st.file_uploader("Upload a WAV file", type=["wav"])
# Generation runs only once a reference recording has been uploaded. No
# separate "missing upload" branch is needed: this guard already guarantees
# that a file is present.
if input_wav:
    if not prompt:
        st.error('Please write prompt')
    else:
        # Widgets used by update_progress().
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Timestamped output name, e.g. recording_2024-01-31_235959.wav.
        current_datetime = datetime.datetime.now()
        formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H%M%S")
        output_filename = f"recording_{formatted_datetime}.wav"

        # Resolve the model language before creating any temporary file so a
        # failed lookup leaves nothing behind on disk.
        iso_code = iso_languages[language]
        print(f'Language: {language}, prompt: {prompt}')

        temp_file = create_temp_file(input_wav)
        try:
            update_progress(0, 'Loading TTS model...')
            api = TTS(f"tts_models/{iso_code}/fairseq/vits")
            update_progress(50, 'Generating audio...')
            api.tts_with_vc_to_file(
                prompt,
                speaker_wav=temp_file.name,
                file_path=output_filename
            )
        finally:
            # Always delete the uploaded copy, even when model loading or
            # synthesis raises.
            remove_temp_file(temp_file)

        # Read the result inside a context manager so the handle is closed.
        # The generated file itself is left on disk.
        with open(output_filename, 'rb') as audio_file:
            audio_bytes = audio_file.read()

        update_progress(100, 'Audio generated successfully!')
        st.audio(audio_bytes, format='audio/wav')
        st.download_button('Download WAV', data=audio_bytes, file_name='output.wav')