# Alternative (disabled): load the hosted model's default demo directly.
'''import gradio as gr
gr.Interface.load("models/94insane/tts-fastspeech-mydata").launch()'''
import os

os.system('pip freeze')        # log the preinstalled packages for debugging
os.system('pip install jamo')  # runtime dependency of the Korean tokenizer

import sys                     # used by run_cmd() below
import gradio as gr
from subprocess import call

# Fetch the Korean text normalization / jamo tokenization utility if missing.
if not os.path.exists("korean.py"):
    # os.system("wget https://github.com/zhanglina94/TTS_Projects/blob/main/fastspeech2_ko/korean.py -O korean.py")
    os.system("wget https://raw.githubusercontent.com/TensorSpeech/TensorFlowTTS/master/tensorflow_tts/utils/korean.py -O korean.py")
import korean
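# korean.tokenize() is expected to normalize the text and decompose Hangul
# syllables into jamo (the unit the VITS model below was trained on).
# Illustrative only; the exact output of the TensorFlowTTS utility is assumed:
#   "".join(korean.tokenize("안녕"))  # -> a jamo string along the lines of "ㅇㅏㄴㄴㅕㅇ"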
# Not used by the active demo; kept from the disabled experiments below.
import argparse
import re
from string import punctuation
'''
# 1: Bark × (disabled)
# Note: the numpy and transformers imports were missing here; they are assumed
# below, since this block is never executed.
import numpy as np
from transformers import pipeline
from bark import SAMPLE_RATE, generate_audio, preload_models

classifier = pipeline(model="suno/bark")
output = classifier("Hey it's HuggingFace on the phone!")

DEBUG_MODE = False
if not DEBUG_MODE:
    _ = preload_models()

audio = output["audio"]
sampling_rate = output["sampling_rate"]

def gen_tts(text, prompt):
    prompt = [prompt]
    if DEBUG_MODE:
        audio_arr = np.zeros(SAMPLE_RATE)
    else:
        # generate_audio(text, prompt=prompt, text_temp=temp_semantic, waveform_temp=temp_waveform)
        audio_arr = generate_audio(text, prompt=prompt)
    audio_arr = (audio_arr * 32767).astype(np.int16)  # float waveform -> 16-bit PCM
    return (SAMPLE_RATE, audio_arr)

audio_out = gr.Audio(label="Generated Audio", type="numpy", elem_id="audio_out")  # unused
inputs = gr.inputs.Textbox(lines=5, label="Input Korean Text")
outputs = gr.outputs.Audio(label="Generated Audio", type="numpy", elem_id="audio_out")
title = "Bark TTS"
examples = [
    ["컴퓨터 비전은 κΈ°κ³„μ˜ μ‹œκ°μ— ν•΄λ‹Ήν•˜λŠ” 뢀뢄을 μ—°κ΅¬ν•˜λŠ” 컴퓨터 κ³Όν•™μ˜ μ΅œμ‹  연ꡬ λΆ„μ•Ό 쀑 ν•˜λ‚˜μ΄λ‹€."],
    ["μžμ—°μ–΄ μ²˜λ¦¬μ—λŠ” μžμ—°μ–΄ 뢄석, μžμ—°μ–΄ 이해, μžμ—°μ–΄ 생성 λ“±μ˜ 기술이 μ‚¬μš©λœλ‹€."],
    ["μΈμœ„μ μœΌλ‘œ μ‚¬λžŒμ˜ μ†Œλ¦¬λ₯Ό ν•©μ„±ν•˜λŠ” μ‹œμŠ€ν…œμ΄λ©°, ν…μŠ€νŠΈλ₯Ό μŒμ„±μœΌλ‘œ λ³€ν™˜ν•œλ‹€λŠ” λ°μ„œ ν…μŠ€νŠΈ μŒμ„± λ³€ν™˜."]
]
gr.Interface(gen_tts, inputs, outputs, title=title, examples=examples, enable_queue=True).launch()
'''
# 2: TTS coqui-ai (the active demo)
def run_cmd(command):
    # Echo and run a CLI command; exit cleanly if the user interrupts it.
    try:
        print(command)
        call(command)
    except KeyboardInterrupt:
        print("Process interrupted")
        sys.exit(1)
def inference(text):
    # Decompose the input into jamo tokens, then synthesize with the `tts` CLI.
    cmd = ['tts', '--text', "".join(korean.tokenize(text)),
           '--model_path', 'vits-kss-checkpoint_90000.pth',
           '--config_path', 'vits-kss-config.json']
    run_cmd(cmd)
    return 'tts_output.wav'  # default output path written by the `tts` CLI
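# Usage sketch (hypothetical smoke test; the Gradio Interface below passes the
# textbox value into inference()):
#   wav_path = inference("μ•ˆλ…•ν•˜μ„Έμš”.")  # -> 'tts_output.wav'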
# Fetch the VITS checkpoint and config (trained on the KSS dataset) if missing.
if not os.path.exists("vits-kss-checkpoint_90000.pth"):
    os.system("wget -q https://huggingface.co/youngs3/coqui-vits-ko/resolve/main/vits-kss-checkpoint_90000.pth -O vits-kss-checkpoint_90000.pth")
    os.system("wget -q https://huggingface.co/youngs3/coqui-vits-ko/resolve/main/vits-kss-config.json -O vits-kss-config.json")
inputs = gr.inputs.Textbox(lines=5, label="Input Text")
outputs = gr.outputs.Audio(type="file", label="Output Audio")
title = "Korean Language TTS"
description = "Gradio demo for coqui-ai-TTS, using a VITS model trained on the KSS dataset. To use it, enter your text or click one of the examples to load it. Read more at the links below."
article = "<p style='text-align: center'><a href='https://tts.readthedocs.io/en/latest/'>TTS</a> | <a href='https://github.com/zhanglina94/TTS_Projects'>Github Repo</a></p>"
# Example Korean inputs; roughly: "Computer vision is one of the newest research
# areas of computer science, studying the machine counterpart of vision.",
# "Natural language processing uses techniques such as language analysis,
# understanding, and generation.", "A system that artificially synthesizes the
# human voice; called text-to-speech because it converts text into speech."
examples = [
    ["컴퓨터 비전은 κΈ°κ³„μ˜ μ‹œκ°μ— ν•΄λ‹Ήν•˜λŠ” 뢀뢄을 μ—°κ΅¬ν•˜λŠ” 컴퓨터 κ³Όν•™μ˜ μ΅œμ‹  연ꡬ λΆ„μ•Ό 쀑 ν•˜λ‚˜μ΄λ‹€."],
    ["μžμ—°μ–΄ μ²˜λ¦¬μ—λŠ” μžμ—°μ–΄ 뢄석, μžμ—°μ–΄ 이해, μžμ—°μ–΄ 생성 λ“±μ˜ 기술이 μ‚¬μš©λœλ‹€."],
    ["μΈμœ„μ μœΌλ‘œ μ‚¬λžŒμ˜ μ†Œλ¦¬λ₯Ό ν•©μ„±ν•˜λŠ” μ‹œμŠ€ν…œμ΄λ©°, ν…μŠ€νŠΈλ₯Ό μŒμ„±μœΌλ‘œ λ³€ν™˜ν•œλ‹€λŠ” λ°μ„œ ν…μŠ€νŠΈ μŒμ„± λ³€ν™˜."]
]
gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples, enable_queue=True).launch()
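# A minimal alternative sketch (assumes the coqui-ai TTS Python API; not used
# here, but it would avoid spawning a subprocess per request):
#   from TTS.utils.synthesizer import Synthesizer
#   synthesizer = Synthesizer(tts_checkpoint="vits-kss-checkpoint_90000.pth",
#                             tts_config_path="vits-kss-config.json")
#   wav = synthesizer.tts("".join(korean.tokenize(text)))
#   synthesizer.save_wav(wav, "tts_output.wav")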
'''
# 3: fastspeech2 + vocgan + my voice data (disabled; relies on an external
# synthesize() helper that is not defined in this file)
def inference(text):
    config = ['tts', '--text', "".join(korean.tokenize(text)),
              '--model_path', 'checkpoint_elena_ko.pth.tar',
              '--config_path', 'config.json']
    synthesize(config)
    return 'tts_output.wav'

inputs = gr.inputs.Textbox(lines=5, label="Input Korean Text")
outputs = gr.outputs.Audio(type="file", label="Output Audio")
title = "Korean TTS"
description = "Gradio demo for TTS, using a FastSpeech2 model trained on the MyVoice dataset (elena). To use it, enter your text or click one of the examples to load it. Read more at the links below."
article = "<p style='text-align: center'><a href=''></a> πŸ¦‹ | <a href='https://github.com/zhanglina94/TTS_Projects'>Github Repo</a></p>"
examples = [
    ["컴퓨터 비전은 κΈ°κ³„μ˜ μ‹œκ°μ— ν•΄λ‹Ήν•˜λŠ” 뢀뢄을 μ—°κ΅¬ν•˜λŠ” 컴퓨터 κ³Όν•™μ˜ μ΅œμ‹  연ꡬ λΆ„μ•Ό 쀑 ν•˜λ‚˜μ΄λ‹€."],
    ["μžμ—°μ–΄ μ²˜λ¦¬μ—λŠ” μžμ—°μ–΄ 뢄석, μžμ—°μ–΄ 이해, μžμ—°μ–΄ 생성 λ“±μ˜ 기술이 μ‚¬μš©λœλ‹€."],
    ["μΈμœ„μ μœΌλ‘œ μ‚¬λžŒμ˜ μ†Œλ¦¬λ₯Ό ν•©μ„±ν•˜λŠ” μ‹œμŠ€ν…œμ΄λ©°, ν…μŠ€νŠΈλ₯Ό μŒμ„±μœΌλ‘œ λ³€ν™˜ν•œλ‹€λŠ” λ°μ„œ ν…μŠ€νŠΈ μŒμ„± λ³€ν™˜."]
]
gr.Interface(inference, inputs, outputs, title=title, description=description,
             article=article, examples=examples, enable_queue=True).launch()
'''