# Hugging Face Space: Hyeonseo / Text-to-Speech-Korean_Digit2Text
# Space status: Running
# File size (as listed on the Space page): 2,636 Bytes

import gradio as gr
from openai import OpenAI
import re
from konlpy.tag import Kkma
from TextUtil.digit2text import digit2txt, NNGdigit2txt, CSign2txt

def process_txt(text: str) -> str:
    """Rewrite digits and currency symbols in ``text`` as words for TTS.

    The text is scanned for runs of Hangul, runs of Latin letters/periods,
    numbers (a digit followed by digits, ``,`` or ``.``), currency symbols
    and whitespace. Hangul, Latin and whitespace runs are copied through
    unchanged. Characters that match none of these groups (for example
    ``?`` or ``!``) are not copied to the output.

    A number is converted with ``NNGdigit2txt`` when the first morpheme Kkma
    reports for the text following the number is tagged ``NNG`` and is not a
    currency unit word; otherwise it is converted with ``digit2txt``.
    Thousands separators (``,``) are stripped before conversion.
    NOTE(review): ``NNGdigit2txt`` presumably yields the native-Korean
    counting form (e.g. "5가지" -> "다섯가지") -- confirm in TextUtil.

    Args:
        text: Raw input text.

    Returns:
        The converted text.
    """
    # Currency unit words keep the plain ``digit2txt`` reading even though
    # Kkma tags them as common nouns.
    currency_words = {'달러', '유로', '파운드', '엔', '원'}
    pattern = re.compile(r'([가-힣]+)|([a-zA-Z.]+)|(\d[\d,.]*)|(\$|€|£|¥|￦)|(\s+)')

    # The analyzer is expensive to construct, so build it only if a number
    # with trailing text is actually encountered.
    kkma = None
    pieces = []

    for match in pattern.finditer(text):
        hangul, latin, number, currency, spaces = match.groups()
        if hangul:
            pieces.append(hangul)
        elif latin:
            pieces.append(latin)
        elif number:
            digits = number.replace(',', '')
            following = text[match.end(3):]

            # A number at the very end of the text (or followed only by
            # whitespace) has no next word; skip the analyzer instead of
            # indexing into an empty result.
            morphemes = []
            if following.strip():
                if kkma is None:
                    kkma = Kkma()
                morphemes = kkma.pos(following)

            if (
                morphemes
                and morphemes[0][1] == "NNG"
                and morphemes[0][0] not in currency_words
            ):
                pieces.append(NNGdigit2txt(digits))
            else:
                pieces.append(digit2txt(digits))
        elif currency:
            pieces.append(CSign2txt(currency))
        elif spaces:
            pieces.append(spaces)

    return "".join(pieces)
    

def generate_audio(api_key, file, model, voice):
    """Convert an uploaded UTF-8 text file to speech and return the MP3 path.

    The file contents are decoded, passed through ``process_txt`` and sent to
    the OpenAI speech endpoint. The audio is written to a uniquely named
    temporary ``.mp3`` file so that simultaneous requests do not overwrite
    each other's output.

    Args:
        api_key: OpenAI API key entered by the user.
        file: Raw bytes of the uploaded text file (``None`` if nothing was
            uploaded).
        model: TTS model name passed to the API.
        voice: Voice name passed to the API.

    Returns:
        Path of the generated MP3 file.

    Raises:
        gr.Error: If the API key, file, model or voice is missing, the file
            is not valid UTF-8, or no text remains after processing.
    """
    import tempfile

    # Validate user input up front so the UI shows a readable message
    # instead of an opaque traceback.
    if not api_key or not api_key.strip():
        raise gr.Error("Please enter your OpenAI API key.")
    if file is None:
        raise gr.Error("Please upload a text file.")
    if not model or not voice:
        raise gr.Error("Please select a model and a voice.")

    # Initialise the OpenAI client with the user-supplied key; pasted keys
    # often carry stray whitespace.
    client = OpenAI(api_key=api_key.strip())

    # Decode the uploaded bytes.
    try:
        text = file.decode("utf-8")
    except UnicodeDecodeError as err:
        raise gr.Error("The uploaded file must be UTF-8 encoded text.") from err

    # Normalise numbers and currency symbols for Korean TTS.
    text = process_txt(text)
    print(text)

    if not text.strip():
        raise gr.Error("The uploaded file contains no text to convert.")

    # Request speech synthesis.
    response = client.audio.speech.create(
        model=model,
        voice=voice,
        input=text
    )

    # Save as MP3 under a unique name. The file is intentionally not deleted
    # here because the caller needs the path after this function returns.
    with tempfile.NamedTemporaryFile(
        prefix="generated_audio_", suffix=".mp3", delete=False
    ) as tmp:
        speech_file_path = tmp.name
    response.stream_to_file(speech_file_path)

    return speech_file_path

# Gradio interface definition.
# The API key field is masked because it is a secret, and both radios get a
# default so the handler never receives None for the model or voice.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Text(label="Enter OpenAI API Key", type="password"),
        gr.File(label="Upload Text File", type="binary"),
        gr.Radio(choices=["tts-1", "tts-1-hd"], value="tts-1", label="Model"),
        gr.Radio(
            choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
            value="alloy",
            label="Voice",
        ),
    ],
    outputs=gr.File(label="Download MP3 File"),
    title="Text-to-Speech Converter (Korean Digit2Text)",
    description="Upload a text file and enter your OpenAI API key to convert it into speech using OpenAI's Text-to-Speech models.<br>*해당 서비스는 한국어에 맞춤화되어 있습니다. <br>*한국어 숫자 발음 변환을 통해 더 정확한 숫자 TTS를 가능하게 합니다.<br>*예시: 50,000$ -> 오만달러, 5가지 -> 다섯가지, 99권 -> 아흔아홉권"
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()