File size: 2,636 Bytes
e1c7b15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd52a89
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import re
import tempfile

import gradio as gr
from konlpy.tag import Kkma
from openai import OpenAI

from TextUtil.digit2text import digit2txt, NNGdigit2txt, CSign2txt

# Tokenizer used by process_txt. Exactly one group is set per match:
#   1: Hangul syllables            2: Latin letters and periods
#   3: a number (digits, may contain ',' and '.')
#   4: a currency sign ($, euro, pound, yen, full-width won)
#   5: whitespace
# Characters that match none of the groups (e.g. '!', '?', a lone ',') are
# skipped by finditer and therefore never reach the output.
_TOKEN_PATTERN = re.compile(
    r'([가-힣]+)|([a-zA-Z.]+)|(\d[\d,.]*)|(\$|€|£|¥|￦)|(\s+)'
)

# Nouns that follow an amount of money (dollar, euro, pound, yen, won).
# A number in front of one of these is converted with digit2txt even though
# the word is tagged NNG.
# NOTE(review): the original list spelled euro as '유료' ("paid"), which looks
# like a typo for '유로'; both are accepted so existing behavior is preserved.
_CURRENCY_UNIT_NOUNS = frozenset({'달러', '유로', '유료', '파운드', '엔', '원'})


def _first_morpheme(kkma, remainder):
    """Return the first (word, tag) pair Kkma finds in *remainder*, or None."""
    if not remainder.strip():
        # Nothing follows the number; skip the analyzer call entirely.
        return None
    morphemes = kkma.pos(remainder)
    return morphemes[0] if morphemes else None


def process_txt(text):
    """Spell out the numbers and currency signs in *text* for TTS input.

    Hangul runs, Latin runs (letters and '.') and whitespace are copied
    through unchanged; any other character is dropped. Each number has its
    ',' separators removed and is converted by one of two helpers chosen from
    the word that follows it:

    * NNGdigit2txt when the next morpheme is tagged "NNG" by Kkma and is not
      a currency unit noun (presumably the native-Korean reading, e.g.
      5가지 -> 다섯가지 -- confirm against TextUtil);
    * digit2txt otherwise, including when nothing follows the number.

    Currency signs are converted with CSign2txt.

    Args:
        text: The raw text to normalize.

    Returns:
        The normalized text as a single string.
    """
    kkma = None  # created on first use; inputs without numbers never need it
    pieces = []

    for match in _TOKEN_PATTERN.finditer(text):
        hangul, latin, number, currency, space = match.groups()

        if number:
            if kkma is None:
                kkma = Kkma()
            digits = number.replace(',', '')
            # The analyzer is given everything after the number; only the
            # first morpheme of its result is inspected.
            following = _first_morpheme(kkma, text[match.end(3):])
            reads_as_count = (
                following is not None
                and following[1] == "NNG"
                and following[0] not in _CURRENCY_UNIT_NOUNS
            )
            if reads_as_count:
                pieces.append(NNGdigit2txt(digits))
            else:
                pieces.append(digit2txt(digits))
        elif currency:
            pieces.append(CSign2txt(currency))
        else:
            # Hangul, Latin and whitespace tokens pass through verbatim.
            pieces.append(hangul or latin or space)

    return "".join(pieces)
    

def _decode_upload(data):
    """Decode uploaded bytes as UTF-8 (a BOM is tolerated), else as CP949."""
    try:
        return data.decode("utf-8-sig")
    except UnicodeDecodeError:
        # Fall back to the CP949 Korean encoding for files that are not UTF-8.
        return data.decode("cp949")


def generate_audio(api_key, file, model, voice):
    """Convert an uploaded text file to speech and return the MP3 path.

    The text is normalized with process_txt, echoed to stdout, and sent to
    the OpenAI speech endpoint; the audio is written to a new temporary file.

    Args:
        api_key: OpenAI API key entered by the user.
        file: Raw bytes of the uploaded text file, or None when nothing was
            uploaded.
        model: Name of the TTS model to use.
        voice: Name of the voice to use.

    Returns:
        Path of the generated ".mp3" file.

    Raises:
        ValueError: If the API key, file, model or voice is missing, if the
            file cannot be decoded, or if it contains no text.
    """
    # Validate up front so the user gets a clear message instead of an
    # AttributeError on None or a rejected API request.
    if not api_key or not api_key.strip():
        raise ValueError("An OpenAI API key is required.")
    if file is None:
        raise ValueError("No text file was uploaded.")
    if not model or not voice:
        raise ValueError("Both a model and a voice must be selected.")

    # Read the file.
    text = _decode_upload(file)
    if not text.strip():
        raise ValueError("The uploaded file contains no text.")

    # Initialize the OpenAI client with the user-supplied API key.
    client = OpenAI(api_key=api_key.strip())

    # Normalize the text (numbers and currency signs spelled out).
    text = process_txt(text)
    print(text)

    # Request the speech synthesis.
    response = client.audio.speech.create(
        model=model,
        voice=voice,
        input=text,
    )

    # Save as MP3. Each call gets its own file so that concurrent requests
    # cannot overwrite one another's output; the file is left on disk
    # (delete=False) because it is returned by path.
    with tempfile.NamedTemporaryFile(
        prefix="generated_audio_", suffix=".mp3", delete=False
    ) as tmp:
        speech_file_path = tmp.name
    response.stream_to_file(speech_file_path)

    return speech_file_path

# Gradio interface definition.
# The inputs are passed positionally to generate_audio as
# (api_key, file, model, voice); the returned path is offered as a download.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        # Masked so the key is not shown on screen while typing.
        gr.Text(label="Enter OpenAI API Key", type="password"),
        # type="binary" hands the handler the file content as bytes.
        gr.File(label="Upload Text File", type="binary"),
        # Defaults are preselected so the handler never receives None.
        gr.Radio(choices=["tts-1", "tts-1-hd"], value="tts-1", label="Model"),
        gr.Radio(
            choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
            value="alloy",
            label="Voice",
        ),
    ],
    outputs=gr.File(label="Download MP3 File"),
    title="Text-to-Speech Converter (Korean Digit2Text)",
    description=(
        "Upload a text file and enter your OpenAI API key to convert it into "
        "speech using OpenAI's Text-to-Speech models."
        "<br>*해당 서비스는 한국어에 맞춤화되어 있습니다. "
        "<br>*한국어 숫자 발음 변환을 통해 더 정확한 숫자 TTS를 가능하게 합니다."
        "<br>*예시: 50,000$ -> 오만달러, 5가지 -> 다섯가지, 99권 -> 아흔아홉권"
    ),
)

if __name__ == "__main__":
    iface.launch()