englissi committed on
Commit
9e130e4
·
verified ·
1 Parent(s): f5e8362

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gtts import gTTS
3
+ from pydub import AudioSegment
4
+ from io import BytesIO
5
+
6
def multilingual_tts(
    korean_text,
    british_text,
    american_text,
    british_text_add1,
    british_text_add2,
    australian_text_add1,
    australian_text_add2,
    american_text_add1,
    additional_english_text_9,
    additional_english_text_10,
):
    """Synthesize up to ten texts with gTTS and join them into one MP3 file.

    Each argument is the text for one voice slot. Slots are spoken in
    parameter order; a slot whose text is ``None``, empty, or whitespace-only
    is skipped. Every spoken clip is followed by 500 ms of silence.

    Args:
        korean_text: Text spoken with ``lang="ko"``.
        british_text, british_text_add1, british_text_add2: English text
            synthesized through the ``co.uk`` gTTS domain.
        australian_text_add1, australian_text_add2: English text synthesized
            through the ``com.au`` gTTS domain.
        american_text, american_text_add1, additional_english_text_9,
        additional_english_text_10: English text synthesized through the
            ``com`` gTTS domain.

    Returns:
        The path ``"combined_output.mp3"`` (relative to the current working
        directory, overwritten on every call), or ``None`` when no slot
        contains any text, in which case nothing is synthesized or written.
    """
    # One (language code, gTTS tld, text) tuple per voice slot, in playback order.
    voices = [
        ("ko", "com", korean_text),                  # Korean
        ("en", "co.uk", british_text),               # British
        ("en", "com", american_text),                # American
        ("en", "co.uk", british_text_add1),          # additional British 1
        ("en", "co.uk", british_text_add2),          # additional British 2
        ("en", "com.au", australian_text_add1),      # additional Australian 1
        ("en", "com.au", australian_text_add2),      # additional Australian 2
        ("en", "com", american_text_add1),           # additional American 1
        ("en", "com", additional_english_text_9),    # additional English 9
        ("en", "com", additional_english_text_10),   # additional English 10
    ]

    # Keep only slots that actually contain text. The `text and ...` guard
    # matters because calling .strip() on None would raise AttributeError.
    pending = [
        (lang, tld, text)
        for lang, tld, text in voices
        if text and text.strip()
    ]
    if not pending:
        # Nothing to say: avoid exporting a zero-length MP3.
        return None

    combined_audio = AudioSegment.silent(duration=0)  # empty starting segment

    for lang, tld, text in pending:
        tts = gTTS(text, lang=lang, tld=tld)
        # Synthesize into memory so no per-clip temporary file is needed.
        audio_file = BytesIO()
        tts.write_to_fp(audio_file)
        audio_file.seek(0)  # rewind so the decoder reads from the start
        tts_audio = AudioSegment.from_file(audio_file, format="mp3")
        # Append the clip followed by a 500 ms pause.
        combined_audio += tts_audio + AudioSegment.silent(duration=500)

    # Save the final combined audio as an MP3 file.
    output_file = "combined_output.mp3"
    combined_audio.export(output_file, format="mp3")

    return output_file
49
+
50
# Gradio UI: ten text boxes (one Korean, nine English) whose values are passed,
# in order, to multilingual_tts; the returned file path is shown in an audio player.
with gr.Blocks() as demo:
    # The Korean suffix in the heading reads "10 voices in total".
    gr.Markdown("## Multilingual TTS: Generate a Single Audio File (총 10개 음성)")

    # Korean input field
    korean_input = gr.Textbox(label="Enter Korean Text:", placeholder="안녕하세요")

    # Original English input fields (British, American)
    with gr.Row():
        british_input = gr.Textbox(label="Enter British English Text:", placeholder="Hello (British)")
        american_input = gr.Textbox(label="Enter American English Text:", placeholder="Hello (American)")

    # Additional English input fields (British)
    with gr.Row():
        british_input_add1 = gr.Textbox(label="Enter Additional British English Text 1:", placeholder="Hi there (British)")
        british_input_add2 = gr.Textbox(label="Enter Additional British English Text 2:", placeholder="Good day (British)")

    # Additional English input fields (Australian)
    with gr.Row():
        australian_input_add1 = gr.Textbox(label="Enter Additional Australian English Text 1:", placeholder="G'day (Australian)")
        australian_input_add2 = gr.Textbox(label="Enter Additional Australian English Text 2:", placeholder="How ya going? (Australian)")

    # Additional English input fields (American and generic English)
    with gr.Row():
        american_input_add1 = gr.Textbox(label="Enter Additional American English Text 1:", placeholder="Hey (American)")
        additional_english_input_9 = gr.Textbox(label="Enter Additional English Text 9:", placeholder="Additional dialogue 9 (English)")

    # Last additional English input field
    additional_english_input_10 = gr.Textbox(label="Enter Additional English Text 10:", placeholder="Additional dialogue 10 (English)")

    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # The order of `inputs` must match the parameter order of multilingual_tts.
    generate_button.click(
        multilingual_tts,
        inputs=[
            korean_input,
            british_input,
            american_input,
            british_input_add1,
            british_input_add2,
            australian_input_add1,
            australian_input_add2,
            american_input_add1,
            additional_english_input_9,
            additional_english_input_10,
        ],
        outputs=output_audio,
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()