Illioa committed on
Commit
f27fd1f
•
1 Parent(s): 1928d70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -23
app.py CHANGED
@@ -1,32 +1,84 @@
1
- import spaces
2
- import gradio as gr
3
  import torch
4
  from TTS.api import TTS
5
  import os
 
 
 
 
 
 
 
6
  os.environ["COQUI_TOS_AGREED"] = "1"
7
 
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
11
 
12
- @spaces.GPU(enable_queue=True)
13
- def clone(text, audio, language):
14
- tts.tts_to_file(text=text, speaker_wav=audio, language=language, file_path="./output.wav")
15
- return "./output.wav"
16
-
17
- iface = gr.Interface(fn=clone,
18
- inputs=[gr.Textbox(label='Text'),gr.Audio(type='filepath', label='Voice reference audio file'),gr.Textbox(label='Langauge')],
19
- outputs=gr.Audio(type='filepath'),
20
- title='Voice Clone',
21
- description="""
22
- by [Tony Assi](https://www.tonyassi.com/)
23
-
24
- Please ❀️ this Space. I build custom AI apps for companies. <a href="mailto: tony.assi.media@gmail.com">Email me</a> for business inquiries.
25
- """,
26
- theme = gr.themes.Base(primary_hue="teal",secondary_hue="teal",neutral_hue="slate"),
27
- examples=[["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.","./audio/Wizard-of-Oz-Dorthy.wav"],
28
- ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.","./audio/Godfather.wav"],
29
- ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.","./audio/Paris-Hilton.mp3"],
30
- ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.","./audio/Megan-Fox.mp3"],
31
- ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.","./audio/Jeff-Goldblum.mp3"],])
32
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
 
2
  import torch
3
  from TTS.api import TTS
4
  import os
5
+ import docx2txt
6
+ import gradio as gr
7
+ import json
8
+
9
+ folder = '.'
10
+
11
+
12
  os.environ["COQUI_TOS_AGREED"] = "1"
13
 
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
17
 
18
+ folder = '.'
19
+
20
+ with open(f"{folder}/config.json") as f:
21
+ config_settings = json.load(f)
22
+
23
def generate_voiceover(text, voice, n, name):
    """Synthesize one chunk of text with a reference voice and save it to disk.

    The audio is written to ``{folder}/Result/{voice}/{name}_{n}.mp3``, using
    ``{folder}/voices/{voice}.mp3`` as the speaker reference and the language
    stored under ``config_settings["voices"]["{voice}.mp3"]``.

    Args:
        text: Text to speak.
        voice: Voice name (reference file name without the ``.mp3`` suffix).
        n: Sequence number of this chunk; becomes part of the file name.
        name: Base name of the source document; becomes part of the file name.

    Returns:
        Path of the generated audio file.
    """
    # Function-scope import: the module header does not import subprocess.
    import subprocess

    out_dir = f"{folder}/Result/{voice}"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(out_dir, exist_ok=True)

    file_path = f"{out_dir}/{name}_{n}.mp3"
    tts.tts_to_file(
        text=text,
        speaker_wav=f"{folder}/voices/{voice}.mp3",
        language=config_settings["voices"][f"{voice}.mp3"],
        file_path=file_path,
    )

    # Run the tempo step with an argument list instead of a shell string:
    # `name` comes from the uploaded file name, so a shell command built by
    # concatenation is injectable and also breaks on paths containing spaces.
    # NOTE(review): SoX `play` plays the audio with the tempo effect applied;
    # it does not appear to rewrite file_path. If the saved file is meant to
    # be sped up, this presumably needs `sox <in> <out> tempo <speed>` -
    # confirm the intent before changing it.
    try:
        subprocess.run(
            ["play", file_path, "tempo", str(config_settings["speed"])],
            check=False,
        )
    except OSError as err:
        # Keep the step best-effort, as os.system() was: a missing `play`
        # binary must not discard the audio that was already generated.
        print(f"Could not run 'play' on {file_path}: {err}")

    return file_path
32
+
33
+
34
+
35
def process_file(file, voice):
    """Generate one voiceover per paragraph of an uploaded .txt or .docx file.

    The document is split on blank lines; every non-empty paragraph is passed
    to ``generate_voiceover`` together with a 1-based sequence number and the
    document's base name.

    Args:
        file: The upload. Either an object exposing the file path as ``.name``
            or a plain path string; ``None`` when nothing was uploaded.
        voice: Voice name forwarded to ``generate_voiceover``.

    Returns:
        A single string in every case: either a user-facing error message, or
        the generated audio file paths joined by blank lines. (The interface
        has exactly one text output, so error paths no longer return a
        2-tuple.)
    """
    if file is None:
        return "Please upload a file."

    # Work with the path rather than the upload object itself: open() and
    # docx2txt both need a path, and a plain string has no `.name`.
    path = getattr(file, "name", file)
    # Compare the extension case-insensitively so "NOTES.TXT" is accepted.
    extension = os.path.splitext(path)[1].lower()

    if extension == ".txt":
        # Explicit encoding so reading does not depend on the host locale.
        with open(path, encoding="utf-8") as source:
            text = source.read()
    elif extension == ".docx":
        # docx2txt.process() requires the document path as its argument.
        text = docx2txt.process(path)
    else:
        return "Unsupported file type."

    if not text.strip():
        return "File is empty. Please upload a file with content."

    paragraphs = text.split("\n\n")
    print(paragraphs)

    # Base name up to the first dot, used to name the generated audio files.
    base_name = os.path.basename(path).split(".")[0]

    # Only non-empty paragraphs are voiced, and only they consume a number.
    spoken = (paragraph for paragraph in paragraphs if paragraph.strip())
    audio_outputs = [
        generate_voiceover(paragraph, voice, index, base_name)
        for index, paragraph in enumerate(spoken, start=1)
    ]

    return "\n\n".join(audio_outputs)
63
+
64
+ # all filenames in voices folder
65
+ voices = [f.name.split(".")[0] for f in os.scandir("voices") if f.is_file()]
66
+
67
+
68
def main():
    """Build the Gradio interface around ``process_file`` and launch it."""
    # Components are created in the same order as before: file, voice, output.
    upload_input = gr.File(file_types=['.txt', '.docx'], label="Upload a file")
    voice_input = gr.Dropdown(voices, label="Select a language")
    result_box = gr.Textbox(label="Voiceover Text")

    app = gr.Interface(
        fn=process_file,
        inputs=[upload_input, voice_input],
        outputs=[result_box],
        title="Text to Speech App",
        description="Upload a .txt or .docx file, select a language, and generate voiceovers for the content.",
    )
    app.launch(debug=True)
82
+
83
+ if __name__ == "__main__":
84
+ main()