AngeT10 committed on
Commit
3251e7e
·
verified ·
1 Parent(s): e41aad0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -10
app.py CHANGED
@@ -2,27 +2,39 @@ import subprocess
2
  import os
3
  import torch
4
  import urllib.request
 
 
 
5
 
6
  # Set the device to CPU
7
  device = "cpu"
8
 
9
  os.environ["COQUI_TOS_AGREED"] = "1"
10
 
11
- import gradio as gr
12
- import torch
13
- import os
14
- import zipfile
15
- import requests
16
- from TTS.api import TTS
17
 
18
- os.environ["COQUI_TOS_AGREED"] = "1"
 
 
 
19
 
20
- # Set the device to CPU
21
- device = "cpu"
 
 
 
 
 
 
 
 
 
 
22
 
23
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
24
 
25
  def clone(text, url, language):
 
26
  response = requests.get(url)
27
 
28
  with open("temp.zip", "wb") as f:
@@ -33,6 +45,10 @@ def clone(text, url, language):
33
 
34
  audio_file = [f for f in os.listdir(".") if f.endswith(".wav")][0]
35
 
 
 
 
 
36
  tts.tts_to_file(text=text, speaker_wav=audio_file, language=language, file_path="./output.wav")
37
 
38
  os.remove(audio_file)
@@ -50,4 +66,46 @@ iface = gr.Interface(fn=clone,
50
  """,
51
  theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"))
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  iface.launch(share=True)
 
2
  import os
3
  import torch
4
  import urllib.request
5
+ import librosa
6
+ from moviepy.editor import VideoFileClip
7
+ from TTS.api import TTS
8
 
9
  # Set the device to CPU
10
  device = "cpu"
11
 
12
  os.environ["COQUI_TOS_AGREED"] = "1"
13
 
14
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
 
 
 
 
15
 
16
def convert_audio_to_wav(file_path):
    """Convert an MP3, FLAC, or MP4 file to a WAV file and return its path.

    The converted audio is written to the current working directory as
    ``temp_<stem>.wav``, where ``<stem>`` is the input's base name without
    its extension. Files with any other extension are not touched and the
    original ``file_path`` is returned unchanged.

    Args:
        file_path: Path to the input audio/video file.

    Returns:
        Path of the WAV file that was written, or ``file_path`` itself when
        the extension is not one of ``.mp3``, ``.flac``, ``.mp4``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg fails on a FLAC input.
        ValueError: If an MP4 input has no audio track.
    """
    # Imported locally so the function does not depend on the exact set of
    # module-level imports.
    import subprocess
    import wave

    file_name = os.path.basename(file_path)
    stem, file_ext = os.path.splitext(file_name)
    file_ext = file_ext.lower()

    # The output must carry a .wav extension: the previous name kept the
    # source extension, so ffmpeg/moviepy chose the container from it and
    # the result was never actually a WAV file.
    wav_path = f"temp_{stem}.wav"

    if file_ext == ".mp3":
        # librosa.load decodes to a mono float array (default settings).
        audio, sr = librosa.load(file_path)
        # librosa.output.write_wav is no longer available in current librosa
        # releases, so the samples are written as 16-bit PCM with the
        # standard-library wave module instead.
        pcm = (audio.clip(-1.0, 1.0) * 32767).astype("<i2")
        with wave.open(wav_path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(int(sr))
            wav_file.writeframes(pcm.tobytes())
    elif file_ext == ".flac":
        # Argument list (no shell): the file name may come from an untrusted
        # archive and may contain spaces or shell metacharacters.
        # -y overwrites a stale temp file instead of prompting on stdin.
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-i", file_path,
                "-acodec", "pcm_s16le",
                "-ar", "16000",
                wav_path,
            ],
            check=True,
        )
    elif file_ext == ".mp4":
        # VideoFileClip takes no audio_codec argument; the codec is chosen
        # by write_audiofile from the .wav extension.
        clip = VideoFileClip(file_path)
        try:
            if clip.audio is None:
                raise ValueError(f"{file_path} has no audio track")
            clip.audio.write_audiofile(wav_path)
        finally:
            # Release the ffmpeg reader processes held by the clip.
            clip.close()
    else:
        # Unsupported/already-usable format: hand the path back untouched.
        return file_path

    return wav_path
35
 
36
  def clone(text, url, language):
37
+ """Generate a voice clone using the given parameters."""
38
  response = requests.get(url)
39
 
40
  with open("temp.zip", "wb") as f:
 
45
 
46
  audio_file = [f for f in os.listdir(".") if f.endswith(".wav")][0]
47
 
48
+ # Convert the audio file to WAV format
49
+ if os.path.splitext(audio_file)[1].lower() not in [".wav", ".flac"]:
50
+ audio_file = convert_audio_to_wav(audio_file)
51
+
52
  tts.tts_to_file(text=text, speaker_wav=audio_file, language=language, file_path="./output.wav")
53
 
54
  os.remove(audio_file)
 
66
  """,
67
  theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"))
68
 
69
+ iface.launch(share=True)
70
+
71
+ with open("temp.zip", "wb") as f:
72
+ f.write(response.content)
73
+
74
+ with zipfile.ZipFile("temp.zip", "r") as zip_ref:
75
+ zip_ref.extractall()
76
+
77
+ audio_file = [f for f in os.listdir(".") if f.endswith(".wav")][0]
78
+
79
+ # Convert the audio file to the desired format
80
+ if output_format == "mp3":
81
+ audio, sr = librosa.load(audio_file)
82
+ librosa.output.write_wav("temp.wav", audio, sr)
83
+ os.system(f"ffmpeg -i temp.wav -acodec libmp3lame -qscale:a 4 {audio_file[:-4]}.mp3")
84
+ os.remove("temp.wav")
85
+ audio_file = f"{audio_file[:-4]}.mp3"
86
+ elif output_format == "flac":
87
+ os.system(f"ffmpeg -i {audio_file} {audio_file[:-4]}.flac")
88
+ audio_file = f"{audio_file[:-4]}.flac"
89
+ elif output_format == "mp4":
90
+ clip = VideoFileClip(audio_file, audio_codec="aac")
91
+ clip.write_videofile("output.mp4", fps=24, codec="libx264")
92
+ audio_file = "output.mp4"
93
+
94
+ tts.tts_to_file(text=text, speaker_wav=audio_file, language=language, file_path="./output.wav")
95
+
96
+ os.remove(audio_file)
97
+ os.remove("temp.zip")
98
+
99
+ return "./output.wav"
100
+
101
+ iface = gr.Interface(fn=clone,
102
+ inputs=["text", gr.components.Text(label="URL"), gr.Dropdown(choices=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko", "hi"], label="Language"), gr.Dropdown(choices=["wav", "mp3", "flac", "mp4"], label="Output Format")],
103
+ outputs=gr.Audio(type='filepath'),
104
+ title='Voice Clone',
105
+ description="""
106
+ by [Angetyde](https://youtube.com/@Angetyde?si=7nusP31nTumIkPTF) and [Tony Assi](https://www.tonyassi.com/ )
107
+ use this colab with caution <3.
108
+ """,
109
+ theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"))
110
+
111
  iface.launch(share=True)