EC2 Default User committed on
Commit 7df64f6
Parent: 03cce66

Adding Text to Video Message

.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+.ipynb_checkpoints*
.ipynb_checkpoints/app-checkpoint.py DELETED
@@ -1,20 +0,0 @@
-import os
-import sys
-import gradio as gr
-
-os.system('git clone https://github.com/Rudrabha/Wav2Lip.git')
-os.system('curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth')
-os.system('mv ./Wav2Lip/* .')
-
-title = "Text2Lip"
-description = "Wav2Lip With Text"
-
-
-def inference(face, audio):
-    os.system("python inference.py --checkpoint_path ./wav2lip.pth --face {} --audio {}".format(face, audio))
-
-    return "./results/result_voice.mp4"
-
-
-iface = gr.Interface(inference, inputs=[gr.inputs.Video(type="mp4", source="upload", label="Talking Face Video (in mp4 format)", optional=False), gr.inputs.Audio(source="upload", type="filepath", label="Audio", optional=False)], outputs=["video"], title=title, description=description, article=article, examples=[["./examples/w2l_test_f1.mp4", "./examples/w2l_test_a1.wav"]], enable_queue=True)
-iface.launch()
README.md CHANGED
@@ -3,8 +3,9 @@ title: Text2Lip
 emoji: 👀
 colorFrom: pink
 colorTo: indigo
+python_version: 3.7.13
 sdk: gradio
-sdk_version: 3.3
+sdk_version: 3.0.4
 app_file: app.py
 pinned: false
 ---
SE_checkpoint.pth.tar ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f96efb20cbeeefd81fd8336d7f0155bf8902f82f9474e58ccb19d9e12345172
+size 44610930
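
The three added lines above are a Git LFS pointer, not the checkpoint itself; the real ~44.6 MB file is fetched by `git lfs pull`. A minimal sketch of reading such a pointer (this only works while the pointer is still in place, i.e. before LFS materializes the file):

def read_lfs_pointer(path):
    # each pointer line is "<key> <value>"; collect them into a dict
    with open(path) as f:
        meta = dict(line.strip().split(" ", 1) for line in f if " " in line)
    return meta["oid"], int(meta["size"])

oid, size = read_lfs_pointer("SE_checkpoint.pth.tar")
print(oid, size)  # sha256:8f96efb2... 44610930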
app.py CHANGED
@@ -1,20 +1,66 @@
+import gradio as gr
 import os
 import sys
-import gradio as gr
-
-os.system('git clone https://github.com/Rudrabha/Wav2Lip.git')
-os.system('curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth')
-os.system('mv ./Wav2Lip/* .')
-
-title = "Text2Lip"
-description = "Wav2Lip With Text"
-
-
-def inference(face, audio):
-    os.system("python inference.py --checkpoint_path ./wav2lip.pth --face {} --audio {}".format(face, audio))
-
-    return "./results/result_voice.mp4"
-
+# Installation of libraries
+EC2_INSTANCE = False
+if EC2_INSTANCE: os.system('cd scripts && sh install.sh')
+os.system('python installation.py')
+TTS_PATH = "TTS/"
+# add libraries into environment
+sys.path.append(TTS_PATH)  # set this if TTS is not installed globally
+VOICE_PATH = "utils/"
+# add libraries into environment
+sys.path.append(VOICE_PATH)  # set this if modules and voice are not installed globally
+from utils.modules import *
+from utils.voice import *
+# Definition of the web app in Gradio
+text_to_say = gr.inputs.Textbox(label='What would you like the voice to say? (max. 2000 characters per request)')
+url = gr.inputs.Textbox(label="Enter the YouTube URL below:")
+initial_time = gr.inputs.Textbox(label='Initial time of trim? (format: hh:mm:ss)')
+final_time = gr.inputs.Textbox(label='Final time to trim? (format: hh:mm:ss)')
+demo = gr.Interface(fn=video_generator,
+                    inputs=[text_to_say, url, initial_time, final_time],
+                    outputs='video',
+                    verbose=True,
+                    title='Video Speech Generator from Youtube Videos',
+                    description='A simple application that replaces the original speech of the video with your text. Wait one minute to process.',
+                    article=
+                    '''<div>
+                    <p style="text-align: center">
+                    All you need to do is paste the YouTube link and
+                    set the initial and final time of the real speech
+                    (the trim is limited to 5 minutes and may not exceed the video length),
+                    then hit submit and wait for compiling.
+                    After that, click on Play/Pause to listen to the video.
+                    The video is saved in mp4 format.
+                    For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
+                    </p>
+                    </div>''',

-iface = gr.Interface(inference, inputs=[gr.inputs.Video(type="mp4", source="upload", label="Talking Face Video (in mp4 format)", optional=False), gr.inputs.Audio(source="upload", type="filepath", label="Audio", optional=False)], outputs=["video"], title=title, description=description, article=article, examples=[["./examples/w2l_test_f1.mp4", "./examples/w2l_test_a1.wav"]], enable_queue=True)
-iface.launch()
+                    examples=[['I am cloning your voice, Charles! Machine intelligence is the last invention that humanity will ever need to make.',
+                               "https://www.youtube.com/watch?v=xw5dvItD5zY",
+                               "00:00:01", "00:00:10"],
+                              ['I am cloning your voice, Jim Carrey! Machine intelligence is the last invention that humanity will ever need to make.',
+                               "https://www.youtube.com/watch?v=uIaY0l5qV0c",
+                               "00:00:29", "00:01:05"],
+                              ['I am cloning your voice, Mark Zuckerberg! Machine intelligence is the last invention that humanity will ever need to make.',
+                               "https://www.youtube.com/watch?v=AYjDIFrY9rc",
+                               "00:00:11", "00:00:44"],
+                              ['I am cloning your voice, Ronald Reagan! Machine intelligence is the last invention that humanity will ever need to make.',
+                               "https://www.youtube.com/watch?v=iuoRDY9c5SQ",
+                               "00:01:03", "00:01:22"],
+                              ['I am cloning your voice, Elon Musk! Machine intelligence is the last invention that humanity will ever need to make.',
+                               "https://www.youtube.com/watch?v=IZ8JQ_1gytg",
+                               "00:00:10", "00:00:43"],
+                              ['I am cloning your voice, Hitler! Machine intelligence is the last invention that humanity will ever need to make.',
+                               "https://www.youtube.com/watch?v=F08wrLyH5cs",
+                               "00:00:15", "00:00:40"],
+                              ['I am cloning your voice, Alexandria! Machine intelligence is the last invention that humanity will ever need to make.',
+                               "https://www.youtube.com/watch?v=Eht6oIkzkew",
+                               "00:00:02", "00:00:30"],
+                              ['I am cloning your voice, Deborah! Machine intelligence is the last invention that humanity will ever need to make.',
+                               "https://www.youtube.com/watch?v=qbq4_Swj0Gg",
+                               "00:00:03", "00:00:44"],
+                              ]
+                    )
+demo.launch()
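
For reference, the same pipeline can be driven headlessly, bypassing the Gradio UI. A minimal sketch, assuming `installation.py` has already run and the checkpoints in this commit sit in the working directory:

from utils.modules import video_generator

out = video_generator(
    "Machine intelligence is the last invention that humanity will ever need to make.",
    "https://www.youtube.com/watch?v=xw5dvItD5zY",  # first URL from the examples above
    "00:00:01",
    "00:00:10",
)
print(out)  # absolute path to result_voice_<n>.mp4, or a ./demo/tryagain*.mp4 error clip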
best_model.pth.tar ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:017bfd8907c80bb5857d65d0223f0e4e4b9d699ef52e2a853d9cc7eb7e308cf0
+size 379957289
best_model_latest.pth.tar ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:017bfd8907c80bb5857d65d0223f0e4e4b9d699ef52e2a853d9cc7eb7e308cf0
+size 379957289
errormessage.wav ADDED
Binary file (889 kB).
installation.py ADDED
@@ -0,0 +1,45 @@
+# Import the os module
+import os
+from utils.default_models import ensure_default_models
+from pathlib import Path
+
+Sagemaker = False
+if Sagemaker:
+    env = 'source activate python3 && conda activate VideoMessage &&'
+else:
+    env = ''
+
+## Step 1. Setup of the dependencies
+is_first_time = True
+
+# Install dependencies
+# Download pretrained models
+
+# Get the current working directory
+parent_dir = os.getcwd()
+print(parent_dir)
+if is_first_time:
+    # Directory
+    directory = "sample_data"
+    # Path
+    path = os.path.join(parent_dir, directory)
+    print(path)
+    try:
+        os.mkdir(path)
+        print("Directory '%s' created" % directory)
+    except Exception:
+        print("Directory '%s' was already created" % directory)
+if is_first_time:
+    os.system('git clone https://github.com/Rudrabha/Wav2Lip')
+    os.system('cd Wav2Lip && {} pip install -r requirements.txt'.format(env))
+    ## Load the models one by one.
+    print("Preparing the models of Wav2Lip")
+    ensure_default_models(Path("Wav2Lip"))
+    os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
+    os.system('{} pip install -q -e TTS/'.format(env))
+    os.system('{} pip install -q torchaudio==0.9.0'.format(env))
+    os.system('{} pip install -q youtube-dl'.format(env))
+    os.system('{} pip install ffmpeg-python'.format(env))
+    os.system('{} pip install gradio==3.0.4'.format(env))
+    os.system('{} pip install pytube==12.1.0'.format(env))
+    os.system('{} pip install torchaudio==0.9.0 TTS'.format(env))
+    os.system('{} pip install opencv-contrib-python-headless==4.1.2.30'.format(env))
+print("Installation of repositories DONE!!")
scripts/install.sh ADDED
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+source activate python3
+# check prerequisites
+command -v conda >/dev/null 2>&1 || { echo >&2 "conda not found. Please refer to the README and install Miniconda."; exit 1; }
+command -v git >/dev/null 2>&1 || { echo >&2 "git not found. Please refer to the README and install Git."; exit 1; }
+# Conda environment name
+CONDA_ENV_NAME=VideoMessage
+source $(conda info --base)/etc/profile.d/conda.sh
+conda create -y -n $CONDA_ENV_NAME python=3.7.13
+conda activate $CONDA_ENV_NAME
+conda install -y ipykernel
+python -m ipykernel install --user --name VideoMessage --display-name "Python 3 (VideoMessage)"
+sh install_git-lfs.sh
+sh install_ffmpeg.sh
scripts/install_ffmpeg.sh ADDED
@@ -0,0 +1,7 @@
+#!/bin/bash
+cd /usr/local/bin
+sudo mkdir ffmpeg && cd ffmpeg
+sudo wget https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz
+sudo tar -xf ffmpeg-release-amd64-static.tar.xz
+sudo ln -s /usr/local/bin/ffmpeg/ffmpeg-5.1.1-amd64-static/ffmpeg /usr/bin/ffmpeg
+sudo ln -s /usr/local/bin/ffmpeg/ffmpeg-5.1.1-amd64-static/ffprobe /usr/bin/ffprobe
scripts/install_git-lfs.sh ADDED
@@ -0,0 +1,4 @@
+#!/bin/bash
+curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
+sudo yum install git-lfs -y
+git lfs install
speakers.json ADDED
The diff for this file is too large to render.
utils/__init__.py ADDED
Empty file.
utils/default_models.py ADDED
@@ -0,0 +1,56 @@
+import urllib.request
+from pathlib import Path
+from threading import Thread
+from urllib.error import HTTPError
+
+from tqdm import tqdm
+
+# Target locations inside the Wav2Lip checkout:
+#   Wav2Lip/checkpoints/wav2lip_gan.pth
+#   Wav2Lip/face_detection/detection/sfd/s3fd.pth
+default_models = {
+    "wav2lip_gan": ("https://drive.google.com/u/0/uc?id=1V8hobVlZJdp8dzI8qWaAlbhCrXdBiUET&export=download&confirm=t", 435801865, 'checkpoints'),
+    "s3fd": ("https://drive.google.com/u/0/uc?id=1Y-mgxW8iq1pXUQicU_8ClNB85eQ1lk0o&export=download", 89843225, 'face_detection/detection/sfd'),
+}
+
+
+class DownloadProgressBar(tqdm):
+    def update_to(self, b=1, bsize=1, tsize=None):
+        if tsize is not None:
+            self.total = tsize
+        self.update(b * bsize - self.n)
+
+
+def download(url: str, target: Path, bar_pos=0):
+    # Ensure the directory exists
+    target.parent.mkdir(exist_ok=True, parents=True)
+
+    desc = f"Downloading {target.name}"
+    with DownloadProgressBar(unit="B", unit_scale=True, miniters=1, desc=desc, position=bar_pos, leave=False) as t:
+        try:
+            urllib.request.urlretrieve(url, filename=target, reporthook=t.update_to)
+        except HTTPError:
+            return
+
+
+def ensure_default_models(models_dir: Path):
+    # Define download tasks
+    jobs = []
+    for model_name, (url, size, path_tobe) in default_models.items():
+        target_path = models_dir / path_tobe / f"{model_name}.pth"
+        print(target_path)
+        if target_path.exists():
+            if target_path.stat().st_size != size:
+                print(f"File {target_path} is not of expected size, redownloading...")
+            else:
+                continue
+
+        thread = Thread(target=download, args=(url, target_path, len(jobs)))
+        thread.start()
+        jobs.append((thread, target_path, size))
+
+    # Run and join threads
+    for thread, target_path, size in jobs:
+        thread.join()
+
+        assert target_path.exists() and target_path.stat().st_size == size, \
+            f"Download for {target_path.name} failed. You may download models manually instead."
utils/modules.py ADDED
@@ -0,0 +1,242 @@
+# Modules for the Video Message Generator from YouTube
+
+from IPython.display import HTML, Audio
+from base64 import b64decode
+import numpy as np
+from scipy.io.wavfile import read as wav_read
+import io
+import ffmpeg
+from pytube import YouTube
+import random
+from subprocess import call
+import os
+from datetime import datetime
+
+# Optional shell prefix (e.g. a conda activation) prepended to external commands;
+# empty by default so the commands below run in the current environment.
+env = ''
+
+
+def time_between(t1, t2):
+    FMT = '%H:%M:%S'
+    t1 = datetime.strptime(t1, FMT)
+    t2 = datetime.strptime(t2, FMT)
+    delta = t2 - t1
+    return str(delta)
+
+
+def download_video(url):
+    print("Downloading...")
+    local_file = (
+        YouTube(url)
+        .streams.filter(progressive=True, file_extension="mp4")
+        .first()
+        .download(filename="youtube{}.mp4".format(random.randint(0, 10000)))
+    )
+    print("Downloaded")
+    return local_file
+    # download(output_path=destination, filename="name.mp4")
+
+
+def download_youtube(url):
+    # Select a YouTube video: find the video id in the URL
+    from urllib import parse as urlparse
+    url_data = urlparse.urlparse(url)
+    query = urlparse.parse_qs(url_data.query)
+    YOUTUBE_ID = query["v"][0]
+    url_download = "https://www.youtube.com/watch?v={}".format(YOUTUBE_ID)
+    # download the video with the given id
+    os.system("{} youtube-dl -f mp4 --output youtube.mp4 '{}'".format(env, url_download))
+    return "youtube.mp4"
+
+
+def cleanup():
+    import pathlib
+    import glob
+    types = ('*.mp4', '*.mp3', '*.wav')  # the tuple of file types
+    # Finding mp4, mp3 and wav files
+    junks = []
+    for files in types:
+        junks.extend(glob.glob(files))
+    try:
+        # Deleting those files
+        for junk in junks:
+            print("Deleting", junk)
+            # Setting the path for the file to delete
+            file = pathlib.Path(junk)
+            # Calling the unlink method on the path
+            file.unlink()
+    except Exception:
+        print("I cannot delete the file because it is being used by another process")
+
+
+def clean_data():
+    # importing all necessary libraries
+    import sys, os
+    # initial directory
+    home_dir = os.getcwd()
+    # directory to clean
+    fd = 'sample_data/'
+    # Join various path components
+    path_to_clean = os.path.join(home_dir, fd)
+    print("Path to clean:", path_to_clean)
+    try:
+        os.chdir(path_to_clean)
+        print("Inside to clean", os.getcwd())
+        cleanup()
+    # Catching the exception
+    except:
+        print("Something wrong with the specified directory. Exception-", sys.exc_info())
+    # handling with finally
+    finally:
+        print("Restoring the path")
+        os.chdir(home_dir)
+        print("Current directory is-", os.getcwd())
+
+
+def youtube_trim(url, start, end):
+    # remove leftovers from a previous run
+    cleanup()
+    # download the youtube video
+    # download_youtube(url)  # with youtube-dl (slow)
+    input_videos = download_video(url)
+    # Get the current working directory
+    parent_dir = os.getcwd()
+    # Trim the video (start, end) seconds
+    # Note: the trimmed video must have a face on all frames
+    interval = time_between(start, end)
+    trimmed_video = parent_dir + '/sample_data/input_video.mp4'
+    trimmed_audio = parent_dir + '/sample_data/input_audio.mp3'
+    # delete the trimmed files if they already exist
+    clean_data()
+    # cut the video
+    call(["ffmpeg", "-y", "-i", input_videos, "-ss", start, "-t", interval, "-async", "1", trimmed_video])
+    # cut the audio
+    call(["ffmpeg", "-i", trimmed_video, "-q:a", "0", "-map", "a", trimmed_audio])
+    print("Trimmed Video+Audio")
+    return trimmed_video, trimmed_audio
+
+
+def create_video(Text, Voicetoclone):
+    from utils.voice import greet  # voice-cloning synthesizer defined in utils/voice.py
+    out_audio = greet(Text, Voicetoclone)
+    current_dir = os.getcwd()
+    clonned_audio = os.path.join(current_dir, out_audio)
+
+    # Start crunching and preview output
+    # Note: only change these if you have to
+    pad_top = 0  # @param {type:"integer"}
+    pad_bottom = 10  # @param {type:"integer"}
+    pad_left = 0  # @param {type:"integer"}
+    pad_right = 0  # @param {type:"integer"}
+    rescaleFactor = 1  # @param {type:"integer"}
+    nosmooth = False  # @param {type:"boolean"}
+
+    out_name = "result_voice_{}.mp4".format(random.randint(0, 10000))
+    out_file = "../" + out_name
+
+    if not nosmooth:
+        os.system('{} cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face "../sample_data/input_video.mp4" --audio "../out/clonned_audio.wav" --outfile {} --pads {} {} {} {} --resize_factor {}'.format(env, out_file, pad_top, pad_bottom, pad_left, pad_right, rescaleFactor))
+    else:
+        os.system('{} cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face "../sample_data/input_video.mp4" --audio "../out/clonned_audio.wav" --outfile {} --pads {} {} {} {} --resize_factor {} --nosmooth'.format(env, out_file, pad_top, pad_bottom, pad_left, pad_right, rescaleFactor))
+
+    print("Creation of Video done")
+    return out_name
+
+
+def time_format_check(input1):
+    timeformat = "%H:%M:%S"
+    try:
+        validtime = datetime.strptime(input1, timeformat)
+        print("The time format is valid", input1)
+        # Do your logic with validtime, which is in a valid format
+        return False
+    except ValueError:
+        print("The time {} is not in the valid format hh:mm:ss".format(input1))
+        return True
+
+
+def to_seconds(datetime_obj):
+    from datetime import datetime
+    time = datetime_obj
+    date_time = datetime.strptime(time, "%H:%M:%S")
+    a_timedelta = date_time - datetime(1900, 1, 1)
+    seconds = a_timedelta.total_seconds()
+    return seconds
+
+
+def validate_youtube(url):
+    # This creates a YouTube object
+    try:
+        yt = YouTube(url)
+    except Exception:
+        print("Hi there, the URL seems invalid")
+        return True, 0
+    # This returns the length of the video in seconds as an int
+    video_length = yt.length
+    if video_length > 600:
+        print("Your video is longer than 10 minutes")
+        return True, video_length
+    else:
+        print("Your video is less than 10 minutes")
+        return False, video_length
+
+
+def video_generator(text_to_say, url, initial_time, final_time):
+    print('Checking the url', url)
+    check1, video_length = validate_youtube(url)
+    if check1 is True: return "./demo/tryagain2.mp4"
+    check2 = validate_time(initial_time, final_time, video_length)
+    if check2 is True: return "./demo/tryagain0.mp4"
+    trimmed_video, trimmed_audio = youtube_trim(url, initial_time, final_time)
+    voicetoclone = trimmed_audio
+    print(voicetoclone)
+    outvideo = create_video(text_to_say, voicetoclone)
+    # Preview output video
+    print("Final Video Preview")
+    parent_dir = os.getcwd()
+    final_video = parent_dir + '/' + outvideo
+    print("DONE")
+    return final_video
+
+
+def validate_time(initial_time, final_time, video_length):
+    is_wrong1 = time_format_check(initial_time)
+    is_wrong2 = time_format_check(final_time)
+    if is_wrong1 is False and is_wrong2 is False:
+        delta = time_between(initial_time, final_time)
+        # a negative timedelta stringifies as "-1 day, 23:59:59", i.e. longer than 8 chars
+        if len(str(delta)) > 8:
+            print("Final Time is Smaller than Initial Time: t1>t2")
+            is_wrong = True
+            return is_wrong
+        else:
+            print("OK")
+            is_wrong = False
+            if int(to_seconds(delta)) > 300:
+                print("The trim is longer than 5 minutes")
+                is_wrong = True
+                return is_wrong
+            elif int(to_seconds(delta)) > video_length:
+                print("The trim is longer than the video length")
+                is_wrong = True
+                return is_wrong
+            else:
+                return is_wrong
+    else:
+        print("Your time format is invalid")
+        is_wrong = True
+        return is_wrong
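
A small worked example of the time-validation helpers above, under the 5-minute trim rule (expected outputs as comments):

from utils.modules import time_between, validate_time

print(time_between("00:00:29", "00:01:05"))        # "0:00:36" -> a 36-second trim
print(validate_time("00:01:05", "00:00:29", 600))  # True: final time before initial time
print(validate_time("00:00:00", "00:06:00", 600))  # True: trim longer than 300 seconds
print(validate_time("00:00:29", "00:01:05", 600))  # False: a valid request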
utils/voice.py ADDED
@@ -0,0 +1,120 @@
+import gradio as gr
+import os
+import sys
+import string
+import time
+import argparse
+import json
+import numpy as np
+import IPython
+from IPython.display import Audio
+import torch
+
+TTS_PATH = "TTS/"
+# add libraries into environment
+sys.path.append(TTS_PATH)  # set this if TTS is not installed globally
+
+from TTS.tts.utils.synthesis import synthesis
+from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
+try:
+    from TTS.utils.audio import AudioProcessor
+except ImportError:
+    from TTS.utils.audio import AudioProcessor
+from TTS.tts.models import setup_model
+from TTS.config import load_config
+from TTS.tts.models.vits import *
+
+OUT_PATH = 'out/'
+# create output path
+os.makedirs(OUT_PATH, exist_ok=True)
+
+# Get the current working directory
+parent_dir = os.getcwd()
+print(parent_dir)
+# model vars
+MODEL_PATH = parent_dir + '/best_model.pth.tar'
+CONFIG_PATH = parent_dir + '/config.json'
+TTS_LANGUAGES = parent_dir + "/language_ids.json"
+TTS_SPEAKERS = parent_dir + "/speakers.json"
+USE_CUDA = torch.cuda.is_available()
+# load the config
+C = load_config(CONFIG_PATH)
+# load the audio processor
+ap = AudioProcessor(**C.audio)
+speaker_embedding = None
+C.model_args['d_vector_file'] = TTS_SPEAKERS
+C.model_args['use_speaker_encoder_as_loss'] = False
+model = setup_model(C)
+model.language_manager.set_language_ids_from_file(TTS_LANGUAGES)
+# print(model.language_manager.num_languages, model.embedded_language_dim)
+# print(model.emb_l)
+cp = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
+# remove speaker encoder
+model_weights = cp['model'].copy()
+for key in list(model_weights.keys()):
+    if "speaker_encoder" in key:
+        del model_weights[key]
+model.load_state_dict(model_weights)
+model.eval()
+if USE_CUDA:
+    model = model.cuda()
+# synthesize voice
+use_griffin_lim = False
+os.system('pip install -q pydub ffmpeg-normalize')
+CONFIG_SE_PATH = "config_se.json"
+CHECKPOINT_SE_PATH = "SE_checkpoint.pth.tar"
+from TTS.tts.utils.speakers import SpeakerManager
+from pydub import AudioSegment
+import librosa
+SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encoder_config_path=CONFIG_SE_PATH, use_cuda=USE_CUDA)
+
+
+def compute_spec(ref_file):
+    y, sr = librosa.load(ref_file, sr=ap.sample_rate)
+    spec = ap.spectrogram(y)
+    spec = torch.FloatTensor(spec).unsqueeze(0)
+    return spec
+
+
+def greet(Text, Voicetoclone):
+    text = "%s" % (Text)
+    reference_files = "%s" % (Voicetoclone)
+    print("path url")
+    print(Voicetoclone)
+    sample = str(Voicetoclone)
+    # reject oversized inputs: reference clips above 30 MB or text above 2000 characters
+    size_mb = os.path.getsize(sample) / 1_000_000
+    if size_mb > 30 or len(text) > 2000:
+        message = "File is greater than 30 MB or the text is longer than 2000 characters. Please re-try with smaller sizes."
+        print(message)
+        raise SystemExit(message)
+    else:
+        # normalize the reference clip in place
+        os.system('ffmpeg-normalize "{0}" -nt rms -t=-27 -o "{0}" -ar 16000 -f'.format(sample))
+        reference_emb = SE_speaker_manager.compute_d_vector_from_clip(reference_files)
+        model.length_scale = 1  # scaler for the duration predictor. The larger it is, the slower the speech.
+        model.inference_noise_scale = 0.3  # noise variance applied to the random z vector at inference.
+        model.inference_noise_scale_dp = 0.3  # noise variance applied to the duration predictor z vector at inference.
+        language_id = 0
+
+        print(" > text: {}".format(text))
+        wav, alignment, _, _ = synthesis(
+            model,
+            text,
+            C,
+            "cuda" in str(next(model.parameters()).device),
+            ap,
+            speaker_id=None,
+            d_vector=reference_emb,
+            style_wav=None,
+            language_id=language_id,
+            enable_eos_bos_chars=C.enable_eos_bos_chars,
+            use_griffin_lim=True,
+            do_trim_silence=False,
+        ).values()
+        print("Generated Audio")
+        IPython.display.display(Audio(wav, rate=ap.sample_rate))
+        file_name = 'clonned_audio.wav'
+        out_path = os.path.join(OUT_PATH, file_name)
+        print(" > Saving output to {}".format(out_path))
+        ap.save_wav(wav, out_path)
+        return out_path
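
Usage sketch for greet, the only entry point create_video needs; the reference clip path is the one youtube_trim produces (note that importing utils.voice loads the TTS model at import time, which is slow):

from utils.voice import greet

wav_path = greet(
    "Hello, this is a cloned voice speaking.",  # text to synthesize, up to 2000 characters
    "sample_data/input_audio.mp3",              # reference clip written by youtube_trim
)
print(wav_path)  # out/clonned_audio.wav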