Commit 33d1219
Parent(s): 68c6e5c
Update app.py

app.py CHANGED
@@ -14,8 +14,8 @@ import imageio
 import os
 import ffmpeg
 
-
-
+from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
+from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
 
 block = gr.Blocks()
 
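Note: the two imports added in this hunk come from fairseq's text-to-speech workflow. A minimal sketch of the loading step they enable, using the same checkpoint and overrides as the Male branch added further down in this commit:

from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface

# Fetch the FastSpeech 2 checkpoint from the Hugging Face Hub and build the
# model/config/task objects needed for synthesis.
models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
    "Voicemod/fastspeech2-en-male1",
    arg_overrides={"vocoder": "hifigan", "fp16": False},
)
model = models[0]
TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
generator = task.build_generator([model], cfg)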
@@ -126,8 +126,9 @@ def audio_video():
     input_video = ffmpeg.input('/content/video_output.mp4')
 
     input_audio = ffmpeg.input('/content/audio.wav')
-
+    os.system(f"rm -rf /content/final_output.mp4")
     ffmpeg.concat(input_video, input_audio, v=1, a=1).output('/content/final_output.mp4').run()
+
     return "/content/final_output.mp4"
 
 def one_shot_talking(image_in,audio_in):
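Note: the added os.system rm works around ffmpeg refusing to overwrite an existing output file. A sketch (assuming the ffmpeg-python package, which this file imports) that achieves the same with ffmpeg's -y flag instead of deleting the file first:

import ffmpeg

input_video = ffmpeg.input('/content/video_output.mp4')
input_audio = ffmpeg.input('/content/audio.wav')
(
    ffmpeg
    .concat(input_video, input_audio, v=1, a=1)  # mux one video and one audio stream
    .output('/content/final_output.mp4')
    .overwrite_output()                          # passes -y, so no prior rm is needed
    .run()
)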
@@ -141,7 +142,7 @@ def one_shot_talking(image_in,audio_in):
     # time.sleep(60)
     image_in_one_shot='/content/results/restored_imgs/image_pre.png'
     #One Shot Talking Face algorithm
-
+    calculate(image_in_one_shot,audio_in)
 
     #Video Quality Improvement
 
@@ -169,7 +170,31 @@ def one_shot(image_in,input_text,gender):
         sound.export("/content/audio.wav", format="wav")
         audio_in="/content/audio.wav"
         return one_shot_talking(image_in,audio_in)
-
+    elif gender == 'Male':
+
+        models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
+            "Voicemod/fastspeech2-en-male1",
+            arg_overrides={"vocoder": "hifigan", "fp16": False}
+        )
+
+        model = models[0]
+        TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
+        generator = task.build_generator([model], cfg)
+        # next(model.parameters()).device
+
+        sample = TTSHubInterface.get_model_input(task, input_text)
+        sample["net_input"]["src_tokens"] = sample["net_input"]["src_tokens"]
+        sample["net_input"]["src_lengths"] = sample["net_input"]["src_lengths"]
+        sample["speaker"] = sample["speaker"]
+
+        wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
+        # soundfile.write("/content/audio_before.wav", wav, rate)
+        soundfile.write("/content/audio_before.wav", wav.cpu().clone().numpy(), rate)
+        cmd='ffmpeg -i /content/audio_before.wav -filter:a "atempo=0.7" -vn /content/audio.wav'
+        os.system(cmd)
+        audio_in="/content/audio.wav"
+
+        return one_shot_talking(image_in,audio_in)
 
 
 def run():
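Note: the three sample[...] self-assignments in this hunk are no-ops as written; they look like leftovers from a device-moving variant of fairseq's example code. A hypothetical fragment, reusing model and sample from the hunk, showing what such a variant would look like:

# Hypothetical device-move version of the no-op assignments above (not in the commit).
device = next(model.parameters()).device
sample["net_input"]["src_tokens"] = sample["net_input"]["src_tokens"].to(device)
sample["net_input"]["src_lengths"] = sample["net_input"]["src_lengths"].to(device)
if sample["speaker"] is not None:
    sample["speaker"] = sample["speaker"].to(device)

The atempo=0.7 filter in the ffmpeg command then slows the synthesized speech to 70% of its original tempo (the filter's minimum is 0.5) before it is handed to one_shot_talking.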
@@ -179,7 +204,7 @@ def run():
     with gr.Box():
         with gr.Row().style(equal_height=True):
             image_in = gr.Image(show_label=True, type="filepath",label="Input Image")
-            input_text = gr.Textbox(show_label=
+            input_text = gr.Textbox(show_label=True,label="Input Text")
             gender = gr.Radio(["Female","Male"],value="Female",label="Gender")
             video_out = gr.Video(show_label=True,label="Output")
         with gr.Row().style(equal_height=True):
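Note: a sketch of how these widgets are typically wired up in a Gradio 3.x Blocks app (the .style() calls date the code to that API). The button name and click wiring are assumptions, not part of the commit; only one_shot, the three inputs, and the video output come from app.py:

import gradio as gr

block = gr.Blocks()
with block:
    with gr.Box():
        with gr.Row().style(equal_height=True):
            image_in = gr.Image(show_label=True, type="filepath", label="Input Image")
            input_text = gr.Textbox(show_label=True, label="Input Text")
            gender = gr.Radio(["Female", "Male"], value="Female", label="Gender")
            video_out = gr.Video(show_label=True, label="Output")
        with gr.Row().style(equal_height=True):
            btn = gr.Button("Generate")  # hypothetical; the diff ends before this row's contents

    # one_shot(image_in, input_text, gender) is defined earlier in app.py
    btn.click(one_shot, inputs=[image_in, input_text, gender], outputs=video_out)

block.launch()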