# wav2lip-gfpgan / txt2video.py
# lorneluo: change default audio model (7db9974)
import os
import shutil
import sys
from pathlib import Path
from urllib.parse import urlsplit

import requests
from gradio_client import Client

from main import call_wav2lip, call_gfpgan, merge
# Base directory under which main() creates one working folder per job id.
root_dir = '/content/jobs'
# Created at import time; exist_ok=True makes repeated imports/runs harmless.
os.makedirs(root_dir, exist_ok=True)
def download_file(url, local_filename, timeout=60):
    """Stream the resource at ``url`` into the file ``local_filename``.

    Args:
        url: HTTP(S) address of the resource to fetch.
        local_filename: Destination path; overwritten if it already exists.
        timeout: Seconds to wait for the connection and for each read from
            the socket before giving up (not a limit on the whole download).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an HTML error page as the payload.
        response.raise_for_status()
        with open(local_filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
def main(job_id, text, video_url):
    """Produce a lip-synced video that speaks ``text`` and return its path.

    Steps: fetch (or locate) the source video, synthesize speech with
    ``gen_voice``, run ``call_wav2lip`` and ``call_gfpgan`` on the result,
    then ``merge`` everything into ``output.mp4`` inside the job folder.

    Args:
        job_id: Name of the per-job working folder created under ``root_dir``.
        text: Text to speak, passed through to ``gen_voice``.
        video_url: Either an ``http``/``https`` URL to download, or a path to
            an existing local file.

    Returns:
        Path of the final ``output.mp4`` inside the job folder.

    Raises:
        FileNotFoundError: If the source video or the generated audio file
            is missing.
    """
    job_path = os.path.join(root_dir, job_id)
    os.makedirs(job_path, exist_ok=True)

    if video_url.startswith('http'):
        # Use only the URL path for the local name so a query string or
        # fragment does not end up in the file name / extension; fall back to
        # a fixed name when the path ends with a slash.
        video_file = os.path.basename(urlsplit(video_url).path) or 'input_video'
        video_path = os.path.join(job_path, video_file)
        download_file(video_url, video_path)
    else:
        video_path = video_url

    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    # The video is verified before the (slow, remote) speech synthesis runs.
    if not os.path.isfile(video_path):
        raise FileNotFoundError(f'Video {video_path} not exist.')

    audio_path = os.path.join(job_path, 'audio.mp3')
    gen_voice(text, audio_path)
    if not os.path.isfile(audio_path):
        raise FileNotFoundError(f'Audio {audio_path} not exist.')

    wav2lip_mp4 = os.path.join(job_path, 'wav2lip.mp4')
    print('=' * 40)
    call_wav2lip(video_path, audio_path, wav2lip_mp4)
    print('=' * 40)
    call_gfpgan(wav2lip_mp4)

    output_filename = 'output.mp4'
    output_mp4 = os.path.join(job_path, output_filename)
    merge(job_path, audio_path, output_mp4)
    return output_mp4
def gen_voice(text, audio_path):
    """Synthesize speech for ``text`` via a hosted Gradio app and save it.

    Args:
        text: Either the text to speak, or an absolute path to an existing
            text file to upload as-is.
        audio_path: Destination path for the generated audio file.

    Side effects:
        When ``text`` is not an existing absolute file path it is written to
        ``input.txt`` next to this script before being uploaded. The file
        returned by the remote call is moved to ``audio_path``.
    """
    client = Client("https://digitalxingtong-ign-longread-bert-vits2.hf.space/")

    # Only treat the argument as a file when it really is one; plain text
    # that merely begins with '/' is synthesized like any other text.
    if os.path.isabs(text) and os.path.isfile(text):
        path = text
    else:
        current_folder = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(current_folder, 'input.txt')
        # Explicit UTF-8 so non-ASCII text does not depend on the locale.
        with open(path, 'w', encoding='utf-8') as file:
            file.write(text)

    # Positional arguments follow the remote app's input components; the
    # component labels below are translated from the app's Chinese UI.
    result = client.predict(
        path,  # str (filepath or URL to file) in 'Upload TXT file here' File component
        0.2,  # int | float (between 0 and 1) in 'intonation variation' Slider component
        0.6,  # int | float (between 0.1 and 1.5) in 'emotion variation' Slider component
        0.8,  # int | float (between 0.1 and 1.4) in 'syllable duration variation' Slider component
        0.95,  # int | float (between 0.1 and 2) in 'speech speed' Slider component
        fn_index=0
    )

    # The result is used as the local path of the generated audio file.
    audio_file = result
    print(audio_file)
    shutil.move(audio_file, audio_path)
if __name__ == '__main__':
    # Command-line entry point.
    # Arguments: <job_id> <text | path to a text file> <video path | URL>
    #
    # Colab usage example:
    #   job_id='2'
    #   ! python txt2video.py {job_id} 海狸今天我们出去走路么 'https://image.tensorartassets.com/cdn-cgi/image/plain=true,w=1280/posts/images/631302687467566953/4873c256-c1bd-49ba-85ca-5821801f122f.jpg'
    #   from google.colab import files
    #   files.download(f'/content/jobs/{job_id}/output.mp4')
    if len(sys.argv) < 4:
        # Give a readable hint instead of an IndexError on missing arguments.
        sys.exit(
            f'Usage: python {sys.argv[0]} <job_id> <text | text file> <video path | URL>'
        )

    job_id = sys.argv[1]
    text = sys.argv[2]  # text or .txt file
    video_url = sys.argv[3]

    if os.path.isfile(text):
        # The argument names a file: speak its contents rather than its name.
        # Explicit UTF-8 so non-ASCII text does not depend on the locale.
        with open(text, 'r', encoding='utf-8') as file:
            text = file.read()

    output_mp4 = main(job_id, text, video_url)