Spaces:
Running
Running
File size: 4,915 Bytes
4d909dd d35ea54 a8c9931 6abe9d1 74076e9 d35ea54 4d909dd 74076e9 f9a70bb 74076e9 f9a70bb 74076e9 2b8678f 9cbe47c 74076e9 069100e 2b8678f 7b6b779 2b8678f 7b6b779 78953ab 2b8678f d35ea54 74076e9 2b8678f d35ea54 2b8678f 74076e9 d35ea54 74076e9 2b8678f d35ea54 74076e9 2b8678f d35ea54 2b8678f a8c9931 2b8678f e17cc19 2b8678f a8c9931 2b8678f ae516d2 2b8678f 74076e9 2b8678f d35ea54 2b8678f a8c9931 41d2d4a a8c9931 2b8678f a8c9931 2b8678f a8c9931 2b8678f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import streamlit as st
import os
import sys
import torch
import pickle
import numpy
import librosa
import subprocess
from avatar import Avatar
def run_pickleface():
    """Run ``pickleface.py`` in a child process and report whether it succeeded.

    The script's captured stdout is echoed with ``print``. Any failure is
    shown in the Streamlit UI through ``st.error``.

    Returns:
        bool: True if the script exited with status 0, False otherwise.
    """
    # Launch the child with the interpreter that is running this app; a bare
    # 'python' may resolve to a different interpreter or be absent from PATH.
    interpreter = sys.executable or 'python'
    try:
        # check=True raises CalledProcessError on every non-zero exit status,
        # so no separate returncode test is needed after the call.
        result = subprocess.run(
            [interpreter, 'pickleface.py'],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        st.error(f"Critical error running pickleface.py: {e.stderr}")
        return False
    except OSError as e:
        # The process could not be started at all (e.g. interpreter missing).
        st.error(f"Critical error running pickleface.py: {e}")
        return False
    print(result.stdout)
    return True
def initialize_face_detection_results():
    """Make sure a face-detection pickle exists for every entry in ``options``.

    If any ``ref_videos/<option>_face_det_result.pkl`` file is missing,
    ``run_pickleface()`` is called once. When it reports failure, an error is
    shown and the Streamlit script is stopped via ``st.stop()``. Progress
    messages are written to ``current_status_placeholder``.
    """
    # Collect the options whose pickle file is not on disk yet
    absent = [
        name for name in options
        if not os.path.exists(f'ref_videos/{name}_face_det_result.pkl')
    ]
    if not absent:
        return

    current_status_placeholder.write("Creating face detection results...")
    succeeded = run_pickleface()
    if not succeeded:
        st.error("Failed to create face detection results")
        st.stop()
    current_status_placeholder.write("Face detection results created successfully!")
# Initial configuration: selectable options and one preview image per option
options = ['Aude', 'Kyla', 'Liv', 'MC6']
images = [f'ref_videos/{name}.png' for name in options]

# Put the Wav2Lip directory next to this file at the front of sys.path
wav2lip_path = os.path.join(os.path.dirname(__file__), "Wav2Lip")
if wav2lip_path not in sys.path:
    sys.path.insert(0, wav2lip_path)
# User interface: centred page title rendered as raw HTML.
# The font size uses the CSS unit "px"; the previous value "30x" is not a
# valid CSS length, so browsers ignored the declaration.
big_text = """
<div style='text-align: center;'>
<h1 style='font-size: 30px;'>Text to Speech Synchronized Video</h1>
</div>
"""
st.markdown(big_text, unsafe_allow_html=True)

# Placeholder for status messages and a progress bar starting at 0
current_status_placeholder = st.empty()
init_progress_bar = st.progress(0)
# One-time setup: runs only while 'is_initialized' is absent from session state
if 'is_initialized' not in st.session_state:
    initialize_face_detection_results()

    # Create the Avatar, store it in session state and work through an alias
    st.session_state.avatar = Avatar()
    avatar = st.session_state.avatar
    avatar.export_video = False

    # Load the model
    current_status_placeholder.write("Loading model...")
    avatar.load_model("checkpoint/wav2lip_gan.pth")
    current_status_placeholder.write("Model loaded successfully")

    # Pick the compute device: CUDA when torch reports it available, else CPU
    if torch.cuda.is_available():
        avatar.device = 'cuda'
    else:
        avatar.device = 'cpu'
    print(f"Using device: {avatar.device}")

    # Output / temporary file locations
    avatar.output_audio_path = "audio/"
    avatar.output_audio_filename = "result.wav"
    avatar.temp_lip_video_no_voice_path = "temp/"
    avatar.temp_lip_video_no_voice_filename = "result.avi"
    avatar.output_video_path = "results/"
    avatar.output_video_name = "result_voice.mp4"

    # Default reference video
    st.session_state.selected_option = "Liv"
    chosen = st.session_state.selected_option
    avatar.ref_video_path_and_filename = f"ref_videos/{chosen}.mp4"

    # Read the reference video frames and set the face-detection batch size
    avatar.get_video_full_frames(avatar.ref_video_path_and_filename)
    avatar.face_detect_batch_size = 16

    # Load the pickled face-detection results for every option.
    # NOTE(review): pickle.load executes arbitrary code from the file; these
    # files are expected to be produced locally — confirm they are trusted.
    st.session_state.face_det_results_dict = {}
    det_results = st.session_state.face_det_results_dict
    for option in options:
        with open(f'ref_videos/{option}_face_det_result.pkl', 'rb') as file:
            det_results[option] = pickle.load(file)
    avatar.face_detect_img_results = det_results[chosen]
    avatar.face_det_results_path_and_name = f'ref_videos/{chosen}_face_det_result.pkl'

    # Run text-to-speech / lip video once on a fixed sample sentence
    input_text = "Hi How are you?"
    avatar.text_to_lip_video(input_text, init_progress_bar)
    current_status_placeholder.write("Face detection results loaded")

    # Mark setup as done so later reruns skip this block
    st.session_state['is_initialized'] = True
# Video selection UI: radio button preselected to the option in session state
selected_option = st.radio("Choose an option:", options, index=options.index(st.session_state.selected_option))

# Two equal columns; only the left one is used, for the preview image
img_col1, _ = st.columns([1, 1])
with img_col1:
    st.image(images[options.index(selected_option)])

# React to a change of the selected video
if st.session_state.selected_option != selected_option:
    print("The selected option has changed!")
    st.session_state.selected_option = selected_option

    avatar = st.session_state.avatar
    avatar.ref_video_path_and_filename = f"ref_videos/{selected_option}.mp4"
    avatar.get_video_full_frames(avatar.ref_video_path_and_filename)
    avatar.face_detect_img_results = st.session_state.face_det_results_dict[selected_option]
    # Keep the results path in step with the selection, as the one-time setup
    # does for the default option; otherwise it keeps naming the previous
    # option's pickle file.
    # NOTE(review): how Avatar uses this path is not visible here — confirm.
    avatar.face_det_results_path_and_name = f'ref_videos/{selected_option}_face_det_result.pkl'