Spaces:

VIDraft
/

Portrait-Animation

Running on Zero

App Files Files Community

openfree committed 17 days ago

Commit

729c163

verified ·

1 Parent(s): 406d112

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -16

app.py CHANGED Viewed

@@ -27,15 +27,17 @@ def get_md5(content):
 def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
     expand_ratio = 0.5
     min_resolution = 512
-    inference_steps = 25
-    # Get audio duration (for logging)
     audio = AudioSegment.from_file(audio_path)
-    duration = len(audio) / 1000.0  # Convert ms to seconds
     face_info = pipe.preprocess(img_path, expand_ratio=expand_ratio)
     print(f"Face detection info: {face_info}")
-    print(f"Audio duration: {duration} seconds")
     if face_info['face_num'] > 0:
         crop_image_path = img_path + '.crop.png'
@@ -43,7 +45,7 @@ def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
         img_path = crop_image_path
         os.makedirs(os.path.dirname(res_video_path), exist_ok=True)
-        # Process the video (duration parameter removed)
         pipe.process(
             img_path,
             audio_path,
@@ -52,7 +54,7 @@ def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
             inference_steps=inference_steps,
             dynamic_scale=dynamic_scale
         )
-        # ★ 수정: 생성된 비디오 파일 경로를 반환하도록 함.
         return res_video_path
     else:
         return -1
@@ -63,7 +65,7 @@ os.makedirs(tmp_path, exist_ok=True)
 os.makedirs(res_path, exist_ok=True)
 def process_sonic(image, audio, dynamic_scale):
-    # Input validation
     if image is None:
         raise gr.Error("Please upload an image")
     if audio is None:
@@ -77,7 +79,7 @@ def process_sonic(image, audio, dynamic_scale):
     if len(arr.shape) == 1:
         arr = arr[:, None]
-    # Create an audio segment from numpy array
     audio_segment = AudioSegment(
         arr.tobytes(),
         frame_rate=sampling_rate,
@@ -86,18 +88,18 @@ def process_sonic(image, audio, dynamic_scale):
     )
     audio_segment = audio_segment.set_frame_rate(sampling_rate)
-    # Generate file paths
     image_path = os.path.abspath(os.path.join(tmp_path, f'{img_md5}.png'))
     audio_path = os.path.abspath(os.path.join(tmp_path, f'{audio_md5}.wav'))
     res_video_path = os.path.abspath(os.path.join(res_path, f'{img_md5}_{audio_md5}_{dynamic_scale}.mp4'))
-    # Save input files if they don't exist
     if not os.path.exists(image_path):
         image.save(image_path)
     if not os.path.exists(audio_path):
         audio_segment.export(audio_path, format="wav")
-    # If cached video exists, return it; otherwise, generate a new one
     if os.path.exists(res_video_path):
         print(f"Using cached result: {res_video_path}")
         return res_video_path
@@ -105,7 +107,7 @@ def process_sonic(image, audio, dynamic_scale):
         print(f"Generating new video with dynamic scale: {dynamic_scale}")
         return get_video_res(image_path, audio_path, res_video_path, dynamic_scale)
-# Dummy get_example function to prevent errors in examples section
 def get_example():
     return []
@@ -173,7 +175,6 @@ with gr.Blocks(css=css) as demo:
                 elem_id="video_output"
             )
-    # Process button click: when clicked, process_sonic() is called and its return value is sent to video_output.
     process_btn.click(
         fn=process_sonic,
         inputs=[image_input, audio_input, dynamic_scale],
@@ -181,7 +182,6 @@ with gr.Blocks(css=css) as demo:
         api_name="animate"
     )
-    # Examples section
     gr.Examples(
         examples=get_example(),
         fn=process_sonic,
@@ -190,7 +190,6 @@ with gr.Blocks(css=css) as demo:
         cache_examples=False
     )
-    # Footer with attribution and links
     gr.HTML("""
         <div style="text-align: center; margin-top: 2em;">
             <div style="margin-bottom: 1em;">
@@ -205,5 +204,5 @@ with gr.Blocks(css=css) as demo:
         </div>
     """)
-# To create a public link, share=True is set.
 demo.launch(share=True)

 def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
     expand_ratio = 0.5
     min_resolution = 512
+    fps = 25  # 원하는 프레임 레이트 설정 (예: 25 fps)
+    # 오디오 파일로부터 실제 오디오 길이를 구하고, 그에 맞춰 추론 단계를 계산합니다.
     audio = AudioSegment.from_file(audio_path)
+    duration = len(audio) / 1000.0  # 초 단위
+    # 오디오 길이에 따른 프레임 수 계산 (예: 5초 -> 5*25 = 125 단계)
+    inference_steps = int(duration * fps)
+    print(f"Audio duration: {duration} seconds, using inference_steps: {inference_steps}")
     face_info = pipe.preprocess(img_path, expand_ratio=expand_ratio)
     print(f"Face detection info: {face_info}")
     if face_info['face_num'] > 0:
         crop_image_path = img_path + '.crop.png'
         img_path = crop_image_path
         os.makedirs(os.path.dirname(res_video_path), exist_ok=True)
+        # Sonic.process() 호출 시, 동적으로 계산된 inference_steps를 전달합니다.
         pipe.process(
             img_path,
             audio_path,
             inference_steps=inference_steps,
             dynamic_scale=dynamic_scale
         )
+        # 생성된 비디오 파일 경로 반환
         return res_video_path
     else:
         return -1
 os.makedirs(res_path, exist_ok=True)
 def process_sonic(image, audio, dynamic_scale):
+    # 입력 검증
     if image is None:
         raise gr.Error("Please upload an image")
     if audio is None:
     if len(arr.shape) == 1:
         arr = arr[:, None]
+    # numpy array로부터 AudioSegment 생성
     audio_segment = AudioSegment(
         arr.tobytes(),
         frame_rate=sampling_rate,
     )
     audio_segment = audio_segment.set_frame_rate(sampling_rate)
+    # 파일 경로 생성
     image_path = os.path.abspath(os.path.join(tmp_path, f'{img_md5}.png'))
     audio_path = os.path.abspath(os.path.join(tmp_path, f'{audio_md5}.wav'))
     res_video_path = os.path.abspath(os.path.join(res_path, f'{img_md5}_{audio_md5}_{dynamic_scale}.mp4'))
+    # 입력 파일이 없으면 저장
     if not os.path.exists(image_path):
         image.save(image_path)
     if not os.path.exists(audio_path):
         audio_segment.export(audio_path, format="wav")
+    # 캐시된 결과가 있으면 반환, 없으면 새로 생성
     if os.path.exists(res_video_path):
         print(f"Using cached result: {res_video_path}")
         return res_video_path
         print(f"Generating new video with dynamic scale: {dynamic_scale}")
         return get_video_res(image_path, audio_path, res_video_path, dynamic_scale)
+# 예시 데이터를 위한 dummy 함수 (필요시 실제 예시 데이터로 수정)
 def get_example():
     return []
                 elem_id="video_output"
             )
     process_btn.click(
         fn=process_sonic,
         inputs=[image_input, audio_input, dynamic_scale],
         api_name="animate"
     )
     gr.Examples(
         examples=get_example(),
         fn=process_sonic,
         cache_examples=False
     )
     gr.HTML("""
         <div style="text-align: center; margin-top: 2em;">
             <div style="margin-bottom: 1em;">
         </div>
     """)
+# 공개 링크를 생성하려면 share=True 옵션 사용
 demo.launch(share=True)