STUDIO

Running

App Files Community

openfree committed on May 28

Commit

7409da1

verified ·

1 Parent(s): 3a4d626

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -8

app.py CHANGED Viewed

@@ -194,7 +194,7 @@ def generate_text_to_image(prompt, width, height, guidance, inference_steps, see
 @spaces.GPU(duration=60)
 @torch.inference_mode()
-def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_steps=25, cfg_strength=4.5, duration=8.0):
     """비디오에 사운드를 추가하는 함수"""
     if not MMAUDIO_LOADED:
         logging.error("MMAudio model not loaded")
@@ -205,12 +205,10 @@ def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_step
         rng.manual_seed(seed)
         fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
-        # 비디오 로드 - 반환되는 duration을 다른 변수명으로 받기
-        clip_frames, sync_frames, actual_duration = load_video(video_path, duration)
         clip_frames = clip_frames.unsqueeze(0)
         sync_frames = sync_frames.unsqueeze(0)
-        # 실제 비디오 길이로 seq_cfg 업데이트
         mmaudio_seq_cfg.duration = actual_duration
         mmaudio_net.update_seq_lengths(mmaudio_seq_cfg.latent_seq_len, mmaudio_seq_cfg.clip_seq_len, mmaudio_seq_cfg.sync_seq_len)
@@ -225,12 +223,13 @@ def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_step
                           cfg_strength=cfg_strength)
         audio = audios.float().cpu()[0]
-        # 비디오와 오디오 결합 - duration_sec 매개변수 제거
         video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
         make_video(video_path,
                    video_save_path,
                    audio,
-                   sampling_rate=mmaudio_seq_cfg.sampling_rate)
         return video_save_path
     except Exception as e:
@@ -274,6 +273,7 @@ def generate_video_from_image(image, prompt="", length=4.0, sound_generation="
                     sound_prompt = prompt if prompt else "ambient sound"
                 # 비디오에 사운드 추가 - 모든 매개변수를 명시적으로 전달
                 video_with_sound = video_to_audio(
                     video_path=video_path,
                     prompt=sound_prompt,
@@ -281,8 +281,9 @@ def generate_video_from_image(image, prompt="", length=4.0, sound_generation="
                     seed=random.randint(0, 9999999),
                     num_steps=25,
                     cfg_strength=4.5,
-                    duration=length
                 )
                 return video_with_sound
             return video_path

 @spaces.GPU(duration=60)
 @torch.inference_mode()
+def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_steps=25, cfg_strength=4.5, target_duration=8.0):
     """비디오에 사운드를 추가하는 함수"""
     if not MMAUDIO_LOADED:
         logging.error("MMAudio model not loaded")
         rng.manual_seed(seed)
         fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
+        # 비디오 로드 - target_duration 사용
+        clip_frames, sync_frames, actual_duration = load_video(video_path, target_duration)
         clip_frames = clip_frames.unsqueeze(0)
         sync_frames = sync_frames.unsqueeze(0)
         mmaudio_seq_cfg.duration = actual_duration
         mmaudio_net.update_seq_lengths(mmaudio_seq_cfg.latent_seq_len, mmaudio_seq_cfg.clip_seq_len, mmaudio_seq_cfg.sync_seq_len)
                           cfg_strength=cfg_strength)
         audio = audios.float().cpu()[0]
+        # 비디오와 오디오 결합
         video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
         make_video(video_path,
                    video_save_path,
                    audio,
+                   sampling_rate=mmaudio_seq_cfg.sampling_rate,
+                   duration_sec=mmaudio_seq_cfg.duration)
         return video_save_path
     except Exception as e:
                     sound_prompt = prompt if prompt else "ambient sound"
                 # 비디오에 사운드 추가 - 모든 매개변수를 명시적으로 전달
+# 비디오에 사운드 추가
                 video_with_sound = video_to_audio(
                     video_path=video_path,
                     prompt=sound_prompt,
                     seed=random.randint(0, 9999999),
                     num_steps=25,
                     cfg_strength=4.5,
+                    target_duration=length  # duration → target_duration
                 )
                 return video_with_sound
             return video_path