roychao19477 committed on
Commit
d450f41
·
1 Parent(s): de425e9

Test on lengths

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -63,8 +63,6 @@ from moviepy import ImageSequenceClip
63
  from scipy.io import wavfile
64
  from avse_code import run_avse
65
 
66
- # Load face detector
67
- model = YOLO("yolov8n-face.pt").cuda() # assumes CUDA available
68
 
69
 
70
  from decord import VideoReader, cpu
@@ -75,18 +73,18 @@ import spaces
75
  # Load model once globally
76
  #ckpt_path = "ckpts/ep215_0906.oat.ckpt"
77
  #model = AVSEModule.load_from_checkpoint(ckpt_path)
78
- avse_model = AVSEModule()
79
  #avse_state_dict = torch.load("ckpts/ep215_0906.oat.ckpt")
80
- avse_state_dict = torch.load("ckpts/ep220_0908.oat.ckpt")
81
- avse_model.load_state_dict(avse_state_dict, strict=True)
82
- avse_model.to("cuda")
83
- avse_model.eval()
84
 
85
  CHUNK_SIZE_AUDIO = 48000 # 3 sec at 16kHz
86
  CHUNK_SIZE_VIDEO = 75 # 25fps × 3 sec
87
 
88
  @spaces.GPU
89
  def run_avse_inference(video_path, audio_path):
 
 
 
 
 
90
  estimated = run_avse(video_path, audio_path)
91
  # Load audio
92
  #noisy, _ = sf.read(audio_path, dtype='float32') # (N, )
@@ -164,6 +162,8 @@ def extract_resampled_audio(video_path, target_sr=16000):
164
 
165
  @spaces.GPU
166
  def yolo_detection(frame, verbose=False):
 
 
167
  return model(frame, verbose=verbose)[0]
168
 
169
  @spaces.GPU
 
63
  from scipy.io import wavfile
64
  from avse_code import run_avse
65
 
 
 
66
 
67
 
68
  from decord import VideoReader, cpu
 
73
  # Load model once globally
74
  #ckpt_path = "ckpts/ep215_0906.oat.ckpt"
75
  #model = AVSEModule.load_from_checkpoint(ckpt_path)
 
76
  #avse_state_dict = torch.load("ckpts/ep215_0906.oat.ckpt")
 
 
 
 
77
 
78
  CHUNK_SIZE_AUDIO = 48000 # 3 sec at 16kHz
79
  CHUNK_SIZE_VIDEO = 75 # 25fps × 3 sec
80
 
81
  @spaces.GPU
82
  def run_avse_inference(video_path, audio_path):
83
+ avse_model = AVSEModule()
84
+ avse_state_dict = torch.load("ckpts/ep220_0908.oat.ckpt")
85
+ avse_model.load_state_dict(avse_state_dict, strict=True)
86
+ avse_model.to("cuda")
87
+ avse_model.eval()
88
  estimated = run_avse(video_path, audio_path)
89
  # Load audio
90
  #noisy, _ = sf.read(audio_path, dtype='float32') # (N, )
 
162
 
163
  @spaces.GPU
164
  def yolo_detection(frame, verbose=False):
165
+ # Load face detector
166
+ model = YOLO("yolov8n-face.pt").cuda() # assumes CUDA available
167
  return model(frame, verbose=verbose)[0]
168
 
169
  @spaces.GPU