jhj0517 committed on
Commit
ec1ebf1
2 Parent(s): 0c8c544 fda7ba6

Merge pull request #270 from jhj0517/fix/load_audio

Browse files
modules/utils/files_manager.py CHANGED
@@ -61,3 +61,8 @@ def format_gradio_files(files: list):
61
  gradio_files.append(NamedString(file))
62
  return gradio_files
63
 
 
 
 
 
 
 
61
  gradio_files.append(NamedString(file))
62
  return gradio_files
63
 
64
+
65
# Recognised video container extensions (lower-case, with the leading dot).
# Stored once at module level as a frozenset so each lookup is a hash test
# instead of rebuilding and scanning a list on every call.
_VIDEO_EXTENSIONS = frozenset({
    '.mp4', '.mkv', '.avi', '.mov', '.flv', '.wmv',
    '.webm', '.m4v', '.mpeg', '.mpg', '.3gp',
})


def is_video(file_path: "str | os.PathLike[str]") -> bool:
    """Return True when *file_path* ends with a known video extension.

    Only the final extension of the path is examined and the comparison is
    case-insensitive; the file is never opened, so the result says nothing
    about the actual contents.

    Args:
        file_path: Path (or bare file name) to classify.

    Returns:
        True if the extension is one of the recognised video extensions,
        otherwise False (including when the path has no extension).
    """
    # splitext only splits on the last dot of the final path component, so
    # "dir.mp4/song.wav" yields ".wav" and "a.mp4.zip" yields ".zip".
    extension = os.path.splitext(file_path)[1].lower()
    return extension in _VIDEO_EXTENSIONS
modules/uvr/music_separator.py CHANGED
@@ -9,6 +9,8 @@ import gradio as gr
9
  from datetime import datetime
10
 
11
  from uvr.models import MDX, Demucs, VrNetwork, MDXC
 
 
12
 
13
 
14
  class MusicSeparator:
@@ -77,14 +79,18 @@ class MusicSeparator:
77
  tuple[np.ndarray, np.ndarray]: Instrumental and vocals numpy arrays.
78
  """
79
  if isinstance(audio, str):
80
- self.audio_info = torchaudio.info(audio)
81
- sample_rate = self.audio_info.sample_rate
82
- output_filename, ext = os.path.splitext(audio)
83
  output_filename, ext = os.path.basename(audio), ".wav"
 
 
 
 
 
 
 
84
  else:
85
- sample_rate = 16000
86
  timestamp = datetime.now().strftime("%m%d%H%M%S")
87
  output_filename, ext = f"UVR-{timestamp}", ".wav"
 
88
 
89
  model_config = {
90
  "segment": segment_size,
@@ -94,7 +100,7 @@ class MusicSeparator:
94
  if (self.model is None or
95
  self.current_model_size != model_name or
96
  self.model_config != model_config or
97
- self.audio_info.sample_rate != sample_rate or
98
  self.device != device):
99
  progress(0, desc="Initializing UVR Model..")
100
  self.update_model(
 
9
  from datetime import datetime
10
 
11
  from uvr.models import MDX, Demucs, VrNetwork, MDXC
12
+ from modules.utils.files_manager import is_video
13
+ from modules.diarize.audio_loader import load_audio
14
 
15
 
16
  class MusicSeparator:
 
79
  tuple[np.ndarray, np.ndarray]: Instrumental and vocals numpy arrays.
80
  """
81
  if isinstance(audio, str):
 
 
 
82
  output_filename, ext = os.path.basename(audio), ".wav"
83
+
84
+ if is_video(audio):
85
+ audio = load_audio(audio)
86
+ sample_rate = 16000
87
+ else:
88
+ self.audio_info = torchaudio.info(audio)
89
+ sample_rate = self.audio_info.sample_rate
90
  else:
 
91
  timestamp = datetime.now().strftime("%m%d%H%M%S")
92
  output_filename, ext = f"UVR-{timestamp}", ".wav"
93
+ sample_rate = 16000
94
 
95
  model_config = {
96
  "segment": segment_size,
 
100
  if (self.model is None or
101
  self.current_model_size != model_name or
102
  self.model_config != model_config or
103
+ self.model.sample_rate != sample_rate or
104
  self.device != device):
105
  progress(0, desc="Initializing UVR Model..")
106
  self.update_model(