music_mixing_style_transfer

Running

App Files Files Community

jhtonyKoo committed on Oct 23, 2023

Commit

66e10e8

•

1 Parent(s): f528768

Update inference/style_transfer.py

Browse files

Files changed (1) hide show

inference/style_transfer.py +10 -17

inference/style_transfer.py CHANGED Viewed

@@ -26,7 +26,7 @@ from data_loader import *
 class Mixing_Style_Transfer_Inference:
     def __init__(self, args, trained_w_ddp=True):
-        if args.inference_device!='cpu' and torch.cuda.is_available():
             self.device = torch.device("cuda:0")
         else:
             self.device = torch.device("cpu")
@@ -86,7 +86,7 @@ class Mixing_Style_Transfer_Inference:
                     if os.path.exists(os.path.join(cur_sep_output_dir, self.args.separation_model, cur_file_name, 'drums.wav')):
                         print(f'\talready separated current file : {cur_sep_file_path}')
                     else:
-                        cur_cmd_line = f"demucs {cur_sep_file_path} -n {self.args.separation_model} -d {self.args.separation_device} -o {cur_sep_output_dir}"
                         os.system(cur_cmd_line)
@@ -109,7 +109,7 @@ class Mixing_Style_Transfer_Inference:
     # Inference whole song
-    def inference(self, ):
         print("\n======= Start to inference music mixing style transfer =======")
         # normalized input
         output_name_tag = 'output' if self.args.normalize_input else 'output_notnormed'
@@ -267,7 +267,10 @@ class Mixing_Style_Transfer_Inference:
                     sf.write(os.path.join(cur_out_dir, f"{cur_inst_name}_{output_name_tag}.wav"), fin_data_out_inst.transpose(-1, -2), self.args.sample_rate, 'PCM_16')
             # remix
             fin_data_out_mix = sum(inst_outputs)
-            sf.write(os.path.join(cur_out_dir, f"mixture_{output_name_tag}.wav"), fin_data_out_mix.transpose(-1, -2), self.args.sample_rate, 'PCM_16')
     # function that segmentize an entire song into batch
@@ -322,7 +325,7 @@ class Mixing_Style_Transfer_Inference:
-if __name__ == '__main__':
     os.environ['MASTER_ADDR'] = '127.0.0.1'
     os.environ["CUDA_VISIBLE_DEVICES"] = '0'
     os.environ['MASTER_PORT'] = '8888'
@@ -366,7 +369,7 @@ if __name__ == '__main__':
     inference_args.add_argument('--stem_level_directory_name', type=str, default='separated')
     inference_args.add_argument('--save_each_inst', type=str2bool, default=False)
     inference_args.add_argument('--do_not_separate', type=str2bool, default=False)
-    inference_args.add_argument('--separation_model', type=str, default='mdx_extra')
     # FX normalization
     inference_args.add_argument('--normalize_input', type=str2bool, default=True)
     inference_args.add_argument('--normalization_order', type=str2bool, default=['loudness', 'eq', 'compression', 'imager', 'loudness']) # Effects to be normalized, order matters
@@ -376,9 +379,7 @@ if __name__ == '__main__':
     device_args = parser.add_argument_group('Device args')
     device_args.add_argument('--workers', type=int, default=1)
-    device_args.add_argument('--inference_device', type=str, default='gpu', help="if this option is not set to 'cpu', inference will happen on gpu only if there is a detected one")
     device_args.add_argument('--batch_size', type=int, default=1)   # for processing long audio
-    device_args.add_argument('--separation_device', type=str, default='cpu', help="device for performing source separation using Demucs")
     args = parser.parse_args()
@@ -388,13 +389,5 @@ if __name__ == '__main__':
     args.cfg_encoder = configs['Effects_Encoder']['default']
     args.cfg_converter = configs['TCN']['default']
-    # Perform music mixing style transfer
-    inference_style_transfer = Mixing_Style_Transfer_Inference(args)
-    if args.interpolation:
-        inference_style_transfer.inference_interpolation()
-    else:
-        inference_style_transfer.inference()

 class Mixing_Style_Transfer_Inference:
     def __init__(self, args, trained_w_ddp=True):
+        if torch.cuda.is_available():
             self.device = torch.device("cuda:0")
         else:
             self.device = torch.device("cpu")
                     if os.path.exists(os.path.join(cur_sep_output_dir, self.args.separation_model, cur_file_name, 'drums.wav')):
                         print(f'\talready separated current file : {cur_sep_file_path}')
                     else:
+                        cur_cmd_line = f"demucs {cur_sep_file_path} -n {self.args.separation_model} -d {self.device} -o {cur_sep_output_dir}"
                         os.system(cur_cmd_line)
     # Inference whole song
+    def inference(self, input_track_path, reference_track_path):
         print("\n======= Start to inference music mixing style transfer =======")
         # normalized input
         output_name_tag = 'output' if self.args.normalize_input else 'output_notnormed'
                     sf.write(os.path.join(cur_out_dir, f"{cur_inst_name}_{output_name_tag}.wav"), fin_data_out_inst.transpose(-1, -2), self.args.sample_rate, 'PCM_16')
             # remix
             fin_data_out_mix = sum(inst_outputs)
+            fin_output_path = os.path.join(cur_out_dir, f"mixture_{output_name_tag}.wav"
+            sf.write(fin_output_path), fin_data_out_mix.transpose(-1, -2), self.args.sample_rate, 'PCM_16')
+            return fin_output_path
     # function that segmentize an entire song into batch
+def set_up()
     os.environ['MASTER_ADDR'] = '127.0.0.1'
     os.environ["CUDA_VISIBLE_DEVICES"] = '0'
     os.environ['MASTER_PORT'] = '8888'
     inference_args.add_argument('--stem_level_directory_name', type=str, default='separated')
     inference_args.add_argument('--save_each_inst', type=str2bool, default=False)
     inference_args.add_argument('--do_not_separate', type=str2bool, default=False)
+    inference_args.add_argument('--separation_model', type=str, default='htdemucs')
     # FX normalization
     inference_args.add_argument('--normalize_input', type=str2bool, default=True)
     inference_args.add_argument('--normalization_order', type=str2bool, default=['loudness', 'eq', 'compression', 'imager', 'loudness']) # Effects to be normalized, order matters
     device_args = parser.add_argument_group('Device args')
     device_args.add_argument('--workers', type=int, default=1)
     device_args.add_argument('--batch_size', type=int, default=1)   # for processing long audio
     args = parser.parse_args()
     args.cfg_encoder = configs['Effects_Encoder']['default']
     args.cfg_converter = configs['TCN']['default']
+    return args