Maximofn committed on
Commit
9af6e91
1 Parent(s): 6e9a4ca

Update separe.py

Browse files
Files changed (1) hide show
  1. separe.py +11 -7
separe.py CHANGED
@@ -60,6 +60,12 @@ def main(args):
60
  input_name = input.split(".")[0]
61
  output_name = output.split(".")[0]
62
 
 
 
 
 
 
 
63
  # Set input files with 8k sample rate and mono
64
  input_8k = f"{input_name}_8k.wav"
65
  input_8k_mono = f"{input_name}_8k_mono.wav"
@@ -67,14 +73,12 @@ def main(args):
67
  # Check if input has 8k sample rate, if not, change it
68
  sr = get_sample_rate(input)
69
  if sr != SAMPLE_RATE:
70
- print("Changing sample rate...")
71
  change_sample_rate(input, input_8k, SAMPLE_RATE)
72
  else:
73
  input_8k = input
74
 
75
  # Check if input is stereo, if yes, set it to mono
76
  if audio_is_stereo(input_8k):
77
- print("Setting mono...")
78
  set_mono(input_8k, input_8k_mono)
79
  else:
80
  input_8k_mono = input_8k
@@ -82,16 +86,16 @@ def main(args):
82
  # Separate audio voices
83
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
84
  separation = pipeline(Tasks.speech_separation, model='damo/speech_mossformer_separation_temporal_8k', device=device)
85
- print("Separating...")
86
  result = separation(input_8k_mono)
87
- print("Separated!")
88
 
89
  # Save separated audio voices
90
- print("Saving...")
91
  for i, signal in enumerate(result['output_pcm_list']):
92
- save_file = f'{output_name}_spk{i}.wav'
93
  sf.write(save_file, np.frombuffer(signal, dtype=np.int16), SAMPLE_RATE)
94
- print("Saved!")
 
 
 
95
 
96
  if __name__ == '__main__':
97
  argparser = argparse.ArgumentParser(description='Separate speech from a stereo audio file')
 
60
  input_name = input.split(".")[0]
61
  output_name = output.split(".")[0]
62
 
63
+ # Get folder of output file
64
+ input_folder = input_name.split("/")[0]
65
+ output_folder = output_name.split("/")[0]
66
+ input_file_name = input_name.split("/")[1]
67
+ output_file_name = output_name.split("/")[1]
68
+
69
  # Set input files with 8k sample rate and mono
70
  input_8k = f"{input_name}_8k.wav"
71
  input_8k_mono = f"{input_name}_8k_mono.wav"
 
73
  # Check if input has 8k sample rate, if not, change it
74
  sr = get_sample_rate(input)
75
  if sr != SAMPLE_RATE:
 
76
  change_sample_rate(input, input_8k, SAMPLE_RATE)
77
  else:
78
  input_8k = input
79
 
80
  # Check if input is stereo, if yes, set it to mono
81
  if audio_is_stereo(input_8k):
 
82
  set_mono(input_8k, input_8k_mono)
83
  else:
84
  input_8k_mono = input_8k
 
86
  # Separate audio voices
87
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
88
  separation = pipeline(Tasks.speech_separation, model='damo/speech_mossformer_separation_temporal_8k', device=device)
 
89
  result = separation(input_8k_mono)
 
90
 
91
  # Save separated audio voices
 
92
  for i, signal in enumerate(result['output_pcm_list']):
93
+ save_file = f'{output_folder}/{output_file_name}_speaker{i:003d}.wav'
94
  sf.write(save_file, np.frombuffer(signal, dtype=np.int16), SAMPLE_RATE)
95
+
96
+ # Remove temporary files
97
+ os.remove(input_8k)
98
+ os.remove(input_8k_mono)
99
 
100
  if __name__ == '__main__':
101
  argparser = argparse.ArgumentParser(description='Separate speech from a stereo audio file')