Spaces:

Maximofn
/

subtify

Sleeping

App Files Files Community

Maximofn committed on Oct 19, 2023

Commit

6e9a4ca

•

1 Parent(s): 5df18dc

Create script to separate voices into input audio file

Browse files

Files changed (1) hide show

separe.py +101 -0

separe.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import argparse
+import os
+import subprocess
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+import soundfile as sf
+import numpy as np
+import torch
+# Sample rate (Hz) that the input is resampled to before separation and that
+# the separated output files are written with.
+SAMPLE_RATE = 8000
+def get_sample_rate(audio_file_path):
+    """
+    Get the sample rate of an audio file
+    Args:
+        audio_file_path (str): Path to the audio file
+    Returns:
+        int: Sample rate of the audio file
+    """
+    _, sample_rate = sf.read(audio_file_path, always_2d=True)
+    return sample_rate
+def change_sample_rate(input_audio_file_path, output_audio_file_path, sample_rate):
+    """
+    Change the sample rate of an audio file
+    Args:
+        input_audio_file_path (str): Path to the input audio file
+        output_audio_file_path (str): Path to the output audio file
+        sample_rate (int): Sample rate to change to
+    """
+    os.system(f'ffmpeg -i {input_audio_file_path} -ar {sample_rate} {output_audio_file_path}')
+def audio_is_stereo(audio_file_path):
+    """
+    Check if an audio file is stereo
+    Args:
+        audio_file_path (str): Path to the audio file
+    Returns:
+        bool: True if the audio file is stereo, False otherwise
+    """
+    audio, _ = sf.read(audio_file_path, always_2d=True)
+    return audio.shape[1] == 2
+def set_mono(input_audio_file_path, output_audio_file_path):
+    """
+    Set an audio file to mono
+    Args:
+        input_audio_file_path (str): Path to the input audio file
+        output_audio_file_path (str): Path to the output audio file
+    """
+    os.system(f'ffmpeg -i {input_audio_file_path} -ac 1 {output_audio_file_path}')
+def main(args):
+    # Get input and output files
+    input = args.input
+    output = args.output
+    # Get input and output names
+    input_name = input.split(".")[0]
+    output_name = output.split(".")[0]
+    # Set input files with 8k sample rate and mono
+    input_8k = f"{input_name}_8k.wav"
+    input_8k_mono = f"{input_name}_8k_mono.wav"
+    # Check if input has 8k sample rate, if not, change it
+    sr = get_sample_rate(input)
+    if sr != SAMPLE_RATE:
+        print("Changing sample rate...")
+        change_sample_rate(input, input_8k, SAMPLE_RATE)
+    else:
+        input_8k = input
+    # Check if input is stereo, if yes, set it to mono
+    if audio_is_stereo(input_8k):
+        print("Setting mono...")
+        set_mono(input_8k, input_8k_mono)
+    else:
+        input_8k_mono = input_8k
+    # Separate audio voices
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    separation = pipeline(Tasks.speech_separation, model='damo/speech_mossformer_separation_temporal_8k', device=device)
+    print("Separating...")
+    result = separation(input_8k_mono)
+    print("Separated!")
+    # Save separated audio voices
+    print("Saving...")
+    for i, signal in enumerate(result['output_pcm_list']):
+        save_file = f'{output_name}_spk{i}.wav'
+        sf.write(save_file, np.frombuffer(signal, dtype=np.int16), SAMPLE_RATE)
+    print("Saved!")
+if __name__ == '__main__':
+    argparser = argparse.ArgumentParser(description='Separate speech from a stereo audio file')
+    argparser.add_argument('input', type=str, help='Input audio file')
+    argparser.add_argument('output', type=str, help='Output directory')
+    args = argparser.parse_args()
+    main(args)