agorlanov committed on
Commit
ea4b219
1 Parent(s): 4c18976
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  from main_pipeline import main_pipeline
4
  from scipy.io.wavfile import write
5
 
6
- title = "audio_denoise and speaker diarization"
7
 
8
 
9
  def app_pipeline(audio):
@@ -14,9 +14,15 @@ def app_pipeline(audio):
14
  return result_diarization + [None] * (10 - len(result_diarization))
15
 
16
 
 
 
 
17
  gr.Interface(
18
  app_pipeline,
19
  gr.Audio(type="numpy", label="Input"),
20
  [gr.Audio(visible=True) for i in range(10)],
21
  title=title,
 
 
 
22
  ).launch(enable_queue=True)
 
3
  from main_pipeline import main_pipeline
4
  from scipy.io.wavfile import write
5
 
6
+ title = "audio_denoise and speaker diarization. Fast inference https://t.me/diarizarion_bot"
7
 
8
 
9
  def app_pipeline(audio):
 
14
  return result_diarization + [None] * (10 - len(result_diarization))
15
 
16
 
17
+ example_list = [
18
+ ["dialog.mp3"]
19
+ ]
20
  gr.Interface(
21
  app_pipeline,
22
  gr.Audio(type="numpy", label="Input"),
23
  [gr.Audio(visible=True) for i in range(10)],
24
  title=title,
25
+ examples=example_list,
26
+ cache_examples=False
27
+
28
  ).launch(enable_queue=True)
main_pipeline.py CHANGED
@@ -21,7 +21,6 @@ def save_speaker_audios(segments, denoised_audio_path, out_folder='out', out_f=4
21
  for _, r in temp_df.iterrows():
22
  start = int(r["start"] * out_f)
23
  end = int(r["end"] * out_f)
24
- # output_signal[start:end] = signal[start:end]
25
  output_signal.append(signal[start:end])
26
 
27
  out_wav_path = f'{out_folder}/{label}.wav'
@@ -42,4 +41,4 @@ def main_pipeline(audio_path):
42
 
43
 
44
  if __name__ == '__main__':
45
- main_pipeline('out.wav')
 
21
  for _, r in temp_df.iterrows():
22
  start = int(r["start"] * out_f)
23
  end = int(r["end"] * out_f)
 
24
  output_signal.append(signal[start:end])
25
 
26
  out_wav_path = f'{out_folder}/{label}.wav'
 
41
 
42
 
43
  if __name__ == '__main__':
44
+ main_pipeline('dialog.mp3')
utils/denoise_pipeline.py CHANGED
@@ -10,9 +10,12 @@ from demucs.pretrained import get_model
10
  demucs_model = get_model('htdemucs')
11
 
12
 
13
- def denoise(filename, device):
 
14
  wav_ref, sr = librosa.load(filename, mono=False, sr=44100)
 
15
  wav = torch.tensor(wav_ref)
 
16
  ref = wav.mean(0)
17
  wav = (wav - ref.mean()) / wav.std()
18
  sources = apply_model(
@@ -32,4 +35,4 @@ def denoise(filename, device):
32
 
33
  if __name__ == '__main__':
34
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
35
- denoise(filename='../out.wav', device=device)
 
10
  demucs_model = get_model('htdemucs')
11
 
12
 
13
+ def denoise(filename: str, device: str) -> str:
14
+
15
  wav_ref, sr = librosa.load(filename, mono=False, sr=44100)
16
+
17
  wav = torch.tensor(wav_ref)
18
+ wav = torch.cat([wav.unsqueeze(0), wav.unsqueeze(0)]) if len(wav.shape) == 1 else wav
19
  ref = wav.mean(0)
20
  wav = (wav - ref.mean()) / wav.std()
21
  sources = apply_model(
 
35
 
36
  if __name__ == '__main__':
37
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
38
+ denoise(filename='../oxx.wav', device=device)
utils/diarization_pipeline.py CHANGED
@@ -1,9 +1,5 @@
1
  from simple_diarizer.diarizer import Diarizer
2
 
3
- import librosa
4
-
5
- import soundfile as sf
6
-
7
 
8
  class DiarizationPipeline:
9
  def __init__(self, ):
@@ -16,10 +12,7 @@ class DiarizationPipeline:
16
  )
17
 
18
  def __call__(self, wav_file):
19
- y_16k, sr = librosa.load(wav_file, sr=16000)
20
- sf.write("converted.wav", y_16k, 16000, 'PCM_24')
21
-
22
- segments = self.diar.diarize("converted.wav",
23
  num_speakers=None,
24
  threshold=9e-1, )
25
 
 
1
  from simple_diarizer.diarizer import Diarizer
2
 
 
 
 
 
3
 
4
  class DiarizationPipeline:
5
  def __init__(self, ):
 
12
  )
13
 
14
  def __call__(self, wav_file):
15
+ segments = self.diar.diarize(wav_file,
 
 
 
16
  num_speakers=None,
17
  threshold=9e-1, )
18