deepsync committed on
Commit
a639500
1 Parent(s): 344aab3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -5,7 +5,7 @@ from pydub.silence import detect_nonsilent
5
  from pydub import AudioSegment
6
 
7
 
8
- def get_labels(audio_fp, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_segment_length):
9
  audio = AudioSegment.from_file(audio_fp).set_channels(1)
10
  speech_timestamps = detect_nonsilent(audio, min_silence_len=min_silence_duration_ms, silence_thresh=-40)
11
  speech_timestamps = list(filter(lambda x: x[1]-x[0] > min_speech_duration_ms, speech_timestamps))
@@ -44,6 +44,16 @@ def get_labels(audio_fp, min_speech_duration_ms, min_silence_duration_ms, auto_m
44
  else:
45
  new_labels.append(list(labels[i]))
46
 
 
 
 
 
 
 
 
 
 
 
47
  translate_labels = list(map(lambda x: f"{x[0]}\t{x[1]}\t{x[2]}", new_labels))
48
 
49
  filename_path = f"{fn}_translate_label.txt"
@@ -57,11 +67,13 @@ interface = gr.Interface(
57
  get_labels,
58
  [
59
  gr.Audio(type="filepath", label="Audio file"),
60
- gr.Number(label="min_speech_duration_ms", value=80, info="default (80)"),
61
- gr.Number(label="min_silence_duration_ms", value=40, info="default (100)"),
62
  gr.Checkbox(label="Auto merge", value=True),
63
  gr.Textbox(label="Gap max threshold value (seconds)", value=0.350),
64
- gr.Number(label="Approx Max Segment Length", value=7)
 
 
65
  ],
66
  [
67
  gr.File(label="VAD Labels"),
 
5
  from pydub import AudioSegment
6
 
7
 
8
+ def get_labels(audio_fp, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_segment_length, end_extension, extend_small_segments):
9
  audio = AudioSegment.from_file(audio_fp).set_channels(1)
10
  speech_timestamps = detect_nonsilent(audio, min_silence_len=min_silence_duration_ms, silence_thresh=-40)
11
  speech_timestamps = list(filter(lambda x: x[1]-x[0] > min_speech_duration_ms, speech_timestamps))
 
44
  else:
45
  new_labels.append(list(labels[i]))
46
 
47
+ if extend_small_segments:
48
+ for i, nl in enumerate(new_labels[:-1]):
49
+ if nl[1] - nl[0] <= 1.2 and nl[0] + 1.2 < new_labels[i+1][0]:
50
+ nl[1] = nl[0] + 1.2
51
+
52
+ if end_extension:
53
+ for i, nl in enumerate(new_labels[:-1]):
54
+ if nl[0] + end_extension < new_labels[i+1][0]:
55
+ nl[1] = nl[0] + end_extension
56
+
57
  translate_labels = list(map(lambda x: f"{x[0]}\t{x[1]}\t{x[2]}", new_labels))
58
 
59
  filename_path = f"{fn}_translate_label.txt"
 
67
  get_labels,
68
  [
69
  gr.Audio(type="filepath", label="Audio file"),
70
+ gr.Number(label="min_speech_duration_ms", value=40, info="default (40)"),
71
+ gr.Number(label="min_silence_duration_ms", value=40, info="default (40)"),
72
  gr.Checkbox(label="Auto merge", value=True),
73
  gr.Textbox(label="Gap max threshold value (seconds)", value=0.350),
74
+ gr.Number(label="Approx Max Segment Length", value=7),
75
+ gr.Number(label="Extend end by (seconds)", value=0),
76
+ gr.Checkbox(label="Extend small segments (minimum 1.2 seconds)", value=False)
77
  ],
78
  [
79
  gr.File(label="VAD Labels"),