all-in-one

Sleeping

App Files Community

helloWorld199 committed on Jun 15

Commit

4936aba

•

1 Parent(s): 09c8fbd

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -0

app.py CHANGED Viewed

@@ -167,7 +167,57 @@ def add_voice_label(json_file, audio_path):
     with open(json_file, 'w') as f:
         json.dump(data, f, indent=4)
 with gr.Blocks() as demo:
   gr.HTML(HEADER)

     with open(json_file, 'w') as f:
         json.dump(data, f, indent=4)
def _group_consecutive_indices(indices):
    """Collapse sorted ints into inclusive ``(first, last)`` runs of consecutive values."""
    runs = []
    for idx in indices:
        if runs and idx == runs[-1][1] + 1:
            # Extends the run currently being built.
            runs[-1][1] = idx
        else:
            runs.append([idx, idx])
    return [(first, last) for first, last in runs]


def _format_minutes_seconds(total_seconds):
    """Render a duration given in seconds as a ``"<minutes>.<seconds>"`` string."""
    minutes, seconds = divmod(float(total_seconds), 60)
    # NOTE(review): the "<minutes>.<seconds>" layout is kept from the original
    # f-string; with fractional seconds it reads like "1.15.25" -- confirm
    # that whatever consumes `vocal_times` expects this.
    return f"{int(minutes)}.{seconds}"


def add_voice_labelv2(json_file, audio_path, threshold=0.7):
    """Detect voiced regions in an audio file and record them in a JSON file.

    The audio is cut into non-overlapping quarter-second windows, each window
    is scored by the module-level ``model``, and every run of consecutive
    windows scoring at least ``threshold`` becomes one segment. The segments
    are stored under ``data['vocal_times']`` and the JSON file is rewritten
    in place.

    Args:
        json_file: Path of the JSON file to load, update and overwrite.
        audio_path: Path of the audio file to analyse.
        threshold: Minimum speech probability for a window to count as voice.

    Note:
        ``vocal_times`` is written as a list of
        ``{"start_time": ..., "end_time": ...}`` dicts, one per detected
        segment (empty when no voice is found). The earlier draft assigned a
        single dict inside the loop, which kept only the last segment.
    """
    # Load the JSON file
    with open(json_file, 'r') as f:
        data = json.load(f)

    # Create VAD object (only used to reset the model state afterwards)
    vad_iterator = VADIterator(model)

    # Read input audio file, resampled to SAMPLING_RATE and mixed down to mono
    wav, _ = librosa.load(audio_path, sr=SAMPLING_RATE, mono=True)

    # Size of the window we compute the probability on: a quarter of a second.
    # Integer division is required because the value is used as a range step
    # and as a slice bound.
    # NOTE(review): some VAD models only accept specific chunk sizes --
    # confirm that `model` supports SAMPLING_RATE // 4 samples per call.
    window_size_samples = SAMPLING_RATE // 4

    speech_probs = []
    # Stop before the trailing partial window; only full windows are scored.
    for offset in range(0, len(wav) - window_size_samples + 1, window_size_samples):
        chunk = torch.from_numpy(wav[offset: offset + window_size_samples])
        speech_probs.append(model(chunk, SAMPLING_RATE).item())
    vad_iterator.reset_states()  # reset model states after each audio

    voice_idxs = [i for i, prob in enumerate(speech_probs) if prob >= threshold]
    if not voice_idxs:
        print("NO VOICE SEGMENTS DETECTED!")

    window_seconds = window_size_samples / SAMPLING_RATE
    vocal_times = []
    for first, last in _group_consecutive_indices(voice_idxs):
        # Window i covers [i * window_seconds, (i + 1) * window_seconds), so a
        # segment ends at the END of its last voiced window.
        vocal_times.append({
            "start_time": _format_minutes_seconds(first * window_seconds),
            "end_time": _format_minutes_seconds((last + 1) * window_seconds),
        })
    data['vocal_times'] = vocal_times

    # Persist the result, as add_voice_label does.
    with open(json_file, 'w') as f:
        json.dump(data, f, indent=4)
 with gr.Blocks() as demo:
   gr.HTML(HEADER)