import gradio as gr
import numpy as np
from vad_utils import get_speech_probs, make_visualization, probs2speech_timestamps, read_audio
import torch
import pandas as pd
import gdown


def process_audio(audio_input, window_size_samples):
    # Load the audio at 16 kHz and compute per-window speech probabilities.
    wav = read_audio(audio_input, sampling_rate=16_000)
    audio_length_samples = len(wav)
    probs = get_speech_probs(wav, window_size_samples=window_size_samples, sampling_rate=16_000)
    return make_visualization(probs, 512 / 16_000), probs, audio_length_samples


def process_parameters(probs, audio_length_samples, threshold, min_speech_duration_ms,
                       min_silence_duration_ms, window_size_samples, speech_pad_ms):
    # Convert the slider duration values to milliseconds.
    min_speech_duration_ms *= 1000
    min_silence_duration_ms *= 1000
    timestamps = probs2speech_timestamps(probs, audio_length_samples,
                                         threshold=threshold,
                                         min_speech_duration_ms=min_speech_duration_ms,
                                         min_silence_duration_ms=min_silence_duration_ms,
                                         window_size_samples=window_size_samples,
                                         speech_pad_ms=speech_pad_ms,
                                         return_seconds=True,
                                         rounding=3)
    # Export the detected speech segments as a tab-separated file.
    df = pd.DataFrame(timestamps)
    df["note"] = ""
    df.to_csv("timestamps.txt", sep='\t', header=False, index=False)
    return "timestamps.txt", df


def download_gdrive(id):
    # Download the shared Google Drive file and save it locally as audio.wav.
    output_file = "audio.wav"
    gdown.download(f"https://drive.google.com/uc?id={id}", output_file)
    return output_file


def main():
    with gr.Blocks() as demo:
        # Shared state passed between the audio-processing and parameter-tuning steps.
        probs = gr.State()
        audio_length_samples = gr.State()
        with gr.Row():
            info = """Input the Google Drive file id from the shared link. It comes after https://drive.google.com/file/d/