Spaces:

Mridul21
/

VAD-BTP

Running

App Files Files Community

Mridul committed on Nov 15, 2023

Commit

782d9c8

•

1 Parent(s): a380e3b

Adding the initial files

Browse files

Files changed (3) hide show

app.py +119 -0
helper.py +53 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import os
+import tempfile
+from io import BytesIO
+from pprint import pprint
+
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import numpy as np
+import sounddevice as sd
+import streamlit as st
+import torch
+from matplotlib.colors import ListedColormap
+
+import helper as hp
+magicEnabled = False
+st.title("Human Voice Activity Detector")
+# record audio
+st.subheader("Record Audio From Microphone")
+with st.form("enter_info_form"):
+    filename = st.text_input("FILENAME")+".wav"
+    duration = st.number_input("DURATION", min_value=0)
+    record_button = st.form_submit_button("Record")
+st.session_state["recording_done"] = False
+if record_button:
+    if "recording_state" not in st.session_state:
+        st.session_state["recording_state"] = True
+    try:
+        hp.record_Audio(filename, duration)
+        # reading the conent of the audio file
+        with open(filename, 'rb') as file:
+            audio_content = file.read()
+            audio_file = BytesIO(audio_content)     # converting it to BytesIO format
+        st.download_button(
+            label=f"Download {filename}",
+            data = audio_file,
+            file_name=filename,
+            mime="audio/wav",
+        )
+    except ValueError as e:
+        st.error(str(e))
+# TODO
+#upload audio file with streamlit
+else:
+    audio_file = st.file_uploader("Upload Audio", type=["wav"])
+if audio_file is not None:
+    # Save the uploaded audio file to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+        tmp_file.write(audio_file.getvalue())
+        # tmp_file.write(audio_file.read())
+        tmp_file_name = tmp_file.name
+    # audio_file.seek(0)   # Seek to the beginning of the file
+    tmp_file.close()
+    # print(audio_file)
+    plt.figure(figsize = (14,5))
+    data, sample_rate = librosa.load(tmp_file_name,sr=16000)
+    # Plot the waveform
+    plt.figure(figsize=(10, 4))
+    librosa.display.waveshow(data, sr=16000)
+    plt.title("Waveform")
+    plt.xlabel("Time (s)")
+    plt.ylabel("Amplitude")
+    plt.tight_layout()
+    # Display the plot in Streamlit
+    st.audio(data, format="audio/wav", sample_rate=sample_rate)
+    st.caption("Raw Audio Waveform")
+    st.pyplot(plt)
+    with st.spinner('Processing the audio file...'):
+        torch.set_num_threads(1)
+        model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
+                                    model='silero_vad',
+                                    force_reload=True)
+        (get_speech_timestamps,
+        _, read_audio,
+        *_) = utils
+        sampling_rate = 16000
+        wav = read_audio(audio_file, sampling_rate=sampling_rate) #type(wav) = <class 'torch.Tensor'>
+        # print(wav)
+        speech_timestamps = get_speech_timestamps(wav, model, sampling_rate=sampling_rate)
+        # pprint(speech_timestamps)
+        plt.figure(figsize = (14,5))
+        # data,sample_rate = librosa.load(local_audio_file_path, sr=sampling_rate)
+        librosa.display.waveshow(np.array(wav), sr = sampling_rate)
+        if len(speech_timestamps) != 0:
+            plt.title("Detected Speech Segments")
+            plt.xlabel("Time (s)")
+            plt.ylabel("Amplitude")
+            for timestamp in speech_timestamps:
+                start_time = timestamp['start'] / sampling_rate
+                end_time = timestamp['end'] / sampling_rate
+                plt.axvspan(start_time, end_time, alpha=0.5, color='gray', label='Detected Speech')
+            st.success("Speech Segments Detected!")
+            st.caption("Model Output with Detected Speech Segments")
+            st.pyplot(plt)
+        else:
+            print("No Speech Detected")
+            st.error("No Speech Detected")
+if st.session_state['recording_done']:
+    if st.button("Reset", ):
+        st.session_state["recording_state"] = False
+        st.rerun()

helper.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import streamlit as st
+from pyaudio import paInt16, PyAudio
+import wave
+def record_Audio(filename, duration):
+    """
+     A audio-recording helping function Using PyAudio
+    """
+    if not filename:
+        raise ValueError("Filename not specified. Please provide a filename!")
+    CHUNK = 1024
+    FORMAT = paInt16
+    CHANNELS = 1
+    RATE = 16000
+    RECORD_TIME = duration
+    recording_state = st.session_state.get("recording_state", False)
+    recording_info_placeholder = st.empty()
+    if recording_state:
+        recording_info_placeholder.info("Recording... ")
+        with wave.open(filename, 'wb') as f:
+            p = PyAudio()
+            f.setnchannels(CHANNELS)
+            f.setsampwidth(p.get_sample_size(FORMAT))
+            f.setframerate(RATE)
+            stream = p.open(format=FORMAT,
+                            channels=CHANNELS,
+                            rate=RATE,
+                            input=True)
+            if recording_state:
+                stop_button = st.button("Stop Recording")
+            for _ in range(0, RATE // CHUNK * RECORD_TIME):
+                f.writeframes(stream.read(CHUNK))
+                if stop_button:
+                    break
+            recording_info_placeholder.success("Recording Completed\nThese are the results:")
+            st.session_state["recording_done"] = True
+            stream.close()
+            p.terminate()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit==1.28.0
+matplotlib==3.7.2
+librosa==0.10.0.post2
+numpy==1.24.3
+torch==2.0.1
+torchaudio==2.0.2
+pyaudio==0.2.13
+wave==0.0.2