Mridul committed on
Commit
782d9c8
1 Parent(s): a380e3b

Adding the initial files

Browse files
Files changed (3) hide show
  1. app.py +119 -0
  2. helper.py +53 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import matplotlib.pyplot as plt
3
+ import librosa
4
+ import librosa.display
5
+ import numpy as np
6
+ from matplotlib.colors import ListedColormap
7
+ import torch
8
+ from pprint import pprint
9
+ import tempfile
10
+ import sounddevice as sd
11
+ import helper as hp
12
+ from io import BytesIO
13
+
14
# Streamlit front end: record (or upload) a WAV file, plot its waveform, and
# run the Silero VAD model to highlight detected speech segments.

magicEnabled = False  # NOTE(review): a plain variable has no effect; Streamlit "magic" is configured via config.toml — confirm intent

st.title("Human Voice Activity Detector")

# --- Audio acquisition: record from the microphone via helper.record_Audio ---
st.subheader("Record Audio From Microphone")
with st.form("enter_info_form"):
    filename = st.text_input("FILENAME") + ".wav"
    duration = st.number_input("DURATION", min_value=0)
    record_button = st.form_submit_button("Record")

# Initialise the flag once instead of clobbering it on every rerun, so the
# Reset button at the bottom can actually appear after a recording finishes.
if "recording_done" not in st.session_state:
    st.session_state["recording_done"] = False

# Guard: stays None when recording fails, so the plotting section is skipped
# instead of raising NameError.
audio_file = None

if record_button:
    if "recording_state" not in st.session_state:
        st.session_state["recording_state"] = True

    try:
        hp.record_Audio(filename, duration)

        # Read the freshly recorded file back so it can be offered for download.
        with open(filename, 'rb') as file:
            audio_content = file.read()
        audio_file = BytesIO(audio_content)  # converting it to BytesIO format

        st.download_button(
            label="Download (unknown)",
            data=audio_file,
            file_name=filename,
            mime="audio/wav",
        )

    except ValueError as e:
        # record_Audio raises ValueError for an empty filename.
        st.error(str(e))
# TODO
# upload audio file with streamlit
else:
    audio_file = st.file_uploader("Upload Audio", type=["wav"])

if audio_file is not None:
    # librosa.load needs a real path, so spill the in-memory audio to a
    # temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_file.write(audio_file.getvalue())
        tmp_file_name = tmp_file.name

    data, sample_rate = librosa.load(tmp_file_name, sr=16000)

    # Plot the raw waveform.
    plt.figure(figsize=(10, 4))
    librosa.display.waveshow(data, sr=16000)
    plt.title("Waveform")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.tight_layout()

    # Display the audio player and the plot in Streamlit.
    st.audio(data, format="audio/wav", sample_rate=sample_rate)
    st.caption("Raw Audio Waveform")
    st.pyplot(plt)

    with st.spinner('Processing the audio file...'):
        torch.set_num_threads(1)

        # Silero VAD: pretrained voice-activity-detection model from torch.hub.
        model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                                      model='silero_vad',
                                      force_reload=True)

        (get_speech_timestamps,
         _, read_audio,
         *_) = utils

        sampling_rate = 16000
        # Rewind: getvalue() above may have left the cursor at EOF.
        audio_file.seek(0)
        wav = read_audio(audio_file, sampling_rate=sampling_rate)  # type(wav) = <class 'torch.Tensor'>
        speech_timestamps = get_speech_timestamps(wav, model, sampling_rate=sampling_rate)

        plt.figure(figsize=(14, 5))
        librosa.display.waveshow(np.array(wav), sr=sampling_rate)
        if len(speech_timestamps) != 0:
            plt.title("Detected Speech Segments")
            plt.xlabel("Time (s)")
            plt.ylabel("Amplitude")
            # Shade each detected span; timestamps are in samples, hence the
            # division by the sampling rate to get seconds.
            for timestamp in speech_timestamps:
                start_time = timestamp['start'] / sampling_rate
                end_time = timestamp['end'] / sampling_rate
                plt.axvspan(start_time, end_time, alpha=0.5, color='gray', label='Detected Speech')

            st.success("Speech Segments Detected!")
            st.caption("Model Output with Detected Speech Segments")
            st.pyplot(plt)
        else:
            st.error("No Speech Detected")

if st.session_state['recording_done']:
    if st.button("Reset"):
        st.session_state["recording_state"] = False
        st.rerun()
helper.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pyaudio import paInt16, PyAudio
3
+ import wave
4
+
5
def record_Audio(filename, duration):
    """
    Record ``duration`` seconds of 16 kHz mono audio from the default
    microphone (via PyAudio) and write it to ``filename`` as a WAV file.

    Parameters
    ----------
    filename : str
        Target WAV path; must be non-empty.
    duration : int
        Recording length in seconds.

    Raises
    ------
    ValueError
        If ``filename`` is empty.

    Side effects: shows Streamlit status messages, optionally renders a
    "Stop Recording" button, and sets st.session_state["recording_done"].
    """
    if not filename:
        raise ValueError("Filename not specified. Please provide a filename!")

    CHUNK = 1024
    FORMAT = paInt16
    CHANNELS = 1
    RATE = 16000
    RECORD_TIME = duration

    recording_state = st.session_state.get("recording_state", False)
    recording_info_placeholder = st.empty()
    if recording_state:
        recording_info_placeholder.info("Recording... ")

    # BUG FIX: stop_button was previously only bound when recording_state was
    # truthy, causing a NameError inside the capture loop otherwise.
    stop_button = False

    p = PyAudio()
    try:
        with wave.open(filename, 'wb') as f:
            f.setnchannels(CHANNELS)
            f.setsampwidth(p.get_sample_size(FORMAT))
            f.setframerate(RATE)

            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True)
            try:
                if recording_state:
                    stop_button = st.button("Stop Recording")

                # Pull fixed-size chunks off the stream for the requested
                # duration (RATE // CHUNK reads per second).
                for _ in range(0, RATE // CHUNK * RECORD_TIME):
                    f.writeframes(stream.read(CHUNK))

                    if stop_button:
                        break
            finally:
                # Release the capture stream even if a read/write fails.
                stream.close()
    finally:
        p.terminate()

    recording_info_placeholder.success("Recording Completed\nThese are the results:")

    st.session_state["recording_done"] = True
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.28.0
2
+ matplotlib==3.7.2
3
+ librosa==0.10.0.post2
4
+ numpy==1.24.3
5
+ torch==2.0.1
6
+ torchaudio==2.0.2
7
+ pyaudio==0.2.13
8
+ sounddevice==0.4.6
9
+ # NOTE(review): "wave" is part of the Python standard library; the unrelated
+ # PyPI "wave" package below can shadow it and is likely unnecessary.
+ wave==0.0.2