"""Streamlit demo: record or upload speech, add noise, enhance, transcribe.

The page offers five buttons.  "Record" captures microphone audio to
``clean_waveform.wav``; "Add Noise" mixes the two uploaded WAV files at the
requested SNR into ``noisy_waveform.wav``; "Enhance" runs the external
``OmniClear_cloud_demo`` command on that file; the two "Transcribe" buttons
launch separate Streamlit apps.
"""

import io
import os
import os.path
import subprocess
import tempfile

import streamlit as st
import matlab.engine
from st_audiorec import st_audiorec
import numpy as np
import sounddevice as sd
from scipy.io.wavfile import write
from scipy.io import wavfile

# Sample rate (Hz) used for microphone capture and as the default output rate.
SAMPLE_RATE = 16000


def record(duration):
    """Record ``duration`` seconds of mono audio into ``clean_waveform.wav``.

    Args:
        duration: Recording length in seconds.
    """
    frame_count = int(duration * SAMPLE_RATE)
    recording = sd.rec(frame_count, samplerate=SAMPLE_RATE, channels=1)
    sd.wait()  # sd.rec returns immediately; block until capture is finished
    write('clean_waveform.wav', SAMPLE_RATE, recording)


def main():
    """Render the page widgets and run the action of whichever button fired."""
    st.title("Upload WAV File, make the file noisy, run enhancement and transcribe")

    uploaded_wav_file = st.file_uploader("Upload a WAV file", type=["wav"])
    uploaded_noise_file = st.file_uploader("Upload a noise file", type=["wav"])
    snr = st.text_input("Enter SNR", "")

    if st.button("Record"):
        record(5)

    if st.button("Add Noise"):
        _add_noise(uploaded_wav_file, uploaded_noise_file, snr)

    if st.button("Enhance"):
        run_batch_script()

    if st.button("Transcribe_zeroshot"):
        transcribe_zeroshot()

    if st.button("Transcribe_trained"):
        transcribe_trained()


def _add_noise(uploaded_wav_file, uploaded_noise_file, snr):
    """Validate the form inputs, then mix the uploads into the noisy WAV file.

    Problems are reported on the page with ``st.error`` instead of raising.

    Args:
        uploaded_wav_file: Upload holding the clean speech, or ``None``.
        uploaded_noise_file: Upload holding the noise, or ``None``.
        snr: Raw text from the SNR input box.
    """
    if uploaded_wav_file is None or uploaded_noise_file is None:
        st.error("Please upload both a WAV file and a noise file first.")
        return

    try:
        snr_db = float(snr)
    except ValueError:
        st.error("SNR must be a number, for example 5 or -2.5.")
        return
    if not np.isfinite(snr_db):
        st.error("SNR must be a finite number.")
        return

    try:
        # Decode straight from memory: nothing is written to disk, so no
        # temporary directory is left behind and two uploads that share a
        # file name cannot overwrite each other.
        signal_rate, signal = wavfile.read(io.BytesIO(uploaded_wav_file.getvalue()))
        noise_rate, noise = wavfile.read(io.BytesIO(uploaded_noise_file.getvalue()))
        if noise_rate != signal_rate:
            st.warning(
                f"Noise sample rate ({noise_rate} Hz) differs from the signal "
                f"sample rate ({signal_rate} Hz); the noise is not resampled."
            )
        mix_audio(signal, noise, snr_db, rate=signal_rate)
    except ValueError as err:
        # Raised by wavfile.read for malformed data and by mix_audio for
        # inputs it cannot mix.
        st.error(f"Could not mix the files: {err}")


def run_matlab_script(snr):
    """Run ``mixFiles.m`` in a MATLAB subprocess with ``snr`` on its stdin.

    Waits for MATLAB to exit and drains its output pipes, so no process or
    pipe is left dangling.

    Args:
        snr: Value sent to the process's standard input (converted to text).

    Returns:
        The ``subprocess.CompletedProcess`` with captured stdout and stderr.
    """
    matlab_executable = 'matlab'
    # Path to your MATLAB script
    matlab_script = 'mixFiles.m'

    return subprocess.run(
        [matlab_executable, '-nodesktop', '-nosplash', '-r',
         f'run("{matlab_script}");exit;'],
        input=str(snr),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )


def run_batch_script():
    """Run the external enhancer on ``noisy_waveform.wav``.

    The command names ``enhanced_waveform.wav`` as its second argument.
    """
    command = r"OmniClear_cloud_demo noisy_waveform.wav enhanced_waveform.wav"
    # The command is a fixed string (no user input), so shell=True carries no
    # injection risk; it is kept so the shell resolves the launcher.
    # NOTE(review): presumably a batch script on Windows -- confirm.
    subprocess.run(command, shell=True)


def transcribe_zeroshot():
    """Launch ``loadlocal_zeroshot.py`` with ``streamlit run`` and wait for it."""
    # An argument list is required here: a single command string without
    # shell=True is treated as the program name on POSIX and cannot be found.
    subprocess.run(["streamlit", "run", "loadlocal_zeroshot.py"], check=False)


def transcribe_trained():
    """Launch ``loadlocal_trained.py`` with ``streamlit run`` and wait for it."""
    # See transcribe_zeroshot for why this is an argument list.
    subprocess.run(["streamlit", "run", "loadlocal_trained.py"], check=False)


def _mix_at_snr(signal, noise, snr):
    """Return ``signal`` mixed with ``noise`` at ``snr`` dB as int16 samples.

    The noise is repeated cyclically (or truncated) to the signal length, the
    two are weighted so their energy ratio matches ``snr``, and the result is
    peak-normalised to the int16 range.

    Raises:
        ValueError: If ``snr`` is not finite, either input is empty, the
            channel layouts differ, or the noise is silent.
    """
    snr = float(snr)
    if not np.isfinite(snr):
        raise ValueError("snr must be a finite number")

    signal = np.asarray(signal)
    noise = np.asarray(noise)
    if len(signal) == 0 or len(noise) == 0:
        raise ValueError("signal and noise must both contain samples")

    # Wrap around the noise so it covers every signal sample.
    noise = noise[np.arange(len(signal)) % len(noise)].astype(np.float32)
    signal = signal.astype(np.float32)
    if noise.shape != signal.shape:
        raise ValueError(
            "signal and noise must have the same channel layout "
            f"(got {signal.shape} and {noise.shape})"
        )

    signal_energy = np.mean(signal ** 2)
    noise_energy = np.mean(noise ** 2)
    if noise_energy == 0:
        raise ValueError("noise is silent; cannot scale it to the requested SNR")

    g = np.sqrt(10.0 ** (-snr / 10) * signal_energy / noise_energy)
    a = np.sqrt(1 / (1 + g ** 2))
    b = np.sqrt(g ** 2 / (1 + g ** 2))

    # mix the signals
    noisy_signal = a * signal + b * noise

    peak = np.max(np.abs(noisy_signal))
    if peak == 0:
        # An all-zero mix cannot be normalised; emit silence instead of NaNs.
        return np.zeros(noisy_signal.shape, dtype=np.int16)
    return np.int16(noisy_signal / peak * 32767)


def mix_audio(signal, noise, snr, rate=SAMPLE_RATE):
    """Mix ``noise`` into ``signal`` at ``snr`` dB and save ``noisy_waveform.wav``.

    Args:
        signal: Clean samples (array-like).
        noise: Noise samples (array-like); repeated cyclically if shorter.
        snr: Target signal-to-noise ratio in dB (number or numeric string).
        rate: Sample rate in Hz written to the WAV header.

    Returns:
        The int16 samples that were written.

    Raises:
        ValueError: If the inputs cannot be mixed (see ``_mix_at_snr``).
    """
    scaled = _mix_at_snr(signal, noise, snr)
    # Write the array to a WAV file
    write('noisy_waveform.wav', rate, scaled)
    return scaled


if __name__ == "__main__":
    main()