# Streamlit app: upload an audio recording, convert it to WAV and show the
# speech-recognition output as JSON.
import os
import tempfile
import time as t

import streamlit as st
from pydub import AudioSegment, silence
from transformers import pipeline
# import speech_recognition as sr

# Earlier sentiment-analysis experiment, kept for reference:
# pipe = pipeline('sentiment-analysis')
# text = st.text_area('Enter your notes')
# if text:
#     out = pipe(text)
#     st.json(out)

ASR_MODEL = "facebook/wav2vec2-base-960h"
ACCEPTED_AUDIO_TYPES = ['mp3', 'wav', 'm4a']

# Long recordings are transcribed in overlapping windows.
# stride_length_s is a tuple of the left and right stride length.
# With only 1 number, both sides get the same stride; by default
# the stride_length on one side is 1/6th of the chunk_length_s.
CHUNK_LENGTH_S = 10
STRIDE_LENGTH_S = (4, 2)


def _transcribe(uploaded_audio):
    """Convert the uploaded audio to WAV and run speech recognition on it.

    Args:
        uploaded_audio: File-like object returned by ``st.file_uploader``.

    Returns:
        Whatever the ``automatic-speech-recognition`` pipeline returns for
        the converted file.
    """
    recognizer = pipeline('automatic-speech-recognition', model=ASR_MODEL)
    segment = AudioSegment.from_file(uploaded_audio)

    # A private temporary directory replaces the fixed "audio.wav" in the
    # working directory: concurrent sessions no longer overwrite each
    # other's file, and the file is removed once transcription finishes.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = os.path.join(work_dir, "audio.wav")
        # export() hands back the open output file handle; close it so the
        # handle is not leaked and the directory can be cleaned up.
        segment.export(wav_path, format="wav").close()
        return recognizer(
            wav_path,
            chunk_length_s=CHUNK_LENGTH_S,
            stride_length_s=STRIDE_LENGTH_S,
        )


# NOTE(review): the title is rendered with unsafe_allow_html=True but holds no
# markup as seen here -- confirm whether heading tags were lost upstream.
st.markdown("\n\nGroup Therapy Notes\n\n", unsafe_allow_html=True)
st.markdown("---", unsafe_allow_html=True)

audio = st.file_uploader("Upload Your Audio File", type=ACCEPTED_AUDIO_TYPES)

if audio is not None:
    output = _transcribe(audio)
    st.json(output)

# Alternative approach (split on silence, transcribe chunk by chunk), kept
# for reference:
# chunk.export(str(index)+".wav", format="wav")
# audio_segment= AudioSegment.from_file(audio)
# chunks=silence.split_on_silence(audio_segment, min_silence_len=500, silence_thresh= audio_segment.dBFS-20,keep_silence=100)
# for index, chunk in enumerate (chunks):
#     #output = pipe(audio_segment, chunk_length_s=10, stride_length_s=(4, 2))
#     print (chunk)
#     st.json("wav")