File size: 2,998 Bytes
b30324c
 
 
 
 
 
 
 
 
 
 
 
2cf5541
b30324c
2cf5541
b30324c
2cf5541
 
 
 
 
b30324c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import streamlit as st
from PIL import Image
import os
import requests


# URL of the ASR service endpoint (it is passed to requests.post as the target),
# read from the ASR_API environment variable. If the variable is not set, this
# lookup raises KeyError when the line runs.
ASR_API = os.environ['ASR_API']

def request_to_asr_service(audiofile):
    """Send an audio file to the ASR service and return its decoded JSON reply.

    Args:
        audiofile: The file payload, forwarded to ``requests.post`` as the
            multipart form field named ``file``.

    Returns:
        The parsed JSON body of the response, or ``None`` when the request
        fails, the service answers with an HTTP error status, or the body
        is not valid JSON. In those failure cases an info box is shown.
    """
    files = {'file': audiofile}
    try:
        response = requests.post(ASR_API, files=files)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # Deliberately narrow: a bare ``except`` would also swallow
        # BaseException-derived signals such as KeyboardInterrupt/SystemExit.
        # ValueError covers JSON decoding errors on older requests versions.
        st.info('ASR Service not worked!')
        return None
        
# Page-level settings for the app: browser tab title, tab icon, layout and
# initial sidebar state. The icon is the "speaking head" emoji (U+1F5E3); the
# previous literal was the same emoji with its UTF-8 bytes mis-decoded.
st.set_page_config(
    page_title="Automatic Speech Recognition",
    page_icon="🗣",
    layout="centered",
    initial_sidebar_state="auto",
)

# Working directories (relative to the current working directory) for the
# uploaded audio and the generated transcript files; created if missing.
upload_path = "uploads/"
download_path = "downloads/"
os.makedirs(upload_path, exist_ok=True)
os.makedirs(download_path, exist_ok=True)
# @st.cache(persist=True,allow_output_mutation=True,show_spinner=False,suppress_st_warning=True)
# def asr_inference_wav2vec2(uploaded_file):
#     asr = Wave2Vec2Inference("facebook/wav2vec2-base-960h")
#     text = asr.file_to_text(uploaded_file)
#     return text

def save_text(text, downloaded_txt_file):
    """Write *text* to *downloaded_txt_file*, replacing any existing content.

    Args:
        text: The string to store.
        downloaded_txt_file: Path of the text file to create or overwrite.

    Returns:
        None. The path is also printed to stdout.
    """
    # Intentionally not cached: this function exists only for its side effect,
    # and a cache hit would skip the write and leave the file missing or stale.
    # UTF-8 is forced so non-ASCII transcripts do not depend on the platform's
    # default encoding.
    with open(downloaded_txt_file, 'w', encoding='utf-8') as outtxt:
        outtxt.write(text)
    print(downloaded_txt_file)

def download_success():
    """Show the balloons animation and a success message after a download."""
    # Intentionally not cached: the function takes no arguments, so a cached
    # version would run its body only on the first call and silently show
    # nothing on every later download.
    st.balloons()
    st.success('✅ Download Successful !!')

# ---- Page body: banner, upload widget, transcription, download, footer ----
main_image = Image.open('static/main_banner.png')

st.image(main_image,use_column_width='auto')
st.title("🗣 Automatic Speech Recognition")
# NOTE(review): this banner advertises every audio format, but the uploader
# below only accepts "wav" - confirm which one is intended and align them.
st.info('✨ Supports ALL Audio Formats (mp3, wav, ogg, ...).')

uploaded_file = st.file_uploader("Upload audio file", type=["wav"])
if uploaded_file is not None:
    # The file name comes from the client: keep only its final path component,
    # and strip just the last extension so "my.talk.wav" yields "my.talk".
    safe_name = os.path.basename(uploaded_file.name)
    base_name = os.path.splitext(safe_name)[0]

    # Keep a copy of the upload on disk.
    with open(os.path.join(upload_path, safe_name), "wb") as f:
        f.write(uploaded_file.getbuffer())

    with st.spinner("Converting speech to text... 💫"):
        resp = request_to_asr_service(uploaded_file)

    # request_to_asr_service returns None on failure, and the reply may lack
    # the 'transcript' key; handle both instead of crashing on the lookup.
    text = resp.get('transcript') if isinstance(resp, dict) else None
    if text is None:
        st.error('No transcript was returned by the ASR service.')
    else:
        # text = asr_inference_wav2vec2(upload_path + uploaded_file.name)
        st.info(text)
        downloaded_txt_file = os.path.abspath(
            os.path.join(download_path, "processed_" + base_name + ".txt")
        )
        save_text(text, downloaded_txt_file)
        with open(downloaded_txt_file, "rb") as file:
            if st.download_button(
                label="Download ASR Output 🗣",
                data=file,
                file_name="ASR_output_" + base_name + ".txt",
                mime='text/plain',
            ):
                download_success()
# else:
    # st.warning("Please upload your  file. Any other audio format is currently not supported")

st.markdown("<br><hr><center>Made with ❤️ by <a href='https://ahdsoft.ir'><strong>AHD Co</strong></a></center><hr>", unsafe_allow_html=True)