import subprocess

import streamlit as st
from bokeh.models import CustomJS
from bokeh.models.widgets import Button
from PIL import Image
from streamlit_bokeh_events import streamlit_bokeh_events

# JavaScript run in the browser when the "Ask" button is clicked. It starts a
# one-shot speech recognition session and, once a final transcript is
# available, publishes it to the page as a 'GET_TEXT' custom event.
# The assignment to `onresult` is terminated with an explicit ';' so the
# script does not depend on automatic semicolon insertion.
_SPEECH_TO_TEXT_JS = '''
    var recognition = new webkitSpeechRecognition();
    recognition.continuous = false;
    recognition.interimResults = false;

    recognition.onresult = function (e) {
        var value = '';
        for (var i = e.resultIndex; i < e.results.length; ++i) {
            if (e.results[i].isFinal) {
                value += e.results[i][0].transcript;
            }
        }
        if ( value != '') {
            document.dispatchEvent(new CustomEvent('GET_TEXT', {detail: value}));
        }
    };
    recognition.start();
    '''


def _speak(answer):
    """Read ``answer`` aloud with the ``say`` command, falling back to text.

    ``say`` is invoked as an external program (it ships with macOS and is
    presumably absent on other platforms). If it cannot be started or exits
    with an error, the answer is written to the page instead of raising.
    """
    try:
        subprocess.check_output(['say', answer])
    except (OSError, subprocess.CalledProcessError):
        st.warning('Text-to-speech is unavailable; showing the answer instead.')
        st.write(answer)


def show():
    """Render the Visual Question Answering audiobot page.

    The page shows a title and greeting, an image uploader and an "Ask"
    button. Clicking the button runs in-browser speech recognition; the
    recognised question is passed together with ``st.session_state.image``
    to ``st.session_state.predictor.predict_answer_from_text`` and the
    returned answer is spoken aloud.

    Side effects:
        Sets ``st.session_state.image`` on every run (``None`` when no file
        is uploaded) and reads ``st.session_state.predictor``, which must
        have been set elsewhere.
    """
    st.title('Visual Question Answering - Audiobot')
    # The literal below is kept flush-left on purpose: its content is emitted
    # verbatim as markdown, so indenting it would change the rendered text.
    st.markdown('''

Hi, I am a Visual Audiobot, capable of answering a sequence of questions about images. Please upload image and fire away!

''', unsafe_allow_html=True)

    upload_pic = st.file_uploader(
        'Choose an image...',
        type=['jpg', 'png', 'jpeg'],
        accept_multiple_files=False,
    )
    if upload_pic is not None:
        # NOTE(review): this stores the return value of st.image (the rendered
        # Streamlit element), not the uploaded file or a decoded PIL image,
        # and `Image` is imported but never used. Confirm what the predictor
        # expects as its image argument.
        st.session_state.image = st.image(upload_pic, use_column_width='auto')
    else:
        st.session_state.image = None

    # Speech recognition inside Streamlit: a Bokeh button triggers the
    # browser-side recognition script, and streamlit_bokeh_events relays the
    # resulting 'GET_TEXT' event back to Python.
    stt_button = Button(label='Ask', width=100)
    stt_button.js_on_event('button_click', CustomJS(code=_SPEECH_TO_TEXT_JS))

    result = streamlit_bokeh_events(
        stt_button,
        events='GET_TEXT',
        key='listen',
        refresh_on_update=False,
        override_height=75,
        debounce_time=0)

    # Nothing was recognised on this run.
    if not result or 'GET_TEXT' not in result:
        return

    # Without an uploaded image there is nothing to ask about; do not call
    # the predictor with a None image.
    if upload_pic is None:
        st.warning('Please upload an image before asking a question.')
        return

    answer = st.session_state.predictor.predict_answer_from_text(
        st.session_state.image, result.get('GET_TEXT'))
    _speak(answer)