import streamlit as st
from PIL import Image
from bokeh.models.widgets import Button
from bokeh.models import CustomJS
from streamlit_bokeh_events import streamlit_bokeh_events
import subprocess
def show():
    """Render the Visual Question Answering "Audiobot" page.

    The page lets the user upload a single image, exposes an "Ask" button
    that captures one spoken question in the browser (Web Speech API), and
    answers it by calling ``st.session_state.predictor``.  The answer is
    spoken with the ``say`` command-line tool; when that tool is missing or
    fails, the answer is shown as text instead of crashing the page.

    Side effects:
        Writes ``st.session_state.image`` (a PIL image, or ``None`` when
        nothing is uploaded) and reads ``st.session_state.predictor``.
        NOTE(review): the predictor is assumed to be placed in the session
        state by code outside this function -- confirm against the caller.
    """
    st.title('Visual Question Answering - Audiobot')
    st.markdown('''
Hi, I am a Visual Audiobot, capable of answering a sequence of questions about images.
Please upload image and fire away!
''', unsafe_allow_html=True)

    upload_pic = st.file_uploader(
        'Choose an image...',
        type=['jpg', 'png', 'jpeg'],
        accept_multiple_files=False)
    if upload_pic is not None:
        st.session_state.image = Image.open(upload_pic)
        st.image(upload_pic, use_column_width='auto')
    else:
        st.session_state.image = None

    # Client-side speech recognition for Streamlit, adapted from
    # https://discuss.streamlit.io/t/speech-to-text-on-client-side-using-html5-and-streamlit-bokeh-events/7888
    # The JS below concatenates the final transcripts and sends them back to
    # Python through a 'GET_TEXT' DOM event.
    stt_button = Button(label='Ask', width=100)
    stt_button.js_on_event('button_click', CustomJS(code='''
var recognition = new webkitSpeechRecognition();
recognition.continuous = false;
recognition.interimResults = false;
recognition.onresult = function (e) {
var value = '';
for (var i = e.resultIndex; i < e.results.length; ++i) {
if (e.results[i].isFinal) {
value += e.results[i][0].transcript;
}
}
if ( value != '') {
document.dispatchEvent(new CustomEvent('GET_TEXT', {detail: value}));
}
}
recognition.start();
'''))
    result = streamlit_bokeh_events(
        stt_button,
        events='GET_TEXT',
        key='listen',
        refresh_on_update=False,
        override_height=75,
        debounce_time=0)

    # Nothing was recognised on this run: there is no question to answer.
    if not result or 'GET_TEXT' not in result:
        return

    # Without an uploaded image there is nothing to ask about; calling the
    # predictor with None would fail, so tell the user instead.
    if st.session_state.image is None:
        st.warning('Please upload an image before asking a question.')
        return

    answer = st.session_state.predictor.predict_answer_from_text(
        st.session_state.image, result.get('GET_TEXT'))
    # subprocess arguments must be strings; coerce in case the predictor
    # returns some other type.
    spoken = str(answer)
    try:
        subprocess.check_output(['say', spoken])
    except (OSError, subprocess.CalledProcessError):
        # `say` is not installed on this host (or exited with an error):
        # fall back to showing the answer so it is not lost.
        st.info(spoken)