vqa_audiobot / audiobot.py
Madhuri's picture
Add chatbot and audiobot pages to application.
7a69915
raw history blame
No virus
2.21 kB
import streamlit as st
from PIL import Image
from bokeh.models.widgets import Button
from bokeh.models import CustomJS
from streamlit_bokeh_events import streamlit_bokeh_events
import subprocess
def show():
    """Render the "Visual Question Answering - Audiobot" page.

    The page:

    1. Shows a title and an introductory banner.
    2. Lets the user upload a single JPG/PNG image; the opened image (or
       ``None`` when nothing is uploaded) is stored in
       ``st.session_state.image``.
    3. Renders an "Ask" button that starts browser-side speech recognition
       and sends the final transcript back as a ``GET_TEXT`` event.
    4. Passes the image and the transcript to
       ``st.session_state.predictor.predict_answer_from_text`` and speaks the
       returned answer with the ``say`` command.

    ``st.session_state.predictor`` is not created here; it is expected to be
    placed in the session state before this page is shown.

    If a question arrives while no image is uploaded, a warning is shown and
    the predictor is not called. If the ``say`` command is missing or fails,
    the answer is displayed as text instead of raising.
    """
    st.title('Visual Question Answering - Audiobot')
    # The literal below is passed to Streamlit unchanged; its lines are kept
    # flush left so the rendered markup stays exactly as before.
    st.markdown('''
<h4 style='text-align: center; color: #B2BEB5;'>
<i>Hi, I am a Visual Audiobot, capable of answering a sequence of questions about images.
Please upload image and fire away!
</i></h4>
''', unsafe_allow_html=True)

    upload_pic = st.file_uploader('Choose an image...', type=[
        'jpg', 'png', 'jpeg'], accept_multiple_files=False)
    if upload_pic is not None:
        st.session_state.image = Image.open(upload_pic)
        st.image(upload_pic, use_column_width='auto')
    else:
        # Reset so an image from an earlier upload is not reused.
        st.session_state.image = None

    # Client-side speech recognition in Streamlit, adapted from
    # https://discuss.streamlit.io/t/speech-to-text-on-client-side-using-html5-and-streamlit-bokeh-events/7888
    stt_button = Button(label='Ask', width=100)
    # The JavaScript runs in the browser on button click: it collects the
    # final recognition results and, when non-empty, dispatches them as a
    # 'GET_TEXT' custom event carrying the transcript in `detail`.
    stt_button.js_on_event('button_click', CustomJS(code='''
var recognition = new webkitSpeechRecognition();
recognition.continuous = false;
recognition.interimResults = false;
recognition.onresult = function (e) {
var value = '';
for (var i = e.resultIndex; i < e.results.length; ++i) {
if (e.results[i].isFinal) {
value += e.results[i][0].transcript;
}
}
if ( value != '') {
document.dispatchEvent(new CustomEvent('GET_TEXT', {detail: value}));
}
}
recognition.start();
'''))

    result = streamlit_bokeh_events(
        stt_button,
        events='GET_TEXT',
        key='listen',
        refresh_on_update=False,
        override_height=75,
        debounce_time=0)

    if result and 'GET_TEXT' in result:
        if st.session_state.image is None:
            # Without an image there is nothing to ask about; do not hand
            # None to the predictor.
            st.warning('Please upload an image before asking a question.')
        else:
            answer = st.session_state.predictor.predict_answer_from_text(
                st.session_state.image, result.get('GET_TEXT'))
            try:
                # `say` is executed on the machine running this Python
                # process; it is a macOS utility and may be absent elsewhere.
                subprocess.check_output(['say', answer])
            except (OSError, subprocess.CalledProcessError):
                # Speech output is unavailable or failed: fall back to
                # showing the answer so the user still receives it.
                st.write(answer)