Spaces:

Madhuri
/

vqa_audiobot

Runtime error

App Files Community

Madhuri committed on Jun 21, 2022

Commit

5560825

•

1 Parent(s): 7aa61b0

Add grammar module to correct the generated answers.

Browse files

Files changed (8) hide show

.DS_Store +0 -0
.gitignore +80 -1
app.py +7 -5
audiobot.py +7 -7
chatbot.py +7 -5
images/.DS_Store +0 -0
model/predictor.py +10 -2
requirements.txt +17 -0

.DS_Store DELETED Viewed

Binary file (6.15 kB)

.gitignore CHANGED Viewed

@@ -2,9 +2,88 @@
 __pycache__/
 *.py[cod]
 # Distribution / packaging
 .Python
 # Installer logs
 pip-log.txt
-pip-delete-this-directory.txt

 __pycache__/
 *.py[cod]
+# C extensions
+*.so
 # Distribution / packaging
 .Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
 # Installer logs
 pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# DotEnv configuration
+.env
+# Database
+*.db
+*.rdb
+# Pycharm
+.idea
+# VS Code
+.vscode/
+# Spyder
+.spyproject/
+# Jupyter NB Checkpoints
+.ipynb_checkpoints/
+# Mac OS-specific storage files
+.DS_Store
+# vim
+*.swp
+*.swo
+# Mypy cache
+.mypy_cache/
+# exclude generated models from source control
+models/intermediate/

app.py CHANGED Viewed

@@ -7,9 +7,12 @@ import chatbot
 import os
 import threading
 def runInThread():
     print('Initialize model in thread')
     st.session_state['predictor'] = predictor.Predictor()
 def run():
     st.set_page_config(
@@ -19,6 +22,10 @@ def run():
     )
     os.environ['TOKENIZERS_PARALLELISM'] = 'false'
     st.sidebar.title('VQA Bot')
     st.sidebar.image('./images/logo.png')
@@ -37,10 +44,5 @@ def run():
     st.caption("Created by Madhuri Sakhare - [Github](https://github.com/msak1612/vqa_chatbot) [Linkedin](https://www.linkedin.com/in/madhuri-sakhare/)")
-    if 'thread' not in st.session_state:
-        st.session_state.thread = threading.Thread(target=runInThread)
-        add_script_run_ctx(st.session_state.thread)
-        st.session_state.thread.start()
 run()

 import os
 import threading
 def runInThread():
     print('Initialize model in thread')
     st.session_state['predictor'] = predictor.Predictor()
+    print('Model is initialized')
 def run():
     st.set_page_config(
     )
     os.environ['TOKENIZERS_PARALLELISM'] = 'false'
+    if 'thread' not in st.session_state:
+        st.session_state.thread = threading.Thread(target=runInThread)
+        add_script_run_ctx(st.session_state.thread)
+        st.session_state.thread.start()
     st.sidebar.title('VQA Bot')
     st.sidebar.image('./images/logo.png')
     st.caption("Created by Madhuri Sakhare - [Github](https://github.com/msak1612/vqa_chatbot) [Linkedin](https://www.linkedin.com/in/madhuri-sakhare/)")
 run()

audiobot.py CHANGED Viewed

@@ -7,6 +7,7 @@ from streamlit_bokeh_events import streamlit_bokeh_events
 from bokeh.models.widgets.buttons import Button
 import time
 def show():
     st.session_state.audio_answer = ''
@@ -18,7 +19,7 @@ def show():
             </i></h4>
             ''', unsafe_allow_html=True)
-    weights = [5,2]
     image_col, audio_col = st.columns(weights)
     with image_col:
         upload_pic = st.file_uploader('Choose an image...', type=[
@@ -30,8 +31,8 @@ def show():
             st.session_state.image = None
     with audio_col:
-        welcome_text='Hello and Welcome. I have been trained  as  visual question answering model. You are welcome to look at any image and ask me any questions about it.  I will do my best to provide the most accurate information possible based on my expertise. Select an image of interest by pressing the browse files button.  Now use the Ask question button to ask a question. Please feel free to ask me any questions about this image. Now. to get my answer. press the Get answer button.'
-        welcome_button = Button(label='About Me')
         welcome_button.js_on_event('button_click', CustomJS(code=f'''
             var u = new SpeechSynthesisUtterance();
             u.text = '{welcome_text}';
@@ -43,7 +44,7 @@ def show():
         # Speech recognition based in streamlit based on
         # https://discuss.streamlit.io/t/speech-to-text-on-client-side-using-html5-and-streamlit-bokeh-events/7888
-        stt_button = Button(label='Ask Question')
         stt_button.js_on_event('button_click', CustomJS(code="""
             var recognition = new webkitSpeechRecognition();
@@ -51,7 +52,7 @@ def show():
             recognition.interimResults = false;
             recognition.onresult = function (e) {
-                var value = "";
                 for (var i = e.resultIndex; i < e.results.length; ++i) {
                     if (e.results[i].isFinal) {
                         value += e.results[i][0].transcript;
@@ -80,8 +81,7 @@ def show():
                     st.session_state.audio_answer = st.session_state.predictor.predict_answer_from_text(
                         st.session_state.image, result.get('GET_TEXT'))
-        tts_button = Button(label='Get Answer')
         tts_button.js_on_event('button_click', CustomJS(code=f"""
             var u = new SpeechSynthesisUtterance();
             u.text = '{st.session_state.audio_answer}';

 from bokeh.models.widgets.buttons import Button
 import time
 def show():
     st.session_state.audio_answer = ''
             </i></h4>
             ''', unsafe_allow_html=True)
+    weights = [5, 2]
     image_col, audio_col = st.columns(weights)
     with image_col:
         upload_pic = st.file_uploader('Choose an image...', type=[
             st.session_state.image = None
     with audio_col:
+        welcome_text = 'Hello and Welcome. I have been trained  as  visual question answering model. You are welcome to look at any image and ask me any questions about it.  I will do my best to provide the most accurate information possible based on my expertise. Select an image of interest by pressing the browse files button.  Now use the Ask question button to ask a question. Please feel free to ask me any questions about this image. Now. to get my answer. press the Get answer button.'
+        welcome_button = Button(label='About Me', width=100)
         welcome_button.js_on_event('button_click', CustomJS(code=f'''
             var u = new SpeechSynthesisUtterance();
             u.text = '{welcome_text}';
         # Speech recognition based in streamlit based on
         # https://discuss.streamlit.io/t/speech-to-text-on-client-side-using-html5-and-streamlit-bokeh-events/7888
+        stt_button = Button(label='Ask Question', width=100)
         stt_button.js_on_event('button_click', CustomJS(code="""
             var recognition = new webkitSpeechRecognition();
             recognition.interimResults = false;
             recognition.onresult = function (e) {
+                var value = '';
                 for (var i = e.resultIndex; i < e.results.length; ++i) {
                     if (e.results[i].isFinal) {
                         value += e.results[i][0].transcript;
                     st.session_state.audio_answer = st.session_state.predictor.predict_answer_from_text(
                         st.session_state.image, result.get('GET_TEXT'))
+        tts_button = Button(label='Get Answer', width=100)
         tts_button.js_on_event('button_click', CustomJS(code=f"""
             var u = new SpeechSynthesisUtterance();
             u.text = '{st.session_state.audio_answer}';

chatbot.py CHANGED Viewed

@@ -3,6 +3,7 @@ from streamlit_chat import message
 from PIL import Image
 import time
 def init_chat_history():
     if 'question' not in st.session_state:
         st.session_state['question'] = []
@@ -23,9 +24,10 @@ def predict(image, input):
     if image is None or not input:
         return
-    with st.spinner('Preparing answer...'):
-        while 'predictor' not in st.session_state:
-            time.sleep(2)
     answer = st.session_state.predictor.predict_answer_from_text(image, input)
     st.session_state.question.append(input)
@@ -51,8 +53,8 @@ def show():
             image = Image.open(upload_pic)
             st.image(upload_pic, use_column_width='auto')
         else:
-            st.session_state.question=[]
-            st.session_state.answer=[]
             st.session_state.input = ''
     with text_col:
         input = st.text_input('Enter question: ', '', key='input')

 from PIL import Image
 import time
 def init_chat_history():
     if 'question' not in st.session_state:
         st.session_state['question'] = []
     if image is None or not input:
         return
+    if 'predictor' not in st.session_state:
+        with st.spinner('Preparing answer...'):
+            while 'predictor' not in st.session_state:
+                time.sleep(2)
     answer = st.session_state.predictor.predict_answer_from_text(image, input)
     st.session_state.question.append(input)
             image = Image.open(upload_pic)
             st.image(upload_pic, use_column_width='auto')
         else:
+            st.session_state.question = []
+            st.session_state.answer = []
             st.session_state.input = ''
     with text_col:
         input = st.text_input('Enter question: ', '', key='input')

images/.DS_Store DELETED Viewed

Binary file (6.15 kB)

model/predictor.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import streamlit as st
 from transformers import ViltProcessor
 from transformers import ViltForQuestionAnswering
 from transformers import AutoTokenizer
@@ -28,7 +29,9 @@ class Predictor:
             'Madhuri/t5_small_vqa_fs',  use_auth_token=auth_token)
         self.qa_tokenizer = AutoTokenizer.from_pretrained(
             'Madhuri/t5_small_vqa_fs', use_auth_token=auth_token)
     def predict_answer_from_text(self, image, input):
         if image is None:
@@ -54,4 +57,9 @@ class Predictor:
         answers = self.qa_tokenizer.batch_decode(
             output_ids, skip_special_tokens=True)
-        return answers[0]

 import streamlit as st
+from happytransformer import HappyTextToText, TTSettings
 from transformers import ViltProcessor
 from transformers import ViltForQuestionAnswering
 from transformers import AutoTokenizer
             'Madhuri/t5_small_vqa_fs',  use_auth_token=auth_token)
         self.qa_tokenizer = AutoTokenizer.from_pretrained(
             'Madhuri/t5_small_vqa_fs', use_auth_token=auth_token)
+        self.happy_tt = HappyTextToText(
+            "T5", "vennify/t5-base-grammar-correction")
+        self.tt_args = TTSettings(num_beams=5, min_length=1)
     def predict_answer_from_text(self, image, input):
         if image is None:
         answers = self.qa_tokenizer.batch_decode(
             output_ids, skip_special_tokens=True)
+        # Correct the grammar of the answer
+        answer = self.happy_tt.generate_text(
+            'grammar: ' + answers[0], args=self.tt_args).text
+        print(
+            f'question - {question}, answer - {answer}, original_answer - {answers[0]}')
+        return answer

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 altair==4.2.0
 ansicolors==1.1.8
 ansiwrap==0.8.4
@@ -5,6 +7,7 @@ appnope==0.1.3
 argon2-cffi==21.3.0
 argon2-cffi-bindings==21.2.0
 asttokens==2.0.5
 attrs==21.4.0
 backcall==0.2.0
 beautifulsoup4==4.11.1
@@ -19,15 +22,20 @@ charset-normalizer==2.0.12
 click==8.1.3
 combomethod==1.0.12
 commonmark==0.9.1
 debugpy==1.6.0
 decorator==5.1.1
 defusedxml==0.7.1
 entrypoints==0.4
 executing==0.8.3
 fastjsonschema==2.15.3
 filelock==3.7.1
 gitdb==4.0.9
 GitPython==3.1.27
 huggingface-hub==0.7.0
 idna==3.3
 importlib-metadata==4.11.4
@@ -37,19 +45,24 @@ ipython-genutils==0.2.0
 ipywidgets==7.7.0
 jedi==0.18.1
 Jinja2==3.1.2
 jsonschema==4.6.0
 jupyter-client==7.3.4
 jupyter-core==4.10.0
 jupyterlab-pygments==0.2.2
 jupyterlab-widgets==1.1.0
 MarkupSafe==2.1.1
 matplotlib-inline==0.1.3
 mementos==1.3.1
 mistune==0.8.4
 nbclient==0.6.4
 nbconvert==6.5.0
 nbformat==5.4.0
 nest-asyncio==1.5.5
 notebook==6.4.12
 nulltype==2.3.1
 numpy==1.22.4
@@ -81,10 +94,12 @@ PyYAML==6.0
 pyzmq==23.1.0
 regex==2022.6.2
 requests==2.28.0
 rich==12.4.4
 say==1.6.6
 semver==2.13.0
 Send2Trash==1.8.0
 simplere==1.2.13
 six==1.12.0
 smmap==5.0.0
@@ -112,4 +127,6 @@ validators==0.20.0
 wcwidth==0.2.5
 webencodings==0.5.1
 widgetsnbextension==3.6.0
 zipp==3.8.0

+aiohttp==3.8.1
+aiosignal==1.2.0
 altair==4.2.0
 ansicolors==1.1.8
 ansiwrap==0.8.4
 argon2-cffi==21.3.0
 argon2-cffi-bindings==21.2.0
 asttokens==2.0.5
+async-timeout==4.0.2
 attrs==21.4.0
 backcall==0.2.0
 beautifulsoup4==4.11.1
 click==8.1.3
 combomethod==1.0.12
 commonmark==0.9.1
+datasets==2.3.2
 debugpy==1.6.0
 decorator==5.1.1
 defusedxml==0.7.1
+dill==0.3.5.1
 entrypoints==0.4
 executing==0.8.3
 fastjsonschema==2.15.3
 filelock==3.7.1
+frozenlist==1.3.0
+fsspec==2022.5.0
 gitdb==4.0.9
 GitPython==3.1.27
+happytransformer==2.4.1
 huggingface-hub==0.7.0
 idna==3.3
 importlib-metadata==4.11.4
 ipywidgets==7.7.0
 jedi==0.18.1
 Jinja2==3.1.2
+joblib==1.1.0
 jsonschema==4.6.0
 jupyter-client==7.3.4
 jupyter-core==4.10.0
 jupyterlab-pygments==0.2.2
 jupyterlab-widgets==1.1.0
+language-tool-python==2.7.1
 MarkupSafe==2.1.1
 matplotlib-inline==0.1.3
 mementos==1.3.1
 mistune==0.8.4
+multidict==6.0.2
+multiprocess==0.70.13
 nbclient==0.6.4
 nbconvert==6.5.0
 nbformat==5.4.0
 nest-asyncio==1.5.5
+nltk==3.7
 notebook==6.4.12
 nulltype==2.3.1
 numpy==1.22.4
 pyzmq==23.1.0
 regex==2022.6.2
 requests==2.28.0
+responses==0.18.0
 rich==12.4.4
 say==1.6.6
 semver==2.13.0
 Send2Trash==1.8.0
+sentencepiece==0.1.96
 simplere==1.2.13
 six==1.12.0
 smmap==5.0.0
 wcwidth==0.2.5
 webencodings==0.5.1
 widgetsnbextension==3.6.0
+xxhash==3.0.0
+yarl==1.7.2
 zipp==3.8.0