InvictusRudra committed on
Commit
3081c82
1 Parent(s): a857f25

Upload 4 files

Browse files
Files changed (3) hide show
  1. Dockerfile +0 -1
  2. app1.ipynb +76 -40
  3. requirements.txt +1 -3
Dockerfile CHANGED
@@ -6,7 +6,6 @@ COPY ./requirements.txt /code/requirements.txt
6
 
7
  RUN python3 -m pip install --no-cache-dir --upgrade pip
8
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
9
- # RUN pip install poppler-utils
10
 
11
  COPY . .
12
 
 
6
 
7
  RUN python3 -m pip install --no-cache-dir --upgrade pip
8
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
9
 
10
  COPY . .
11
 
app1.ipynb CHANGED
@@ -9,21 +9,21 @@
9
  "tags": []
10
  },
11
  "source": [
12
- "# LangChain QA Panel App\n",
13
  "\n",
14
  "This notebook shows how to make this app:"
15
  ]
16
  },
17
  {
18
  "cell_type": "code",
19
- "execution_count": null,
20
  "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad",
21
  "metadata": {
22
  "colab": {
23
  "base_uri": "https://localhost:8080/"
24
  },
25
  "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad",
26
- "outputId": "e8015c21-ef2d-406a-d705-d3520d08d8c2",
27
  "tags": []
28
  },
29
  "outputs": [
@@ -103,7 +103,32 @@
103
  },
104
  {
105
  "cell_type": "code",
106
- "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5",
108
  "metadata": {
109
  "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5",
@@ -113,13 +138,12 @@
113
  "source": [
114
  "import panel as pn\n",
115
  "from transformers import pipeline\n",
116
- "from pdf2image import convert_from_path\n",
117
- "import easyocr"
118
  ]
119
  },
120
  {
121
  "cell_type": "code",
122
- "execution_count": null,
123
  "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73",
124
  "metadata": {
125
  "colab": {
@@ -127,13 +151,13 @@
127
  "height": 17
128
  },
129
  "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73",
130
- "outputId": "59d39aa7-4a42-4bd3-a17c-a785456b901a",
131
  "tags": []
132
  },
133
  "outputs": [
134
  {
135
  "data": {
136
- "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': 
{'Quill': 'https://cdn.quilljs.com/1.3.6/quill', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@4.2.5/dist/gridstack-h5', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'gridstack': {'exports': 'GridStack'}}});\n require([\"Quill\"], function(Quill) {\n\twindow.Quill = Quill\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 3;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length;\n } if (((window['Quill'] !== undefined) && (!(window['Quill'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/gridstack/gridstack@4.2.5/dist/gridstack-h5.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if 
(skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\", \"https://unpkg.com/@holoviz/panel@0.14.4/dist/panel.min.js\"];\n var js_modules = [];\n var css_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.bubble.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.snow.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/loading.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/json.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/alerts.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/dataframe.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/card.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/widgets.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/debugger.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/markdown.css\"];\n var inline_js = [ function(Bokeh) {\n inject_raw_css(\"\\n .bk.pn-loading.arc:before {\\n background-image: 
url(\\\"data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHN0eWxlPSJtYXJnaW46IGF1dG87IGJhY2tncm91bmQ6IG5vbmU7IGRpc3BsYXk6IGJsb2NrOyBzaGFwZS1yZW5kZXJpbmc6IGF1dG87IiB2aWV3Qm94PSIwIDAgMTAwIDEwMCIgcHJlc2VydmVBc3BlY3RSYXRpbz0ieE1pZFlNaWQiPiAgPGNpcmNsZSBjeD0iNTAiIGN5PSI1MCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSIjYzNjM2MzIiBzdHJva2Utd2lkdGg9IjEwIiByPSIzNSIgc3Ryb2tlLWRhc2hhcnJheT0iMTY0LjkzMzYxNDMxMzQ2NDE1IDU2Ljk3Nzg3MTQzNzgyMTM4Ij4gICAgPGFuaW1hdGVUcmFuc2Zvcm0gYXR0cmlidXRlTmFtZT0idHJhbnNmb3JtIiB0eXBlPSJyb3RhdGUiIHJlcGVhdENvdW50PSJpbmRlZmluaXRlIiBkdXI9IjFzIiB2YWx1ZXM9IjAgNTAgNTA7MzYwIDUwIDUwIiBrZXlUaW1lcz0iMDsxIj48L2FuaW1hdGVUcmFuc2Zvcm0+ICA8L2NpcmNsZT48L3N2Zz4=\\\");\\n background-size: auto calc(min(50%, 400px));\\n }\\n \");\n }, function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, js_modules, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));",
137
  "application/vnd.holoviews_load.v0+json": ""
138
  },
139
  "metadata": {},
@@ -172,7 +196,7 @@
172
  },
173
  {
174
  "cell_type": "code",
175
- "execution_count": null,
176
  "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd",
177
  "metadata": {
178
  "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd",
@@ -194,7 +218,7 @@
194
  },
195
  {
196
  "cell_type": "code",
197
- "execution_count": null,
198
  "id": "9b83cc06-3401-498f-8f84-8a98370f3121",
199
  "metadata": {
200
  "id": "9b83cc06-3401-498f-8f84-8a98370f3121",
@@ -203,20 +227,13 @@
203
  "outputs": [],
204
  "source": [
205
  "def qa(file, query):\n",
206
- "\n",
207
- " images = convert_from_path(file)\n",
208
- " reader = easyocr.Reader(['en'])\n",
209
- " result = []\n",
210
- " for i in range(len(images)):\n",
211
- " # Save pages as images in the pdf\n",
212
- " images[i].save('page'+ str(i) +'.jpg', 'JPEG')\n",
213
- " x=str(i)\n",
214
- " t='page'+x+'.jpg'\n",
215
- " result.append(reader.readtext(t, detail = 0))\n",
216
- " text = \"\"\n",
217
- " for page in result:\n",
218
- " page_text = \" \".join(page)\n",
219
- " text += page_text\n",
220
  " model = pipeline(\"question-answering\", model='deepset/roberta-base-squad2')\n",
221
  " context = text\n",
222
  " result = model(question=query, context=context)\n",
@@ -226,7 +243,7 @@
226
  },
227
  {
228
  "cell_type": "code",
229
- "execution_count": null,
230
  "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824",
231
  "metadata": {
232
  "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824"
@@ -262,7 +279,7 @@
262
  },
263
  {
264
  "cell_type": "code",
265
- "execution_count": null,
266
  "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c",
267
  "metadata": {
268
  "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c",
@@ -278,7 +295,7 @@
278
  },
279
  {
280
  "cell_type": "code",
281
- "execution_count": null,
282
  "id": "228e2b42-b1ed-43af-b923-031a70241ab0",
283
  "metadata": {
284
  "id": "228e2b42-b1ed-43af-b923-031a70241ab0",
@@ -291,21 +308,21 @@
291
  },
292
  {
293
  "cell_type": "code",
294
- "execution_count": null,
295
  "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58",
296
  "metadata": {
297
  "colab": {
298
  "base_uri": "https://localhost:8080/",
299
- "height": 901
300
  },
301
  "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58",
302
- "outputId": "e43e7013-aa7c-4445-f4b6-9a411971f525",
303
  "tags": []
304
  },
305
  "outputs": [
306
  {
307
  "data": {
308
- "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': 
{'Quill': 'https://cdn.quilljs.com/1.3.6/quill', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@4.2.5/dist/gridstack-h5', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'gridstack': {'exports': 'GridStack'}}});\n require([\"Quill\"], function(Quill) {\n\twindow.Quill = Quill\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 3;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length;\n } if (((window['Quill'] !== undefined) && (!(window['Quill'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/gridstack/gridstack@4.2.5/dist/gridstack-h5.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if 
(skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\", \"https://unpkg.com/@holoviz/panel@0.14.4/dist/panel.min.js\"];\n var js_modules = [];\n var css_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.bubble.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.snow.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/loading.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/json.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/alerts.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/dataframe.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/card.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/widgets.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/debugger.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/markdown.css\"];\n var inline_js = [ function(Bokeh) {\n inject_raw_css(\"\\n .bk.pn-loading.arc:before {\\n background-image: 
url(\\\"data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHN0eWxlPSJtYXJnaW46IGF1dG87IGJhY2tncm91bmQ6IG5vbmU7IGRpc3BsYXk6IGJsb2NrOyBzaGFwZS1yZW5kZXJpbmc6IGF1dG87IiB2aWV3Qm94PSIwIDAgMTAwIDEwMCIgcHJlc2VydmVBc3BlY3RSYXRpbz0ieE1pZFlNaWQiPiAgPGNpcmNsZSBjeD0iNTAiIGN5PSI1MCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSIjYzNjM2MzIiBzdHJva2Utd2lkdGg9IjEwIiByPSIzNSIgc3Ryb2tlLWRhc2hhcnJheT0iMTY0LjkzMzYxNDMxMzQ2NDE1IDU2Ljk3Nzg3MTQzNzgyMTM4Ij4gICAgPGFuaW1hdGVUcmFuc2Zvcm0gYXR0cmlidXRlTmFtZT0idHJhbnNmb3JtIiB0eXBlPSJyb3RhdGUiIHJlcGVhdENvdW50PSJpbmRlZmluaXRlIiBkdXI9IjFzIiB2YWx1ZXM9IjAgNTAgNTA7MzYwIDUwIDUwIiBrZXlUaW1lcz0iMDsxIj48L2FuaW1hdGVUcmFuc2Zvcm0+ICA8L2NpcmNsZT48L3N2Zz4=\\\");\\n background-size: auto calc(min(50%, 400px));\\n }\\n \");\n }, function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, js_modules, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));",
309
  "application/vnd.holoviews_load.v0+json": ""
310
  },
311
  "metadata": {},
@@ -336,8 +353,27 @@
336
  {
337
  "data": {
338
  "text/html": [
339
- "Answer: 40</br>\n",
340
- "Answer: Production, and Editing</br>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  ]
342
  },
343
  "metadata": {},
@@ -347,13 +383,13 @@
347
  "data": {
348
  "application/vnd.holoviews_exec.v0+json": "",
349
  "text/html": [
350
- "<div id='1133'>\n",
351
- " <div class=\"bk-root\" id=\"5053dda3-ccbd-4ed8-ad08-edf8823c2531\" data-root-id=\"1133\"></div>\n",
352
  "</div>\n",
353
  "<script type=\"application/javascript\">(function(root) {\n",
354
  " function embed_document(root) {\n",
355
- " var docs_json = {\"679e668b-6bbb-419a-b35c-ed3bfdcc36d7\":{\"defs\":[{\"extends\":null,\"module\":null,\"name\":\"ReactiveHTML1\",\"overrides\":[],\"properties\":[]},{\"extends\":null,\"module\":null,\"name\":\"FlexBox1\",\"overrides\":[],\"properties\":[{\"default\":\"flex-start\",\"kind\":null,\"name\":\"align_content\"},{\"default\":\"flex-start\",\"kind\":null,\"name\":\"align_items\"},{\"default\":\"row\",\"kind\":null,\"name\":\"flex_direction\"},{\"default\":\"wrap\",\"kind\":null,\"name\":\"flex_wrap\"},{\"default\":\"flex-start\",\"kind\":null,\"name\":\"justify_content\"}]},{\"extends\":null,\"module\":null,\"name\":\"GridStack1\",\"overrides\":[],\"properties\":[{\"default\":\"warn\",\"kind\":null,\"name\":\"mode\"},{\"default\":null,\"kind\":null,\"name\":\"ncols\"},{\"default\":null,\"kind\":null,\"name\":\"nrows\"},{\"default\":true,\"kind\":null,\"name\":\"allow_resize\"},{\"default\":true,\"kind\":null,\"name\":\"allow_drag\"},{\"default\":[],\"kind\":null,\"name\":\"state\"}]},{\"extends\":null,\"module\":null,\"name\":\"click1\",\"overrides\":[],\"properties\":[{\"default\":\"\",\"kind\":null,\"name\":\"terminal_output\"},{\"default\":\"\",\"kind\":null,\"name\":\"debug_name\"},{\"default\":0,\"kind\":null,\"name\":\"clears\"}]},{\"extends\":null,\"module\":null,\"name\":\"NotificationAreaBase1\",\"overrides\":[],\"properties\":[{\"default\":\"bottom-right\",\"kind\":null,\"name\":\"position\"},{\"default\":0,\"kind\":null,\"name\":\"_clear\"}]},{\"extends\":null,\"module\":null,\"name\":\"NotificationArea1\",\"overrides\":[],\"properties\":[{\"default\":[],\"kind\":null,\"name\":\"notifications\"},{\"default\":\"bottom-right\",\"kind\":null,\"name\":\"position\"},{\"default\":0,\"kind\":null,\"name\":\"_clear\"},{\"default\":[{\"background\":\"#ffc107\",\"icon\":{\"className\":\"fas fa-exclamation-triangle\",\"color\":\"white\",\"tagName\":\"i\"},\"type\":\"warning\"},{\"background\":\"#007bff\",\"icon\":{\"className\":\"fas 
fa-info-circle\",\"color\":\"white\",\"tagName\":\"i\"},\"type\":\"info\"}],\"kind\":null,\"name\":\"types\"}]},{\"extends\":null,\"module\":null,\"name\":\"Notification\",\"overrides\":[],\"properties\":[{\"default\":null,\"kind\":null,\"name\":\"background\"},{\"default\":3000,\"kind\":null,\"name\":\"duration\"},{\"default\":null,\"kind\":null,\"name\":\"icon\"},{\"default\":\"\",\"kind\":null,\"name\":\"message\"},{\"default\":null,\"kind\":null,\"name\":\"notification_type\"},{\"default\":false,\"kind\":null,\"name\":\"_destroyed\"}]},{\"extends\":null,\"module\":null,\"name\":\"TemplateActions1\",\"overrides\":[],\"properties\":[{\"default\":0,\"kind\":null,\"name\":\"open_modal\"},{\"default\":0,\"kind\":null,\"name\":\"close_modal\"}]},{\"extends\":null,\"module\":null,\"name\":\"MaterialTemplateActions1\",\"overrides\":[],\"properties\":[{\"default\":0,\"kind\":null,\"name\":\"open_modal\"},{\"default\":0,\"kind\":null,\"name\":\"close_modal\"}]}],\"roots\":{\"references\":[{\"attributes\":{\"margin\":[5,10,5,10],\"min_width\":300,\"width\":300},\"id\":\"1136\",\"type\":\"FileInput\"},{\"attributes\":{\"children\":[{\"id\":\"1134\"},{\"id\":\"1135\"},{\"id\":\"1137\"},{\"id\":\"1141\"}],\"margin\":[0,0,0,0],\"name\":\"Column00327\",\"sizing_mode\":\"stretch_width\"},\"id\":\"1133\",\"type\":\"Column\"},{\"attributes\":{\"icon\":null,\"js_event_callbacks\":{\"button_click\":[{\"id\":\"1145\"}]},\"label\":\"Run\",\"margin\":[5,10,5,10],\"sizing_mode\":\"stretch_width\",\"subscribed_events\":[\"button_click\"]},\"id\":\"1144\",\"type\":\"Button\"},{\"attributes\":{\"css_classes\":[\"markdown\"],\"margin\":[5,5,5,5],\"name\":\"Markdown00321\",\"sizing_mode\":\"stretch_width\",\"text\":\"&lt;p&gt;&lt;em&gt;Output will show up 
here:&lt;/em&gt;&lt;/p&gt;\"},\"id\":\"1138\",\"type\":\"panel.models.markup.HTML\"},{\"attributes\":{\"children\":[{\"id\":\"1140\"}],\"margin\":[0,0,0,0],\"name\":\"Row00317\"},\"id\":\"1139\",\"type\":\"Row\"},{\"attributes\":{\"margin\":[15,15,15,15],\"min_height\":400,\"min_width\":575,\"name\":\"Column00319\",\"width\":575},\"id\":\"1140\",\"type\":\"Column\"},{\"attributes\":{\"args\":{\"bidirectional\":false,\"properties\":{\"event:button_click\":\"loading\"},\"source\":{\"id\":\"1144\"},\"target\":{\"id\":\"1139\"}},\"code\":\"\\n if ('event:button_click'.startsWith('event:')) {\\n var value = true\\n } else {\\n var value = source['event:button_click'];\\n value = value;\\n }\\n if (typeof value !== 'boolean' || source.labels !== ['Loading']) {\\n value = true\\n }\\n var css_classes = target.css_classes.slice()\\n var loading_css = ['pn-loading', 'arc']\\n if (value) {\\n for (var css of loading_css) {\\n if (!(css in css_classes)) {\\n css_classes.push(css)\\n }\\n }\\n } else {\\n for (var css of loading_css) {\\n var index = css_classes.indexOf(css)\\n if (index > -1) {\\n css_classes.splice(index, 1)\\n }\\n }\\n }\\n target['css_classes'] = css_classes\\n \",\"tags\":[[140543018023040,[null,\"event:button_click\"],[null,\"loading\"]]]},\"id\":\"1145\",\"type\":\"CustomJS\"},{\"attributes\":{\"children\":[{\"id\":\"1143\"},{\"id\":\"1144\"}],\"margin\":[5,5,5,5],\"name\":\"Column00311\",\"sizing_mode\":\"stretch_width\"},\"id\":\"1142\",\"type\":\"Column\"},{\"attributes\":{\"height\":160,\"margin\":[5,10,5,10],\"name\":\"\",\"placeholder\":\"Enter your questions 
here...\",\"sizing_mode\":\"stretch_width\",\"toolbar\":false},\"id\":\"1143\",\"type\":\"panel.models.quill.QuillInput\"},{\"attributes\":{\"children\":[{\"id\":\"1136\"}],\"margin\":[0,0,0,0],\"name\":\"Row00326\",\"sizing_mode\":\"stretch_width\"},\"id\":\"1135\",\"type\":\"Row\"},{\"attributes\":{\"children\":[{\"id\":\"1138\"},{\"id\":\"1139\"}],\"css_classes\":[\"panel-widget-box\",\"scrollable\"],\"margin\":[5,5,5,5],\"min_width\":630,\"name\":\"WidgetBox00323\",\"width\":630},\"id\":\"1137\",\"type\":\"Column\"},{\"attributes\":{\"children\":[{\"id\":\"1142\"}],\"margin\":[0,0,0,0],\"min_width\":630,\"name\":\"Row00312\",\"width\":630},\"id\":\"1141\",\"type\":\"Row\"},{\"attributes\":{\"css_classes\":[\"markdown\"],\"margin\":[5,5,5,5],\"name\":\"Markdown00324\",\"sizing_mode\":\"stretch_width\",\"text\":\"&lt;p&gt;Question Answering with your PDF file!&lt;/p&gt;\\n&lt;p&gt;1) Upload a PDF. &lt;/p&gt;\\n&lt;p&gt;2) Type a question and click &amp;ldquo;Run&amp;rdquo;.&lt;/p&gt;\"},\"id\":\"1134\",\"type\":\"panel.models.markup.HTML\"},{\"attributes\":{\"client_comm_id\":\"bff23f4e09c246f6b9231843dd08c833\",\"comm_id\":\"57e7a9bb3a6742cea3f970d832963ac2\",\"plot_id\":\"1133\"},\"id\":\"1146\",\"type\":\"panel.models.comm_manager.CommManager\"}],\"root_ids\":[\"1133\",\"1146\"]},\"title\":\"Bokeh Application\",\"version\":\"2.4.3\"}};\n",
356
- " var render_items = [{\"docid\":\"679e668b-6bbb-419a-b35c-ed3bfdcc36d7\",\"root_ids\":[\"1133\"],\"roots\":{\"1133\":\"5053dda3-ccbd-4ed8-ad08-edf8823c2531\"}}];\n",
357
  " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
358
  " for (const render_item of render_items) {\n",
359
  " for (const root_id of render_item.root_ids) {\n",
@@ -398,10 +434,10 @@
398
  " [1] Button(name='Run', sizing_mode='stretch_width')"
399
  ]
400
  },
401
- "execution_count": 50,
402
  "metadata": {
403
  "application/vnd.holoviews_exec.v0+json": {
404
- "id": "1133"
405
  }
406
  },
407
  "output_type": "execute_result"
 
9
  "tags": []
10
  },
11
  "source": [
12
+ "# Hackathon Illuminati Question answering app\n",
13
  "\n",
14
  "This notebook shows how to make this app:"
15
  ]
16
  },
17
  {
18
  "cell_type": "code",
19
+ "execution_count": 38,
20
  "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad",
21
  "metadata": {
22
  "colab": {
23
  "base_uri": "https://localhost:8080/"
24
  },
25
  "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad",
26
+ "outputId": "0589dcc5-1cf0-42be-a29a-0f75e3bb877d",
27
  "tags": []
28
  },
29
  "outputs": [
 
103
  },
104
  {
105
  "cell_type": "code",
106
+ "execution_count": 39,
107
+ "id": "y_NPBrAUkFxS",
108
+ "metadata": {
109
+ "colab": {
110
+ "base_uri": "https://localhost:8080/"
111
+ },
112
+ "id": "y_NPBrAUkFxS",
113
+ "outputId": "2c418c3d-52a7-4f31-fac9-69da8c356571"
114
+ },
115
+ "outputs": [
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
121
+ "Requirement already satisfied: PyPDF2 in /usr/local/lib/python3.10/dist-packages (3.0.1)\n"
122
+ ]
123
+ }
124
+ ],
125
+ "source": [
126
+ "# ! pip install PyPDF2"
127
+ ]
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "execution_count": 40,
132
  "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5",
133
  "metadata": {
134
  "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5",
 
138
  "source": [
139
  "import panel as pn\n",
140
  "from transformers import pipeline\n",
141
+ "from PyPDF2 import PdfReader"
 
142
  ]
143
  },
144
  {
145
  "cell_type": "code",
146
+ "execution_count": 41,
147
  "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73",
148
  "metadata": {
149
  "colab": {
 
151
  "height": 17
152
  },
153
  "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73",
154
+ "outputId": "7d8183bc-c2f2-4e18-cd39-d9788d86c154",
155
  "tags": []
156
  },
157
  "outputs": [
158
  {
159
  "data": {
160
+ "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': 
{'Quill': 'https://cdn.quilljs.com/1.3.6/quill', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@4.2.5/dist/gridstack-h5', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'gridstack': {'exports': 'GridStack'}}});\n require([\"Quill\"], function(Quill) {\n\twindow.Quill = Quill\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 3;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length;\n } if (((window['Quill'] !== undefined) && (!(window['Quill'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/gridstack/gridstack@4.2.5/dist/gridstack-h5.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if 
(skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\", \"https://unpkg.com/@holoviz/panel@0.14.4/dist/panel.min.js\"];\n var js_modules = [];\n var css_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.bubble.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.snow.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/widgets.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/alerts.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/markdown.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/card.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/loading.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/dataframe.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/json.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/debugger.css\"];\n var inline_js = [ function(Bokeh) {\n inject_raw_css(\"\\n .bk.pn-loading.arc:before {\\n background-image: 
url(\\\"data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHN0eWxlPSJtYXJnaW46IGF1dG87IGJhY2tncm91bmQ6IG5vbmU7IGRpc3BsYXk6IGJsb2NrOyBzaGFwZS1yZW5kZXJpbmc6IGF1dG87IiB2aWV3Qm94PSIwIDAgMTAwIDEwMCIgcHJlc2VydmVBc3BlY3RSYXRpbz0ieE1pZFlNaWQiPiAgPGNpcmNsZSBjeD0iNTAiIGN5PSI1MCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSIjYzNjM2MzIiBzdHJva2Utd2lkdGg9IjEwIiByPSIzNSIgc3Ryb2tlLWRhc2hhcnJheT0iMTY0LjkzMzYxNDMxMzQ2NDE1IDU2Ljk3Nzg3MTQzNzgyMTM4Ij4gICAgPGFuaW1hdGVUcmFuc2Zvcm0gYXR0cmlidXRlTmFtZT0idHJhbnNmb3JtIiB0eXBlPSJyb3RhdGUiIHJlcGVhdENvdW50PSJpbmRlZmluaXRlIiBkdXI9IjFzIiB2YWx1ZXM9IjAgNTAgNTA7MzYwIDUwIDUwIiBrZXlUaW1lcz0iMDsxIj48L2FuaW1hdGVUcmFuc2Zvcm0+ICA8L2NpcmNsZT48L3N2Zz4=\\\");\\n background-size: auto calc(min(50%, 400px));\\n }\\n \");\n }, function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, js_modules, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));",
161
  "application/vnd.holoviews_load.v0+json": ""
162
  },
163
  "metadata": {},
 
196
  },
197
  {
198
  "cell_type": "code",
199
+ "execution_count": 42,
200
  "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd",
201
  "metadata": {
202
  "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd",
 
218
  },
219
  {
220
  "cell_type": "code",
221
+ "execution_count": 43,
222
  "id": "9b83cc06-3401-498f-8f84-8a98370f3121",
223
  "metadata": {
224
  "id": "9b83cc06-3401-498f-8f84-8a98370f3121",
 
227
  "outputs": [],
228
  "source": [
229
  "def qa(file, query):\n",
230
+ " text = ''\n",
231
+ " reader = PdfReader(file)\n",
232
+ " for i in range(len(reader.pages)):\n",
233
+ " # creating a page object\n",
234
+ " pageObj = reader.pages[i]\n",
235
+ " # extracting text from page\n",
236
+ " text=text+pageObj.extract_text()\n",
 
 
 
 
 
 
 
237
  " model = pipeline(\"question-answering\", model='deepset/roberta-base-squad2')\n",
238
  " context = text\n",
239
  " result = model(question=query, context=context)\n",
 
243
  },
244
  {
245
  "cell_type": "code",
246
+ "execution_count": 44,
247
  "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824",
248
  "metadata": {
249
  "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824"
 
279
  },
280
  {
281
  "cell_type": "code",
282
+ "execution_count": 45,
283
  "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c",
284
  "metadata": {
285
  "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c",
 
295
  },
296
  {
297
  "cell_type": "code",
298
+ "execution_count": 46,
299
  "id": "228e2b42-b1ed-43af-b923-031a70241ab0",
300
  "metadata": {
301
  "id": "228e2b42-b1ed-43af-b923-031a70241ab0",
 
308
  },
309
  {
310
  "cell_type": "code",
311
+ "execution_count": 47,
312
  "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58",
313
  "metadata": {
314
  "colab": {
315
  "base_uri": "https://localhost:8080/",
316
+ "height": 1000
317
  },
318
  "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58",
319
+ "outputId": "f7cee6f1-c6a7-438c-ec93-8c41c9fa6665",
320
  "tags": []
321
  },
322
  "outputs": [
323
  {
324
  "data": {
325
+ "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': 
{'Quill': 'https://cdn.quilljs.com/1.3.6/quill', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@4.2.5/dist/gridstack-h5', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'gridstack': {'exports': 'GridStack'}}});\n require([\"Quill\"], function(Quill) {\n\twindow.Quill = Quill\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 3;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length;\n } if (((window['Quill'] !== undefined) && (!(window['Quill'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/gridstack/gridstack@4.2.5/dist/gridstack-h5.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if 
(skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\", \"https://unpkg.com/@holoviz/panel@0.14.4/dist/panel.min.js\"];\n var js_modules = [];\n var css_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.bubble.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.snow.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/widgets.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/alerts.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/markdown.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/card.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/loading.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/dataframe.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/json.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/debugger.css\"];\n var inline_js = [ function(Bokeh) {\n inject_raw_css(\"\\n .bk.pn-loading.arc:before {\\n background-image: 
url(\\\"data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHN0eWxlPSJtYXJnaW46IGF1dG87IGJhY2tncm91bmQ6IG5vbmU7IGRpc3BsYXk6IGJsb2NrOyBzaGFwZS1yZW5kZXJpbmc6IGF1dG87IiB2aWV3Qm94PSIwIDAgMTAwIDEwMCIgcHJlc2VydmVBc3BlY3RSYXRpbz0ieE1pZFlNaWQiPiAgPGNpcmNsZSBjeD0iNTAiIGN5PSI1MCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSIjYzNjM2MzIiBzdHJva2Utd2lkdGg9IjEwIiByPSIzNSIgc3Ryb2tlLWRhc2hhcnJheT0iMTY0LjkzMzYxNDMxMzQ2NDE1IDU2Ljk3Nzg3MTQzNzgyMTM4Ij4gICAgPGFuaW1hdGVUcmFuc2Zvcm0gYXR0cmlidXRlTmFtZT0idHJhbnNmb3JtIiB0eXBlPSJyb3RhdGUiIHJlcGVhdENvdW50PSJpbmRlZmluaXRlIiBkdXI9IjFzIiB2YWx1ZXM9IjAgNTAgNTA7MzYwIDUwIDUwIiBrZXlUaW1lcz0iMDsxIj48L2FuaW1hdGVUcmFuc2Zvcm0+ICA8L2NpcmNsZT48L3N2Zz4=\\\");\\n background-size: auto calc(min(50%, 400px));\\n }\\n \");\n }, function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, js_modules, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));",
326
  "application/vnd.holoviews_load.v0+json": ""
327
  },
328
  "metadata": {},
 
353
  {
354
  "data": {
355
  "text/html": [
356
+ "Beyond the Buzz - Assignment 2</br>\n",
357
+ "Rishi Agarwal, Shivam Sharma, Varun Tokas</br>\n",
358
+ "May 2023</br>\n",
359
+ "1 MNIST Classifier</br>\n",
360
+ "You’ll be making a GAN to generate pictures of random digits, which is trained on the MNIST dataset. You</br>\n",
361
+ "are allowed to use pytorch, or any other framework you want.</br>\n",
362
+ "Here are some instructions to get started-</br>\n",
363
+ "•In a file, say models.py, define your Generator and Discriminator models. Keep them simple for this</br>\n",
364
+ "task, no more than 2 hidden layers are required. Keep the input and output of the models according</br>\n",
365
+ "to how I described today.</br>\n",
366
+ "•In another file, say train.py, import the MNIST data using a torchvision library. Before importing,</br>\n",
367
+ "also define the transform which normalizes the MNIST data with 0.5 mean and 0.5 std. deviation.</br>\n",
368
+ "•After getting the normalized dataset, create two separate Adam optmizers for the Generator and the</br>\n",
369
+ "Discriminator with a suitable learning rate.</br>\n",
370
+ "•Use the nn.BCELoss as your criterion, read its documentation and think about how you would generate</br>\n",
371
+ "the loss functions for the Generator and Discriminator from this.</br>\n",
372
+ "•Set up the training loop with a suitable number of epochs.</br>\n",
373
+ "For the evaluation, keep a directory with a the images your generator as examples, also document the</br>\n",
374
+ "code in the README with the instructions on how it works and how to run it.</br>\n",
375
+ "1</br>\n",
376
+ "Answer: nn.BCELoss</br>"
377
  ]
378
  },
379
  "metadata": {},
 
383
  "data": {
384
  "application/vnd.holoviews_exec.v0+json": "",
385
  "text/html": [
386
+ "<div id='1146'>\n",
387
+ " <div class=\"bk-root\" id=\"030d8e6c-df3a-4aa5-b947-e57e47190711\" data-root-id=\"1146\"></div>\n",
388
  "</div>\n",
389
  "<script type=\"application/javascript\">(function(root) {\n",
390
  " function embed_document(root) {\n",
391
+ " var docs_json = {\"b093a904-94d6-4792-9831-2329420647aa\":{\"defs\":[{\"extends\":null,\"module\":null,\"name\":\"ReactiveHTML1\",\"overrides\":[],\"properties\":[]},{\"extends\":null,\"module\":null,\"name\":\"FlexBox1\",\"overrides\":[],\"properties\":[{\"default\":\"flex-start\",\"kind\":null,\"name\":\"align_content\"},{\"default\":\"flex-start\",\"kind\":null,\"name\":\"align_items\"},{\"default\":\"row\",\"kind\":null,\"name\":\"flex_direction\"},{\"default\":\"wrap\",\"kind\":null,\"name\":\"flex_wrap\"},{\"default\":\"flex-start\",\"kind\":null,\"name\":\"justify_content\"}]},{\"extends\":null,\"module\":null,\"name\":\"GridStack1\",\"overrides\":[],\"properties\":[{\"default\":\"warn\",\"kind\":null,\"name\":\"mode\"},{\"default\":null,\"kind\":null,\"name\":\"ncols\"},{\"default\":null,\"kind\":null,\"name\":\"nrows\"},{\"default\":true,\"kind\":null,\"name\":\"allow_resize\"},{\"default\":true,\"kind\":null,\"name\":\"allow_drag\"},{\"default\":[],\"kind\":null,\"name\":\"state\"}]},{\"extends\":null,\"module\":null,\"name\":\"click1\",\"overrides\":[],\"properties\":[{\"default\":\"\",\"kind\":null,\"name\":\"terminal_output\"},{\"default\":\"\",\"kind\":null,\"name\":\"debug_name\"},{\"default\":0,\"kind\":null,\"name\":\"clears\"}]},{\"extends\":null,\"module\":null,\"name\":\"NotificationAreaBase1\",\"overrides\":[],\"properties\":[{\"default\":\"bottom-right\",\"kind\":null,\"name\":\"position\"},{\"default\":0,\"kind\":null,\"name\":\"_clear\"}]},{\"extends\":null,\"module\":null,\"name\":\"NotificationArea1\",\"overrides\":[],\"properties\":[{\"default\":[],\"kind\":null,\"name\":\"notifications\"},{\"default\":\"bottom-right\",\"kind\":null,\"name\":\"position\"},{\"default\":0,\"kind\":null,\"name\":\"_clear\"},{\"default\":[{\"background\":\"#ffc107\",\"icon\":{\"className\":\"fas fa-exclamation-triangle\",\"color\":\"white\",\"tagName\":\"i\"},\"type\":\"warning\"},{\"background\":\"#007bff\",\"icon\":{\"className\":\"fas 
fa-info-circle\",\"color\":\"white\",\"tagName\":\"i\"},\"type\":\"info\"}],\"kind\":null,\"name\":\"types\"}]},{\"extends\":null,\"module\":null,\"name\":\"Notification\",\"overrides\":[],\"properties\":[{\"default\":null,\"kind\":null,\"name\":\"background\"},{\"default\":3000,\"kind\":null,\"name\":\"duration\"},{\"default\":null,\"kind\":null,\"name\":\"icon\"},{\"default\":\"\",\"kind\":null,\"name\":\"message\"},{\"default\":null,\"kind\":null,\"name\":\"notification_type\"},{\"default\":false,\"kind\":null,\"name\":\"_destroyed\"}]},{\"extends\":null,\"module\":null,\"name\":\"TemplateActions1\",\"overrides\":[],\"properties\":[{\"default\":0,\"kind\":null,\"name\":\"open_modal\"},{\"default\":0,\"kind\":null,\"name\":\"close_modal\"}]},{\"extends\":null,\"module\":null,\"name\":\"MaterialTemplateActions1\",\"overrides\":[],\"properties\":[{\"default\":0,\"kind\":null,\"name\":\"open_modal\"},{\"default\":0,\"kind\":null,\"name\":\"close_modal\"}]}],\"roots\":{\"references\":[{\"attributes\":{\"children\":[{\"id\":\"1149\"}],\"margin\":[0,0,0,0],\"name\":\"Row00299\",\"sizing_mode\":\"stretch_width\"},\"id\":\"1148\",\"type\":\"Row\"},{\"attributes\":{\"children\":[{\"id\":\"1156\"},{\"id\":\"1157\"}],\"margin\":[5,5,5,5],\"name\":\"Column00284\",\"sizing_mode\":\"stretch_width\"},\"id\":\"1155\",\"type\":\"Column\"},{\"attributes\":{\"css_classes\":[\"markdown\"],\"margin\":[5,5,5,5],\"name\":\"Markdown00294\",\"sizing_mode\":\"stretch_width\",\"text\":\"&lt;p&gt;&lt;em&gt;Output will show up 
here:&lt;/em&gt;&lt;/p&gt;\"},\"id\":\"1151\",\"type\":\"panel.models.markup.HTML\"},{\"attributes\":{\"margin\":[5,10,5,10],\"min_width\":300,\"width\":300},\"id\":\"1149\",\"type\":\"FileInput\"},{\"attributes\":{\"children\":[{\"id\":\"1153\"}],\"margin\":[0,0,0,0],\"name\":\"Row00290\"},\"id\":\"1152\",\"type\":\"Row\"},{\"attributes\":{\"icon\":null,\"js_event_callbacks\":{\"button_click\":[{\"id\":\"1158\"}]},\"label\":\"Run\",\"margin\":[5,10,5,10],\"sizing_mode\":\"stretch_width\",\"subscribed_events\":[\"button_click\"]},\"id\":\"1157\",\"type\":\"Button\"},{\"attributes\":{\"css_classes\":[\"markdown\"],\"margin\":[5,5,5,5],\"name\":\"Markdown00297\",\"sizing_mode\":\"stretch_width\",\"text\":\"&lt;p&gt;Question Answering with your PDF file!&lt;/p&gt;\\n&lt;p&gt;1) Upload a PDF. &lt;/p&gt;\\n&lt;p&gt;2) Type a question and click &amp;ldquo;Run&amp;rdquo;.&lt;/p&gt;\"},\"id\":\"1147\",\"type\":\"panel.models.markup.HTML\"},{\"attributes\":{\"args\":{\"bidirectional\":false,\"properties\":{\"event:button_click\":\"loading\"},\"source\":{\"id\":\"1157\"},\"target\":{\"id\":\"1152\"}},\"code\":\"\\n if ('event:button_click'.startsWith('event:')) {\\n var value = true\\n } else {\\n var value = source['event:button_click'];\\n value = value;\\n }\\n if (typeof value !== 'boolean' || source.labels !== ['Loading']) {\\n value = true\\n }\\n var css_classes = target.css_classes.slice()\\n var loading_css = ['pn-loading', 'arc']\\n if (value) {\\n for (var css of loading_css) {\\n if (!(css in css_classes)) {\\n css_classes.push(css)\\n }\\n }\\n } else {\\n for (var css of loading_css) {\\n var index = css_classes.indexOf(css)\\n if (index > -1) {\\n css_classes.splice(index, 1)\\n }\\n }\\n }\\n target['css_classes'] = css_classes\\n \",\"tags\":[[139650297305440,[null,\"event:button_click\"],[null,\"loading\"]]]},\"id\":\"1158\",\"type\":\"CustomJS\"},{\"attributes\":{\"height\":160,\"margin\":[5,10,5,10],\"name\":\"\",\"placeholder\":\"Enter your questions 
here...\",\"sizing_mode\":\"stretch_width\",\"toolbar\":false},\"id\":\"1156\",\"type\":\"panel.models.quill.QuillInput\"},{\"attributes\":{\"children\":[{\"id\":\"1147\"},{\"id\":\"1148\"},{\"id\":\"1150\"},{\"id\":\"1154\"}],\"margin\":[0,0,0,0],\"name\":\"Column00300\",\"sizing_mode\":\"stretch_width\"},\"id\":\"1146\",\"type\":\"Column\"},{\"attributes\":{\"client_comm_id\":\"f482e637a4c745e993e44237b18d5906\",\"comm_id\":\"b2d2810fcd4a4ab58af405656b7363d2\",\"plot_id\":\"1146\"},\"id\":\"1159\",\"type\":\"panel.models.comm_manager.CommManager\"},{\"attributes\":{\"margin\":[15,15,15,15],\"min_height\":400,\"min_width\":575,\"name\":\"Column00292\",\"width\":575},\"id\":\"1153\",\"type\":\"Column\"},{\"attributes\":{\"children\":[{\"id\":\"1151\"},{\"id\":\"1152\"}],\"css_classes\":[\"panel-widget-box\",\"scrollable\"],\"margin\":[5,5,5,5],\"min_width\":630,\"name\":\"WidgetBox00296\",\"width\":630},\"id\":\"1150\",\"type\":\"Column\"},{\"attributes\":{\"children\":[{\"id\":\"1155\"}],\"margin\":[0,0,0,0],\"min_width\":630,\"name\":\"Row00285\",\"width\":630},\"id\":\"1154\",\"type\":\"Row\"}],\"root_ids\":[\"1146\",\"1159\"]},\"title\":\"Bokeh Application\",\"version\":\"2.4.3\"}};\n",
392
+ " var render_items = [{\"docid\":\"b093a904-94d6-4792-9831-2329420647aa\",\"root_ids\":[\"1146\"],\"roots\":{\"1146\":\"030d8e6c-df3a-4aa5-b947-e57e47190711\"}}];\n",
393
  " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
394
  " for (const render_item of render_items) {\n",
395
  " for (const root_id of render_item.root_ids) {\n",
 
434
  " [1] Button(name='Run', sizing_mode='stretch_width')"
435
  ]
436
  },
437
+ "execution_count": 47,
438
  "metadata": {
439
  "application/vnd.holoviews_exec.v0+json": {
440
+ "id": "1146"
441
  }
442
  },
443
  "output_type": "execute_result"
requirements.txt CHANGED
@@ -1,6 +1,4 @@
1
  panel
2
  transformers
3
- pdf2image
4
- easyocr
5
  notebook
6
- poppler-utils
 
1
  panel
2
  transformers
3
+ PyPDF2
 
4
  notebook