diff --git "a/MultimodalRAG.ipynb" "b/MultimodalRAG.ipynb" new file mode 100644--- /dev/null +++ "b/MultimodalRAG.ipynb" @@ -0,0 +1,2593 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "f00c572439414e4b9a960175242d0be5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_95907a58c9fc4e758976918df3e4d696", + "IPY_MODEL_1ef85a4dbe5643df93666b95a1810e8c", + "IPY_MODEL_1328c7e39383433eac1cb04515abe8c1" + ], + "layout": "IPY_MODEL_d7f200d9b80b4b0497011d8a4d8ccbaa" + } + }, + "95907a58c9fc4e758976918df3e4d696": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6bb350105f8d4b1e9730182587884f54", + "placeholder": "​", + "style": "IPY_MODEL_d3684c184dd547ba8afdcfd3dc35b5cb", + "value": "yolox_l0.05.onnx: 100%" + } + }, + "1ef85a4dbe5643df93666b95a1810e8c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_195d818eeef8465281cc24642b05607e", + "max": 216625723, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_06d74a0dbf96445fa948ce710581c94b", + "value": 216625723 + } + }, + "1328c7e39383433eac1cb04515abe8c1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9449f30a8c0546b1b4111b743ce8034e", + "placeholder": "​", + "style": "IPY_MODEL_4aeac8542a9f43d880ddce30cb384ab0", + "value": " 217M/217M [00:03<00:00, 73.2MB/s]" + } + }, + "d7f200d9b80b4b0497011d8a4d8ccbaa": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6bb350105f8d4b1e9730182587884f54": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d3684c184dd547ba8afdcfd3dc35b5cb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "195d818eeef8465281cc24642b05607e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "06d74a0dbf96445fa948ce710581c94b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"9449f30a8c0546b1b4111b743ce8034e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4aeac8542a9f43d880ddce30cb384ab0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e9ee42809e1d4ded9a45b6965b9b7a7a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e246be046226454e8315e0b988e68358", + "IPY_MODEL_a707ee518c87430d82f12bc1ae7da4b0", + "IPY_MODEL_c0e6a8eb79d8424e83db0b2796e167c3" + ], + "layout": "IPY_MODEL_a848b8f793b440d797c6576fe3d7824a" + } + }, + "e246be046226454e8315e0b988e68358": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0a4e247fba624be8bf247a591f54fccf", + "placeholder": "​", + "style": "IPY_MODEL_233654916ea543feae69e7c81eb7828a", + "value": "config.json: 100%" + } + }, + "a707ee518c87430d82f12bc1ae7da4b0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_44087e28225f4277b82f32b8757405c9", + "max": 1469, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4ac1c050832d4e70b2c970751852b4d1", + "value": 1469 + } + }, + "c0e6a8eb79d8424e83db0b2796e167c3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ac99b0e7854749b786b7db3efe11418d", + "placeholder": "​", + "style": "IPY_MODEL_49f14e4282804abbbbf08ddab5151622", + "value": " 1.47k/1.47k [00:00<00:00, 79.8kB/s]" + } + }, + "a848b8f793b440d797c6576fe3d7824a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0a4e247fba624be8bf247a591f54fccf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "233654916ea543feae69e7c81eb7828a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "44087e28225f4277b82f32b8757405c9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4ac1c050832d4e70b2c970751852b4d1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ac99b0e7854749b786b7db3efe11418d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "49f14e4282804abbbbf08ddab5151622": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "857d44aa6b5a42aaa19b687f8bfbab2e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1acf5f3e8fd24a98b4b14c2718b813be", + "IPY_MODEL_d946d363b4ca4a8581f33a5cab6322a7", + "IPY_MODEL_62b40363de754d51bd920b2a02c896a4" + ], + "layout": "IPY_MODEL_17202e04d4fe46a5825f73c2ffdc12b8" + } + }, + "1acf5f3e8fd24a98b4b14c2718b813be": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0addf04ae5da431ea728454fbc618d20", + "placeholder": "​", + "style": "IPY_MODEL_5255d7e4e1b4485fbd0042493ff40736", + "value": "model.safetensors: 100%" + } + }, + "d946d363b4ca4a8581f33a5cab6322a7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ccb21b1b5f114786982ed9816e97dda2", + "max": 115434268, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7f888a984b5d40ccbc3aa84dea2ed9fd", + "value": 115434268 + } + }, + "62b40363de754d51bd920b2a02c896a4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d1d1a62343d245d9b5e4d1cf9fd61d7e", + "placeholder": "​", + "style": "IPY_MODEL_b737918adb574c3a91fac7a70af0e73f", + "value": " 115M/115M [00:01<00:00, 78.1MB/s]" + } + }, + "17202e04d4fe46a5825f73c2ffdc12b8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0addf04ae5da431ea728454fbc618d20": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5255d7e4e1b4485fbd0042493ff40736": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ccb21b1b5f114786982ed9816e97dda2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7f888a984b5d40ccbc3aa84dea2ed9fd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d1d1a62343d245d9b5e4d1cf9fd61d7e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b737918adb574c3a91fac7a70af0e73f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2302b01adee54fef8fbaf67e9051b956": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6681aabe89b64879a789445f5cdee4a6", + "IPY_MODEL_7ec6e77ea9d14b1ba893e19e9c56d385", + "IPY_MODEL_78a171401f844335be96f958eaf2088c" + ], + "layout": "IPY_MODEL_e207f1d498e945e9bd29a178293c37c9" + } + }, + "6681aabe89b64879a789445f5cdee4a6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bac87ae477914d948419f3ed26a7789a", + "placeholder": "​", + "style": "IPY_MODEL_9a702decc7db41f8bb9e891cd5c46913", + "value": "model.safetensors: 100%" + } + }, + "7ec6e77ea9d14b1ba893e19e9c56d385": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_5d9b29bbe7644c9daee66975e04ada13", + "max": 46807446, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3a5a9eaf750f4b5c9941e77ea6ceb20b", + "value": 46807446 + } + }, + "78a171401f844335be96f958eaf2088c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5827837e4e1446ed974ffda49affe8e4", + "placeholder": "​", + "style": "IPY_MODEL_44e26064746847be93e2995ff7ba28b9", + "value": " 46.8M/46.8M [00:00<00:00, 81.7MB/s]" + } + }, + "e207f1d498e945e9bd29a178293c37c9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, 
+ "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bac87ae477914d948419f3ed26a7789a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a702decc7db41f8bb9e891cd5c46913": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5d9b29bbe7644c9daee66975e04ada13": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3a5a9eaf750f4b5c9941e77ea6ceb20b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5827837e4e1446ed974ffda49affe8e4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "44e26064746847be93e2995ff7ba28b9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "r4TWGw7yf5XY", + "outputId": "3fff66d8-a309-4f8b-9839-949db717c970" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... Done\n", + "tesseract-ocr is already the newest version (4.1.1-2.1build1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.\n", + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... 
Done\n", + "libtesseract-dev is already the newest version (4.1.1-2.1build1).\n", + "0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.\n", + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... Done\n", + "poppler-utils is already the newest version (22.02.0-2ubuntu0.4).\n", + "0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.\n" + ] + } + ], + "source": [ + "!sudo apt install tesseract-ocr -y\n", + "!sudo apt install libtesseract-dev -y\n", + "!sudo apt-get install poppler-utils -y\n" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install langchain unstructured[all-docs] pydantic lxml openai chromadb tiktoken opencv-python" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dRxdsRvegMiA", + "outputId": "a05db96f-af80-487f-86d3-ba8c691e3d23" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: langchain in /usr/local/lib/python3.10/dist-packages (0.2.1)\n", + "Requirement already satisfied: unstructured[all-docs] in /usr/local/lib/python3.10/dist-packages (0.14.3)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (2.7.1)\n", + "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (4.9.4)\n", + "Requirement already satisfied: openai in /usr/local/lib/python3.10/dist-packages (1.30.5)\n", + "Requirement already satisfied: chromadb in /usr/local/lib/python3.10/dist-packages (0.5.0)\n", + "Requirement already satisfied: tiktoken in /usr/local/lib/python3.10/dist-packages (0.7.0)\n", + "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (4.8.0.76)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in 
/usr/local/lib/python3.10/dist-packages (from langchain) (2.0.30)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.5)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n", + "Requirement already satisfied: langchain-core<0.3.0,>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (0.2.3)\n", + "Requirement already satisfied: langchain-text-splitters<0.3.0,>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (0.2.0)\n", + "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in /usr/local/lib/python3.10/dist-packages (from langchain) (0.1.66)\n", + "Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.25.2)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.3.0)\n", + "Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (5.2.0)\n", + "Requirement already satisfied: filetype in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: python-magic in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.4.27)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (3.8.1)\n", + "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (4.12.3)\n", + "Requirement already satisfied: emoji in /usr/local/lib/python3.10/dist-packages (from 
unstructured[all-docs]) (2.12.1)\n", + "Requirement already satisfied: dataclasses-json in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.6.6)\n", + "Requirement already satisfied: python-iso639 in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (2024.4.27)\n", + "Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (1.0.9)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (3.9.2)\n", + "Requirement already satisfied: backoff in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (2.2.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (4.11.0)\n", + "Requirement already satisfied: unstructured-client in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.22.0)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (1.14.1)\n", + "Requirement already satisfied: msg-parser in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: pypdf in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (4.2.0)\n", + "Requirement already satisfied: unstructured.pytesseract>=0.3.12 in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.3.12)\n", + "Requirement already satisfied: xlrd in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: python-pptx<=0.6.23 in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.6.23)\n", + "Requirement already satisfied: pikepdf in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (9.0.0)\n", + "Requirement already satisfied: pdfminer.six in /usr/local/lib/python3.10/dist-packages (from 
unstructured[all-docs]) (20231228)\n", + "Requirement already satisfied: google-cloud-vision in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (3.7.2)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (2.0.3)\n", + "Requirement already satisfied: openpyxl in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: effdet in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.4.1)\n", + "Requirement already satisfied: unstructured-inference==0.7.33 in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.7.33)\n", + "Requirement already satisfied: pdf2image in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (1.17.0)\n", + "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (3.6)\n", + "Requirement already satisfied: onnx in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (1.16.1)\n", + "Requirement already satisfied: pypandoc in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (1.13)\n", + "Requirement already satisfied: pillow-heif in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.16.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (3.3)\n", + "Requirement already satisfied: pytesseract in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (0.3.10)\n", + "Requirement already satisfied: python-docx in /usr/local/lib/python3.10/dist-packages (from unstructured[all-docs]) (1.1.2)\n", + "Requirement already satisfied: layoutparser in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[all-docs]) (0.3.4)\n", + "Requirement already satisfied: python-multipart in /usr/local/lib/python3.10/dist-packages 
(from unstructured-inference==0.7.33->unstructured[all-docs]) (0.0.9)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[all-docs]) (0.23.1)\n", + "Requirement already satisfied: onnxruntime>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[all-docs]) (1.18.0)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[all-docs]) (3.7.1)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[all-docs]) (2.3.0+cu121)\n", + "Requirement already satisfied: timm in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[all-docs]) (1.0.3)\n", + "Requirement already satisfied: transformers>=4.25.1 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.33->unstructured[all-docs]) (4.41.1)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic) (2.18.2)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai) (1.7.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai) (0.27.0)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.1)\n", + "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai) (4.66.4)\n", + "Requirement already satisfied: build>=1.0.3 in /usr/local/lib/python3.10/dist-packages (from 
chromadb) (1.2.1)\n", + "Requirement already satisfied: chroma-hnswlib==0.7.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.7.3)\n", + "Requirement already satisfied: fastapi>=0.95.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.111.0)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.30.0)\n", + "Requirement already satisfied: posthog>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (3.5.0)\n", + "Requirement already satisfied: opentelemetry-api>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.25.0)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.25.0)\n", + "Requirement already satisfied: opentelemetry-instrumentation-fastapi>=0.41b0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.46b0)\n", + "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.25.0)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.19.1)\n", + "Requirement already satisfied: pypika>=0.48.9 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.48.9)\n", + "Requirement already satisfied: overrides>=7.3.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (7.7.0)\n", + "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.4.0)\n", + "Requirement already satisfied: grpcio>=1.58.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.64.0)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.1.3)\n", + "Requirement already satisfied: typer>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.12.3)\n", + "Requirement already satisfied: kubernetes>=28.1.0 in 
/usr/local/lib/python3.10/dist-packages (from chromadb) (29.0.0)\n", + "Requirement already satisfied: mmh3>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.1.0)\n", + "Requirement already satisfied: orjson>=3.9.12 in /usr/local/lib/python3.10/dist-packages (from chromadb) (3.10.3)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2024.5.15)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (3.7)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (1.2.1)\n", + "Requirement already satisfied: packaging>=19.1 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (23.2)\n", + "Requirement already satisfied: pyproject_hooks in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (1.1.0)\n", + "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (2.0.1)\n", + "Requirement already satisfied: starlette<0.38.0,>=0.37.2 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (0.37.2)\n", + "Requirement already 
satisfied: fastapi-cli>=0.0.2 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (0.0.4)\n", + "Requirement already satisfied: jinja2>=2.11.2 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (3.1.4)\n", + "Requirement already satisfied: ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (5.10.0)\n", + "Requirement already satisfied: email_validator>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (2.1.1)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (2024.2.2)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (1.0.5)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n", + "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.8.2)\n", + "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.27.0)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.8.0)\n", + "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.3.1)\n", + "Requirement already satisfied: oauthlib>=3.2.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n", + "Requirement already satisfied: urllib3>=1.24.2 in /usr/local/lib/python3.10/dist-packages (from 
kubernetes>=28.1.0->chromadb) (2.0.7)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.0->langchain) (1.33)\n", + "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[all-docs]) (15.0.1)\n", + "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[all-docs]) (24.3.25)\n", + "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[all-docs]) (3.20.3)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[all-docs]) (1.12)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api>=1.2.0->chromadb) (1.2.14)\n", + "Requirement already satisfied: importlib-metadata<=7.1,>=6.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api>=1.2.0->chromadb) (7.1.0)\n", + "Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.63.0)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.25.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.25.0)\n", + "Requirement already satisfied: opentelemetry-proto==1.25.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.25.0)\n", + "Requirement already satisfied: opentelemetry-instrumentation-asgi==0.46b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) 
(0.46b0)\n", + "Requirement already satisfied: opentelemetry-instrumentation==0.46b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.46b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n", + "Requirement already satisfied: opentelemetry-util-http==0.46b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n", + "Requirement already satisfied: setuptools>=16.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.46b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (67.7.2)\n", + "Requirement already satisfied: asgiref~=3.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-asgi==0.46b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (3.8.1)\n", + "Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb) (1.6)\n", + "Requirement already satisfied: Pillow>=3.3.2 in /usr/local/lib/python3.10/dist-packages (from python-pptx<=0.6.23->unstructured[all-docs]) (10.3.0)\n", + "Requirement already satisfied: XlsxWriter>=0.5.7 in /usr/local/lib/python3.10/dist-packages (from python-pptx<=0.6.23->unstructured[all-docs]) (3.2.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) (8.1.7)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) 
(1.5.4)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) (13.7.1)\n", + "Requirement already satisfied: httptools>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.1)\n", + "Requirement already satisfied: python-dotenv>=0.13 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.1)\n", + "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\n", + "Requirement already satisfied: watchfiles>=0.13 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.22.0)\n", + "Requirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (12.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->unstructured[all-docs]) (2.5)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json->unstructured[all-docs]) (3.21.2)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json->unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from effdet->unstructured[all-docs]) (0.18.0+cu121)\n", + "Requirement already satisfied: pycocotools>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from effdet->unstructured[all-docs]) (2.0.7)\n", + "Requirement already satisfied: omegaconf>=2.0 in /usr/local/lib/python3.10/dist-packages (from effdet->unstructured[all-docs]) (2.3.0)\n", + "Requirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1 in 
/usr/local/lib/python3.10/dist-packages (from google-cloud-vision->unstructured[all-docs]) (2.11.1)\n", + "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-vision->unstructured[all-docs]) (1.23.0)\n", + "Requirement already satisfied: olefile>=0.46 in /usr/local/lib/python3.10/dist-packages (from msg-parser->unstructured[all-docs]) (0.47)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[all-docs]) (1.4.2)\n", + "Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl->unstructured[all-docs]) (1.1.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->unstructured[all-docs]) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->unstructured[all-docs]) (2024.1)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six->unstructured[all-docs]) (42.0.7)\n", + "Requirement already satisfied: deepdiff>=6.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client->unstructured[all-docs]) (7.0.1)\n", + "Requirement already satisfied: jsonpath-python>=1.0.6 in /usr/local/lib/python3.10/dist-packages (from unstructured-client->unstructured[all-docs]) (1.0.6)\n", + "Requirement already satisfied: mypy-extensions>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client->unstructured[all-docs]) (1.0.0)\n", + "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (1.16.0)\n", + "Requirement already satisfied: ordered-set<4.2.0,>=4.1.0 in /usr/local/lib/python3.10/dist-packages (from deepdiff>=6.0->unstructured-client->unstructured[all-docs]) (4.1.0)\n", + "Requirement already satisfied: 
dnspython>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from email_validator>=2.0.0->fastapi>=0.95.2->chromadb) (2.6.1)\n", + "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-cloud-vision->unstructured[all-docs]) (1.48.2)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.3)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.4.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->unstructured-inference==0.7.33->unstructured[all-docs]) (3.14.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->unstructured-inference==0.7.33->unstructured[all-docs]) (2023.6.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<=7.1,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.18.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.11.2->fastapi>=0.95.2->chromadb) (2.1.5)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.0->langchain) (2.4)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.10/dist-packages (from omegaconf>=2.0->effdet->unstructured[all-docs]) (4.9.3)\n", + "Requirement already satisfied: 
contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[all-docs]) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[all-docs]) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[all-docs]) (4.51.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[all-docs]) (1.4.5)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->unstructured-inference==0.7.33->unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.16.1)\n", + "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from timm->unstructured-inference==0.7.33->unstructured[all-docs]) (0.4.3)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in 
/usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (8.9.2.26)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (2.20.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (12.1.105)\n", + "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch->unstructured-inference==0.7.33->unstructured[all-docs]) (2.3.0)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->unstructured-inference==0.7.33->unstructured[all-docs]) (12.5.40)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from 
coloredlogs->onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[all-docs]) (10.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from layoutparser->unstructured-inference==0.7.33->unstructured[all-docs]) (1.11.4)\n", + "Requirement already satisfied: iopath in /usr/local/lib/python3.10/dist-packages (from layoutparser->unstructured-inference==0.7.33->unstructured[all-docs]) (0.1.10)\n", + "Requirement already satisfied: pdfplumber in /usr/local/lib/python3.10/dist-packages (from layoutparser->unstructured-inference==0.7.33->unstructured[all-docs]) (0.11.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime>=1.17.0->unstructured-inference==0.7.33->unstructured[all-docs]) (1.3.0)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (2.22)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer>=0.9.0->chromadb) (0.1.2)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.6.0)\n", + "Requirement already satisfied: portalocker in /usr/local/lib/python3.10/dist-packages (from iopath->layoutparser->unstructured-inference==0.7.33->unstructured[all-docs]) (2.8.2)\n", + "Requirement already satisfied: pypdfium2>=4.18.0 in /usr/local/lib/python3.10/dist-packages (from pdfplumber->layoutparser->unstructured-inference==0.7.33->unstructured[all-docs]) (4.30.0)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install langchain-community\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "USxGblR1hScN", + "outputId": "13308872-7411-4608-d1e3-b0eb2810e42a" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting langchain-community\n", + " Downloading langchain_community-0.2.1-py3-none-any.whl (2.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.0.30)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (3.9.5)\n", + "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.6.6)\n", + "Requirement already satisfied: langchain<0.3.0,>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.2.1)\n", + "Requirement already satisfied: langchain-core<0.3.0,>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.2.3)\n", + "Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.1.66)\n", + "Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (1.25.2)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.31.0)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (8.3.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.3)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.21.2)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n", + "Requirement already satisfied: langchain-text-splitters<0.3.0,>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from langchain<0.3.0,>=0.2.0->langchain-community) (0.2.0)\n", + "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain<0.3.0,>=0.2.0->langchain-community) (2.7.1)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.0->langchain-community) (1.33)\n", + "Requirement already satisfied: packaging<24.0,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.0->langchain-community) (23.2)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.0->langchain-community) (3.10.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in 
/usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (2024.2.2)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (4.11.0)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.0.3)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.0->langchain-community) (2.4)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.0->langchain-community) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.0->langchain-community) (2.18.2)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.0.0)\n", + "Installing collected packages: langchain-community\n", + "Successfully installed langchain-community-0.2.1\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import uuid\n", + "import base64\n", + "from IPython import display\n", + "from unstructured.partition.pdf import partition_pdf\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.embeddings 
import OpenAIEmbeddings\n", + "from langchain.chains import LLMChain\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.schema.messages import HumanMessage, SystemMessage\n", + "from langchain.schema.document import Document\n", + "from langchain.vectorstores import FAISS\n", + "from langchain.retrievers.multi_vector import MultiVectorRetriever" + ], + "metadata": { + "id": "Ekq9fFeSf-RF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import userdata\n", + "openai_api_key = userdata.get('openapikey')" + ], + "metadata": { + "id": "HNY711ajiP6X" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "output_path = \"./images\"\n" + ], + "metadata": { + "id": "JeWaJcljig8A" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Get elements\n", + "raw_pdf_elements = partition_pdf(\n", + " filename=\"/content/AC-Aids-for-Dogs_Canine-Periodontal-Disease.pdf\",\n", + " extract_images_in_pdf=True,\n", + " infer_table_structure=True,\n", + " chunking_strategy=\"by_title\",\n", + " max_characters=4000,\n", + " new_after_n_chars=3800,\n", + " combine_text_under_n_chars=2000,\n", + " extract_image_block_output_dir=output_path,\n", + ")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 304, + "referenced_widgets": [ + "f00c572439414e4b9a960175242d0be5", + "95907a58c9fc4e758976918df3e4d696", + "1ef85a4dbe5643df93666b95a1810e8c", + "1328c7e39383433eac1cb04515abe8c1", + "d7f200d9b80b4b0497011d8a4d8ccbaa", + "6bb350105f8d4b1e9730182587884f54", + "d3684c184dd547ba8afdcfd3dc35b5cb", + "195d818eeef8465281cc24642b05607e", + "06d74a0dbf96445fa948ce710581c94b", + "9449f30a8c0546b1b4111b743ce8034e", + "4aeac8542a9f43d880ddce30cb384ab0", + "e9ee42809e1d4ded9a45b6965b9b7a7a", + "e246be046226454e8315e0b988e68358", + "a707ee518c87430d82f12bc1ae7da4b0", + 
"c0e6a8eb79d8424e83db0b2796e167c3", + "a848b8f793b440d797c6576fe3d7824a", + "0a4e247fba624be8bf247a591f54fccf", + "233654916ea543feae69e7c81eb7828a", + "44087e28225f4277b82f32b8757405c9", + "4ac1c050832d4e70b2c970751852b4d1", + "ac99b0e7854749b786b7db3efe11418d", + "49f14e4282804abbbbf08ddab5151622", + "857d44aa6b5a42aaa19b687f8bfbab2e", + "1acf5f3e8fd24a98b4b14c2718b813be", + "d946d363b4ca4a8581f33a5cab6322a7", + "62b40363de754d51bd920b2a02c896a4", + "17202e04d4fe46a5825f73c2ffdc12b8", + "0addf04ae5da431ea728454fbc618d20", + "5255d7e4e1b4485fbd0042493ff40736", + "ccb21b1b5f114786982ed9816e97dda2", + "7f888a984b5d40ccbc3aa84dea2ed9fd", + "d1d1a62343d245d9b5e4d1cf9fd61d7e", + "b737918adb574c3a91fac7a70af0e73f", + "2302b01adee54fef8fbaf67e9051b956", + "6681aabe89b64879a789445f5cdee4a6", + "7ec6e77ea9d14b1ba893e19e9c56d385", + "78a171401f844335be96f958eaf2088c", + "e207f1d498e945e9bd29a178293c37c9", + "bac87ae477914d948419f3ed26a7789a", + "9a702decc7db41f8bb9e891cd5c46913", + "5d9b29bbe7644c9daee66975e04ada13", + "3a5a9eaf750f4b5c9941e77ea6ceb20b", + "5827837e4e1446ed974ffda49affe8e4", + "44e26064746847be93e2995ff7ba28b9" + ] + }, + "id": "k5Tx7BbAiZcf", + "outputId": "11d82243-8578-446a-8d8f-290b5f8c04cd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "yolox_l0.05.onnx: 0%| | 0.00/217M [00:00=0.23.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.23.1)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.5.15)\n", + "Requirement already satisfied: 
requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.3)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.4)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.11.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch) (8.9.2.26)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch) (10.3.2.106)\n", + 
"Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch) (2.20.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.3.0)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.5.40)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "GMJYtMorliuC" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Get image summaries\n", + "image_elements = []\n", + "image_summaries = []\n", + "import openai\n", + "from transformers import CLIPModel, CLIPProcessor\n", + "from PIL import 
Image\n", + "\n", + "# Load CLIP model and processor\n", + "model = CLIPModel.from_pretrained(\"openai/clip-vit-base-patch32\")\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")\n", + "\n", + "def encode_image(image_path):\n", + " image = Image.open(image_path)\n", + " inputs = processor(images=image, return_tensors=\"pt\")\n", + " outputs = model.get_image_features(**inputs)\n", + " return outputs\n", + "\n", + "# def encode_image(image_path):\n", + "# with open(image_path, \"rb\") as f:\n", + "# return base64.b64encode(f.read()).decode('utf-8')\n", + "\n", + "# Function to get a textual summary\n", + "# Function to get a textual summary\n", + "# def summarize_image(encoded_image):\n", + "# # Convert tensor to list\n", + "# encoded_image_list = encoded_image.tolist()\n", + "\n", + "# # Use the encoded image features to generate a summary with the new API\n", + "# response = openai.ChatCompletion.create(\n", + "# model=\"gpt-3.5-turbo\",\n", + "# messages=[\n", + "# {\n", + "# \"role\": \"system\",\n", + "# \"content\": \"You are a helpful assistant.\"\n", + "# },\n", + "# {\n", + "# \"role\": \"user\",\n", + "# \"content\": f\"Generate a summary for the following image features: {encoded_image_list}\"\n", + "# }\n", + "# ],\n", + "# max_tokens=50\n", + "# )\n", + "# return response['choices'][0]['message']['content'].strip()\n", + "\n", + "\n", + "# Function to get a textual summary\n", + "# def summarize_image(encoded_image):\n", + "# # Convert tensor to list\n", + "# encoded_image_list = encoded_image.cpu().detach().numpy().tolist()\n", + "\n", + "# # Use the encoded image features to generate a summary with the new API\n", + "# response = openai.Completion.create(\n", + "# model=\"text-davinci-003\",\n", + "# prompt=f\"Generate a summary for the following image features: {encoded_image_list}\",\n", + "# max_tokens=50\n", + "# )\n", + "# return response.choices[0].text.strip()\n", + "\n", + "def 
summarize_image(encoded_image):\n", + " \"\"\"Ask the chat model for a text summary of one image's encoded features and return the reply text.\"\"\"\n", + " # Convert tensor to list\n", + " encoded_image_list = encoded_image.tolist()\n", + "\n", + " # Use the encoded image features to generate a summary with GPT-3.5 Turbo\n", + " # NOTE(review): the features are sent as plain numbers; a text-only model may not be able to turn them into a\n", + " # faithful description. Consider sending the base64 image to a vision model (see the commented image_url block).\n", + " prompt = [\n", + " SystemMessage(content=\"You are a bot that is good at analyzing images related to Dog's health.\"),\n", + " HumanMessage(content=[\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": f\"Describe the contents of this image.{encoded_image_list}\"  # f-string: the placeholder was previously sent literally\n", + " },\n", + " # {\n", + " # \"type\": \"image_url\",\n", + " # \"image_url\": {\n", + " # \"url\": f\"data:image/jpeg;base64,{encoded_image}\"\n", + " # },\n", + " # },\n", + " ])\n", + " ]\n", + " response = ChatOpenAI(model=\"gpt-3.5-turbo\", openai_api_key=openai_api_key, max_tokens=1024).invoke(prompt)\n", + " return response.content\n", + "\n", + "for i in os.listdir(output_path):\n", + " if i.endswith(('.png', '.jpg', '.jpeg')):\n", + " image_path = os.path.join(output_path, i)\n", + " encoded_image = encode_image(image_path)\n", + " image_elements.append(encoded_image)\n", + " summary = summarize_image(encoded_image)\n", + " image_summaries.append(summary)" + ], + "metadata": { + "id": "7wXSc6xsj50I" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!pip install faiss-cpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gRp0W-QAp0e5", + "outputId": "aa16d5da-5725-4f40-a00c-1b888de382d8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting faiss-cpu\n", + " Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m27.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from 
faiss-cpu) (1.25.2)\n", + "Installing collected packages: faiss-cpu\n", + "Successfully installed faiss-cpu-1.8.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Create Documents and Vectorstore\n", + "# Each element gets its own UUID; page_content holds the summary that is embedded,\n", + "# metadata['original_content'] holds the element the summary was made from.\n", + "documents = []\n", + "retrieve_contents = []\n", + "\n", + "for e, s in zip(text_elements, text_summaries):\n", + " i = str(uuid.uuid4())\n", + " doc = Document(\n", + " page_content = s,\n", + " metadata = {\n", + " 'id': i,\n", + " 'type': 'text',\n", + " 'original_content': e\n", + " }\n", + " )\n", + " retrieve_contents.append((i, e))\n", + " documents.append(doc)\n", + "\n", + "for e, s in zip(table_elements, table_summaries):\n", + " i = str(uuid.uuid4())  # fresh id per table (previously reused the last text id)\n", + " doc = Document(\n", + " page_content = s,\n", + " metadata = {\n", + " 'id': i,\n", + " 'type': 'table',\n", + " 'original_content': e\n", + " }\n", + " )\n", + " retrieve_contents.append((i, e))\n", + " documents.append(doc)\n", + "\n", + "for e, s in zip(image_elements, image_summaries):\n", + " i = str(uuid.uuid4())  # fresh id per image (previously reused the last text id)\n", + " doc = Document(\n", + " page_content = s,\n", + " metadata = {\n", + " 'id': i,\n", + " 'type': 'image',\n", + " 'original_content': e\n", + " }\n", + " )\n", + " retrieve_contents.append((i, s))\n", + " documents.append(doc)\n", + "\n", + "vectorstore = FAISS.from_documents(documents=documents, embedding=OpenAIEmbeddings(openai_api_key=openai_api_key))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1CSGuXB0qLr6", + "outputId": "a0eca2e1-276f-45c6-8f4c-f91e85bb6b08" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The class `OpenAIEmbeddings` was deprecated in LangChain 0.0.9 and will be removed in 0.3.0. An updated version of the class exists in the langchain-openai package and should be used instead. 
To use it run `pip install -U langchain-openai` and import as `from langchain_openai import OpenAIEmbeddings`.\n", + " warn_deprecated(\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "vectorstore.save_local(\"faiss_index\")" + ], + "metadata": { + "id": "knuEyEuEqWCX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)" + ], + "metadata": { + "id": "mo02V-9iqaCk" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "db = FAISS.load_local(\"faiss_index\", embeddings, allow_dangerous_deserialization=True)" + ], + "metadata": { + "id": "-0gXkQfeqdXl" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "prompt_template = \"\"\"\n", + "You are a vet doctor and an expert in analyzing dog's health.\n", + "Answer the question based only on the following context, which can include text, images and tables:\n", + "{context}\n", + "Question: {question}\n", + "Don't answer if you are not sure and decline to answer and say \"Sorry, I don't have much information about it.\"\n", + "Just return the helpful answer in as much as detailed possible.\n", + "Answer:\n", + "\"\"\"" + ], + "metadata": { + "id": "KE8NxwZGqghn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "qa_chain = LLMChain(llm=ChatOpenAI(model=\"gpt-3.5-turbo\", openai_api_key = openai_api_key, max_tokens=1024),\n", + " prompt=PromptTemplate.from_template(prompt_template))" + ], + "metadata": { + "id": "whZy4wEsq0qL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def answer(question):\n", + " relevant_docs = db.similarity_search(question)\n", + " context = \"\"\n", + " relevant_images = []\n", + " for d in relevant_docs:\n", + " if d.metadata['type'] == 'text':\n", + " context += '[text]' + 
d.metadata['original_content']\n", + " elif d.metadata['type'] == 'table':\n", + " context += '[table]' + d.metadata['original_content']\n", + " elif d.metadata['type'] == 'image':\n", + " context += '[image]' + d.page_content\n", + " relevant_images.append(d.metadata['original_content'])\n", + " result = qa_chain.run({'context': context, 'question': question})\n", + " return result, relevant_images" + ], + "metadata": { + "id": "e0io5g9wq6mc" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "result, relevant_images = answer(\"What is Gingivitis?\")\n", + "print(result)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yEjX2vJ4q-N8", + "outputId": "b9efacf8-2072-4de9-bb3f-1bf9dca20fbb" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Gingivitis is a common form of periodontal disease in dogs that is characterized by inflammation of the gums. It is typically caused by the accumulation of plaque along the gumline, leading to irritation and redness of the gums. If left untreated, gingivitis can progress to more severe periodontal disease, potentially causing pain, infection, abscesses, and tooth loss in dogs. 
Regular dental care and professional cleanings can help prevent and treat gingivitis in dogs.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "relevant_images[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WsdgCJ81rKKc", + "outputId": "14497f09-848b-4d04-d7af-a18f2168e460" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[ 3.6185e-01, -4.6712e-01, 2.4411e-01, -4.8606e-02, -1.6717e-01,\n", + " 6.5295e-02, -5.1602e-01, 5.0900e-01, -5.4800e-01, 9.4658e-02,\n", + " 2.7083e-01, 1.2258e-01, 6.4133e-01, 4.8218e-02, 2.2932e-01,\n", + " 5.7688e-01, 4.1314e-01, 2.6177e-01, 1.5845e-01, -1.0288e-01,\n", + " -5.6589e-01, 9.7690e-02, -9.5719e-02, -5.4196e-01, -8.9830e-02,\n", + " -1.8106e-01, -3.2562e-01, 4.0886e-03, 3.2479e-01, -2.3416e-01,\n", + " 8.3923e-02, -1.5671e-01, 3.1429e-01, -3.6439e-01, -4.5929e-01,\n", + " -3.2258e-02, -4.6586e-01, 9.2936e-02, -3.4521e-01, -4.3290e-01,\n", + " 8.5310e-03, 4.0394e-01, -1.4704e-01, -3.7642e-03, 4.1868e-02,\n", + " -6.7429e-01, 2.7983e-01, 2.8713e-01, 5.7737e-02, 3.0883e-01,\n", + " 6.7217e-02, -1.2114e-01, 3.0623e-01, 1.8967e-01, 7.7659e-01,\n", + " 5.2437e-01, -1.3684e-01, 1.1687e-01, -1.1939e-01, 2.0355e-01,\n", + " 2.5328e-01, 4.5170e-01, 2.9367e-01, -1.2723e-01, -2.1186e-01,\n", + " 2.1462e-01, 8.2521e-02, -3.8977e-01, -6.0487e-01, -1.9074e-01,\n", + " 6.7867e-02, 3.9730e-01, 2.2539e-02, -4.1650e-01, 2.7921e-01,\n", + " -3.6924e-01, 5.6222e-02, -1.3669e-01, -6.5048e-01, -3.2304e-01,\n", + " 7.0305e-02, -2.1993e-01, 2.7029e-01, -4.4334e-01, 2.6540e-01,\n", + " 1.9334e-01, 2.6775e-01, -2.4113e-01, -3.3366e-01, 2.2775e-01,\n", + " 1.3904e-01, -7.0636e-02, -6.8353e+00, 4.9474e-01, 2.6518e-01,\n", + " 1.8065e-01, 1.9837e-01, -1.9936e-01, 8.0931e-02, -8.5262e-01,\n", + " 1.3744e-01, -2.2392e-01, 2.0399e-01, 3.2329e-01, 7.5778e-01,\n", + " 5.9972e-01, -1.7393e+00, -4.4645e-02, -1.3621e-01, 
-3.7323e-01,\n", + " 3.9036e-01, -8.6416e-02, -5.5597e-01, -1.6553e-01, 4.1363e-02,\n", + " -1.3578e-01, 2.9941e-01, 1.6996e-01, 1.0469e-01, 3.4463e-01,\n", + " -7.1870e-02, 5.0068e-01, -1.6179e-01, 1.7798e-02, 2.2629e-01,\n", + " -2.0800e-01, -3.2106e-01, -1.4083e-02, 1.6722e-03, 2.6544e-01,\n", + " 4.5693e-01, 2.2569e-02, -1.8524e-01, 9.7431e-01, -1.4435e-01,\n", + " 4.7202e-02, 2.3754e-01, -3.9043e-01, -2.8887e-01, 2.6766e-01,\n", + " 1.1174e-01, -4.8231e-01, 3.0416e-02, -3.1721e-02, 4.8764e-01,\n", + " 5.0944e-01, -4.4235e-01, 2.8234e-01, -1.2598e-01, 3.8505e-01,\n", + " -9.9411e-02, -3.0527e-01, 1.1580e+00, -3.2163e-02, -2.1095e-01,\n", + " 1.0405e-01, 1.1398e-01, -1.7672e-01, 2.0804e-01, -2.1670e-01,\n", + " 4.7424e-01, -2.1490e-01, -1.6807e-01, -2.9787e-01, 1.5797e-01,\n", + " -1.7867e-01, -1.6878e-01, -1.4998e-01, 2.5526e-01, 1.3510e-01,\n", + " 2.1072e-01, 1.1379e-01, 8.6206e-01, -4.5415e-01, -1.1472e-01,\n", + " 1.3049e-01, 8.8761e-02, 4.4268e-01, -1.3360e-01, -1.2680e-01,\n", + " 6.4324e-01, -5.2249e-03, -2.5188e-01, -5.0668e-02, -5.0632e-02,\n", + " -2.2417e-01, 1.2415e-01, -4.1368e-01, -3.4318e-02, -2.5705e-03,\n", + " 3.6670e-01, -6.1327e-01, -2.9426e-01, 1.8461e-01, 4.7694e-01,\n", + " -1.3736e-01, 4.0246e-01, 2.2985e-01, -3.1189e-02, 3.2435e-01,\n", + " 5.3831e-01, -3.8268e-01, 5.1656e-01, 3.4240e-01, -3.8675e-01,\n", + " -2.6261e-02, -2.8718e-01, 2.7705e-02, 1.5306e-01, -7.2117e-01,\n", + " 1.7272e-01, 4.3873e-01, 1.3557e-01, 3.2687e-01, 4.5406e-01,\n", + " -2.2328e-01, 1.6284e-02, -1.6458e-02, 7.4136e-01, 1.3565e-01,\n", + " 5.2914e-01, -2.7179e-01, 3.5631e-01, 4.2685e-01, 8.5968e-01,\n", + " 9.7644e-03, 5.7529e-03, -1.8777e-01, 1.0999e-01, 2.8140e-01,\n", + " 9.9228e-02, -1.2107e-01, -2.2310e-01, -3.7565e-01, 6.6953e-01,\n", + " -1.5211e-01, 2.0613e-01, 9.0670e-02, -8.3060e-02, -1.3523e-01,\n", + " -4.4227e-01, -1.2992e-01, 1.5452e-01, 5.6156e-02, -5.6824e-01,\n", + " -5.9143e-01, -2.4695e-01, -1.2869e-02, 4.1085e-02, -2.1610e-01,\n", + " 
-1.2199e-01, -4.3543e-02, -7.2858e-01, -2.8886e-01, -8.7512e-03,\n", + " 2.6961e-01, -4.5799e-02, -6.0398e-02, 1.2601e+00, -1.1643e-01,\n", + " 1.3092e-01, -2.0428e-01, 7.2349e-02, -4.2915e-01, -1.9958e-01,\n", + " 5.4367e-01, -1.3689e-01, -9.6674e-02, 4.3693e-01, 9.9267e-02,\n", + " 8.7140e-01, -1.1392e-01, -7.8148e-02, 1.9134e-01, -1.0157e-01,\n", + " -1.1086e-01, -3.9150e-02, 5.6079e-02, -8.6805e-02, 1.8963e-01,\n", + " -1.5199e-01, -1.0811e-01, -4.9042e-01, 1.9550e-01, 3.1658e-01,\n", + " -1.2335e-01, -9.3311e-01, -1.9174e-01, 2.1807e-01, -3.6038e-01,\n", + " 4.5253e-02, -2.2686e-01, -2.1629e-01, 3.5251e-01, 3.3726e-01,\n", + " -1.2792e-01, -3.5591e-01, 7.3137e-01, -3.9616e-01, -2.3776e-01,\n", + " 1.1356e-01, -3.2465e-01, 7.6494e-02, 6.7168e-01, -1.4703e-01,\n", + " 2.8508e-02, 5.7080e-01, -4.4331e-02, -3.5693e-01, -1.7996e-01,\n", + " 5.8549e-02, 7.0144e-02, 9.7515e-01, 3.6202e-01, 2.9607e-01,\n", + " -2.3475e-01, 1.7875e-01, -4.6093e-01, -8.3662e-02, -7.4013e-01,\n", + " 3.0324e-01, -1.0504e+00, -2.9885e-01, -9.0814e-02, 2.0496e-01,\n", + " 2.4822e-01, -2.7535e-01, -1.9239e-01, 3.1790e-01, 4.4154e-01,\n", + " -1.1431e-02, 2.7488e-01, 3.0639e-01, -3.3122e-01, -1.6612e-01,\n", + " 1.2507e-01, -9.9997e-02, -5.9346e-02, -2.7041e-02, 1.8832e-01,\n", + " 4.7559e-01, 1.3700e-01, -1.5360e-01, 3.9649e-01, 5.6510e-01,\n", + " 3.4769e-01, 2.1666e-01, -9.6466e-03, 6.7279e-02, 3.1093e-01,\n", + " 6.4362e-01, 1.1236e-01, 6.9353e-01, -2.7010e-01, -1.5590e-01,\n", + " 2.8015e-01, -1.9501e-01, -2.6107e-01, 4.7186e-01, 1.8072e-01,\n", + " 2.8824e-01, -2.1628e-02, 2.3945e-01, 3.8647e-01, -1.7511e+00,\n", + " 5.7128e-02, 7.0729e-02, 5.6625e-01, 4.6666e-01, 1.3239e-02,\n", + " 1.7304e-01, -5.4237e-02, -6.3014e-02, 1.8336e-01, 1.9355e-02,\n", + " 2.8825e-01, -3.7311e-01, -8.5584e-03, -4.4177e-01, -2.9470e-01,\n", + " -1.9763e-01, 6.2692e-01, 3.2247e-01, -5.8331e-03, -3.8421e-01,\n", + " 5.9431e-02, 4.6097e-01, -7.3442e-02, -2.1268e-01, -1.0943e+00,\n", + " -6.2229e-01, 
-1.2226e-01, 2.7288e-02, -5.9582e-02, 1.3312e-01,\n", + " -1.2199e-01, 2.2504e-01, -5.5257e-01, -3.9207e-01, 1.0111e-01,\n", + " -2.9270e-02, 2.7582e-01, 8.4565e-02, 2.6065e-01, -7.8157e-02,\n", + " -1.9763e-01, -2.8595e-02, -8.9183e-02, 6.4259e-01, 1.1739e-01,\n", + " -7.2895e-01, -2.5010e-01, -1.4004e-01, 8.9347e-03, -3.0891e-01,\n", + " -1.1972e-01, 1.8893e-02, -4.5874e-02, -2.7905e-01, -4.5884e-01,\n", + " -4.9649e-01, -1.8671e-01, 2.5013e-01, -1.9815e-01, 7.1842e-02,\n", + " -1.6435e-01, 2.6645e-01, 5.4042e-01, -2.8274e+00, 1.7207e-01,\n", + " 7.1083e-01, 1.8806e-01, 8.2478e-01, -5.3171e-02, -1.4366e-01,\n", + " 1.0071e-01, 6.9616e-02, 2.5332e-01, 3.2452e-01, -1.5880e-01,\n", + " -3.3879e-01, 3.1588e-01, 3.0264e-01, 2.3303e-01, 4.2994e-01,\n", + " 2.6757e-01, -3.3439e-01, 2.8412e-01, -5.1480e-02, -1.5445e-01,\n", + " -3.8944e-01, -1.4391e-01, -2.2546e-01, 7.5892e-02, -1.1686e-01,\n", + " 7.2905e-02, 1.2294e-01, 2.4124e-01, -1.8961e-01, 4.9985e-02,\n", + " 6.7798e-02, 1.4188e-02, 4.4635e-01, -1.4716e-01, 7.5583e-01,\n", + " 1.3570e-01, 4.8897e-01, -1.5959e-01, -3.4690e-01, 1.5342e-01,\n", + " 3.9856e-01, -5.5425e-02, 1.8019e-01, 2.0631e-01, -2.1229e-01,\n", + " 2.5961e-01, 5.2037e-01, -2.4126e-02, -2.5550e-01, -3.3437e-01,\n", + " -5.1629e-02, 5.3571e-01, 3.6291e-01, 1.4487e-01, 1.0685e-01,\n", + " 5.0266e-01, 2.3597e-01, -5.6259e-02, 2.2791e-01, -5.9395e-01,\n", + " 2.2912e-02, -2.4185e-01, -1.8971e-01, -3.0720e-01, -2.1480e-01,\n", + " 1.1243e-01, -3.1341e-02, 3.5016e-01, -4.1101e-01, -2.8999e-01,\n", + " 8.8299e-02, -1.5014e-01, -1.1732e-01, -3.5040e-01, 1.4591e-01,\n", + " -2.7121e-01, 1.4426e-01, 1.2414e-01, -2.8283e-01, 1.0849e+00,\n", + " -1.7727e-01, -5.1442e-01]], requires_grad=True)" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "image_tensor = relevant_images[0]" + ], + "metadata": { + "id": "Md7LWHDHrPTN" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + 
"source": [ + "import torch\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n" + ], + "metadata": { + "id": "RWVC7D3MsKe3" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Take the first retrieved tensor; per the printed output below its shape is (1, 512)\n", + "relevant_image_tensor = relevant_images[0]\n", + "\n", + "# Check the shape of the tensor\n", + "print(\"Shape of the tensor:\", relevant_image_tensor.shape)\n", + "\n", + "# Detach from the autograd graph and move to CPU (no-op if already there): Tensor.numpy() raises on CUDA tensors\n", + "image_np = relevant_image_tensor.detach().cpu().numpy()\n", + "\n", + "# Lay the 512 values out as a 16x32 grid so imshow can draw them\n", + "# (16 * 32 == 512; the grid is only a display layout for the flat vector)\n", + "image_np = image_np.reshape(16, 32)\n", + "\n", + "# Min-max scale the values into [0, 1]; assumes max > min, otherwise this divides by zero\n", + "image_np = (image_np - image_np.min()) / (image_np.max() - image_np.min())\n", + "\n", + "# Render the grid as a grayscale heatmap\n", + "plt.imshow(image_np, cmap='gray')\n", + "plt.axis('off')\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 302 + }, + "id": "Kt12P_6TsgsI", + "outputId": "02a3f450-e114-4a9f-e1d6-3fe2ba6cd0dd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Shape of the tensor: torch.Size([1, 512])\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAEMCAYAAABZZbUfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAANbElEQVR4nO3dyW4cZJsF4K/ieCrPMXEcQkgCYoGCWCCxYMWlcFXcCjvuAomQQEBJsIWHeIrnofoC/u6m9B11e/E+z7qOju0aclKbdzAajUYNACjrzm3/AADA7TIGAKA4YwAAijMGAKA4YwAAijMGAKA4YwAAijMGAKA4YwAAirs77gPfvXvXXbKxsdGdba21tbW17uzp6WnUfXV11Z2dmZmJund3d7uzz549i7pfv37dnb1//37UPTk52Z1Nnq/WWlteXu7OJs9Xa61NTU11Z+fm5qLuy8vLW8m21tr19XV39uTkJOo+Ojrqzqav84uLi+7s4eFh1L24uNidTZ6v1rLXS/o6Hw6H3dn379/fWvf5+XnU/dVXX/3rY3wzAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUNzYJ4xHo1F3ycTERHe2tda2tra6s8mpztayE8izs7NRd3LS9s2bN1H3vXv3urPp6eb9/f3ubHL+uLXsDPE///wTdT958qQ7m57qPjs7686mz3d6Ejfx6NGj7mz6cyfdq6urUff29nZ3Nv1cGwwG3dnkM7G11m5ubrqza2trUXdyujk9YTwO3wwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUN/YJ4729ve6S5Bxua61dXV11Zw8ODm6tOz0ru7Cw0J1Nz4weHx93Zzc3N6Pu5FTozs5O1J38zVdWVqLu5Oz0cDiMupPfOzk53VprS0tLt9ad/N6p5GdPf+/k/PL8/HzUnXw2pZ+pz58/786mn2uj0ag7m76/x+GbAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAo7u64D0zufh8dHXVnW8vuXx8eHkbdT5486c4eHx9H3Xfvjv30/IfkdnZrra2vr3dnV1dXo+6ffvqpO/vtt99G3RcXF93Z8/PzqPvp06fd2fR1vrOz051N79snf7fks6G11gaDQXc2fY9dXl52Z9P3WPJ6mZmZibqT19rk5GTU/eeff3Znt7a2ou6rq6vu7HA4jLrH4ZsBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4sa+kZucdl1aWurOttba1NRUd/bRo0dR98nJSXf2iy++iLr/+OOP7mzyN2uttevr6+5scqK0tda+/vrr7mx66jM5K5uerE5ea3fuZLv+2bNn3dnkDHBrrb1586Y7++DBg6g7OeW7vLx8a93pWfjkPfbq1auo++bmpjs7MTERdSdnhNPX+crKSnc2fb7H4ZsBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACju7rgPXFpa6i7Z2trqzraW3ZFO7le3lv3ev//+e9SduLi4iPLT09Pd2eRud2ut3bnTv1EvLy+j7vX19e7s+/fvo+7kOZudnY26k3vp8/PzUXfysx8eHkbdDx8+7M5ub29H3clrdWZmJup++fJldzZ9j83NzXVn9/b2ou7ktTo1NRV1T05OdmfT1/k4fDMAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQ3NgnjDc3N
7tLkjPArbU2HA67szs7O1F3crYyOcXbWmtnZ2fd2eRMaGvZCeO3b99G3Z988kl3Nj3d/OLFi+7s4uJi1J0839fX11H3xMREd/bVq1dRd/J8p6eb9/f3u7Ppeyz5XEzPJyfv7/SU7+rqapRPJCeMk38DW8teL5999lnUPQ7fDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAcYPRaDQa54EfPnzoLvn777+7s6n0vn1yLz29+5387GdnZ1H3zc1Nd/by8jLqfvbsWXd2Y2Mj6k5ujl9fX0fdyfOdPF+ttXZ6etqdTd4jrbU2HA67s1dXV1H37u5ud3bMj87/UfJam56ejrrv3On/f2D62ZJ0T0xMRN3b29vd2cXFxah7ZmamO5u+x548efKvj/HNAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHF3x33g27dv/y9/jv/Vl19+2Z395Zdfou7BYNCd3d/fj7qTs5XJSdrWshOpjx8/jrqTM8TJedTWst/75OQk6v7000+7s3/99VfUnZzTTc+rHh8fR/nE8vJydzY9p5ucX05PGCd/8/Q9lnSvra1F3cl59IODg6g7+Td0fX096h6HbwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoLi74z4wuVm+u7vbnW2ttZcvX3Znr6+vo+7k9va9e/ei7tFo1J1Nb8yfn593Z6empqLus7Oz7uzk5GTUfXJy0p09PDyMupPn++OPP466NzY2urOXl5dR94MHD7qzOzs7UXfyfKev85mZme7s1dVV1P355593Z9PP88XFxe7s3t5e1L2wsNCdTV4rrbX26NGj7uz29nbUPQ7fDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAcXfHfeCdO/27YWtrqzubdk9PT0fds7Oz3dnr6+uoO7ndPTExEXWvra11Z09PT6Pu+fn57mzyfLXW2mAwiPKJs7Oz7mx65z2Rvs53d3e7szMzM1H3yspKd/b4+DjqTt4ny8vLUfeHDx+ifOLm5qY7m36eJ6/V9fX1qHtubq47OzU1FXWPwzcDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxY19wnhhYaG75OHDh93Z1lq7e3fsH/M/JOdwW2ttf3+/O5uc6mwtO5l5cnISdY9Go+7swcFB1J2cpU1PN//888/d2e+++y7qTs4nLy4uRt3D4bA7e3h4GHVPTk52Z5Ozz2k+Paeb/N7pZ8ubN2+6s8kp3tZaOzo66s4m/xa0ln0+JOeuW8s+k6+urqLucfhmAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKG/s49OvXr7tL0lvr8/Pz3dm9vb2oO7GxsRHlP3z40J1dXl6Ouqemprqzo9Eo6r6+vu7OJn+z1lr7/vvvu7Obm5tRd3LffjAYRN3JvfTZ2dmo++Liojs7HA6j7uPj4+5s+lpL3mNp99OnT7uz6Wfq6elpd3ZpaSnqvnOn//+/u7u7UffJyUl3NvlsGJdvBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIobjMa8N/vbb791l9zc3HRnW2vt/Py8O5ucR20tO8ebnldNTj8fHBxE3clZ2rOzs6g7+b3Tc7o//vhjd/aHH36IuldWVrqz+/v7UfdHH33Un
X337l3UnZysnpmZibqT93dygri17Jxu+v5OPhfv378fdSd/8/TfkuQ5u7y8jLqTE8jJ+7O11j799NN/fYxvBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCguMFozOPSv/76a3dJcq+8tdbOzs66s+m98+R+9uzsbNR9fn7enZ2eno66k1vrExMTUffR0VF3Nv29B4NBdza9tb64uNidTW6lt9ba48ePu7M7OztR99zc3K11X1xcdGeXl5ej7qmpqe5s8h5pLXufJO+R1rKffWlpKepO/j3Y29uLupPP8/Tf0G+++eZfH+ObAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOLu/n+UrK2tRfnkzGhyNrK11sa88PzfWlhYiLpPT09vJdtadn55Y2Mj6l5dXe3Opr93crr56uoq6k5eq8PhMOre3Nzszs7Pz0fd7969686m57KT9+jJyUnUneSTz8TWsvPJh4eHUXfyN0/fY8nr/Pnz51H3ixcvurPp59o4fDMAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUNRqPR6LZ/CADg9vhmAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACK+y+mwhg82Q54rwAAAABJRU5ErkJggg==\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Assuming relevant_images[0] is your tensor\n", + "relevant_image_tensor = relevant_images[0]\n", + "\n", + "# Check the shape of the tensor\n", + "print(\"Shape of the tensor:\", relevant_image_tensor.shape)\n", + "\n", + "# Detach the tensor from the computation graph and convert to a numpy array\n", + "image_np = relevant_image_tensor.detach().numpy()\n", + "\n", + "# Reshape the array to a suitable format for visualization\n", + "# Since the original shape is (1, 512), we can reshape it to (16, 32)\n", + "image_np = image_np.reshape(16, 32)\n", + "\n", + "# Normalize the values to be between 0 and 1 if they are not already\n", + "# Ensure that the range is correct based on the expected data range\n", + "image_np = (image_np - image_np.min()) / (image_np.max() - image_np.min())\n", + "\n", + "# Display the image using grayscale colormap\n", + "plt.imshow(image_np, cmap='gray')\n", + "\n", + "# Remove axis\n", + "plt.axis('off')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 302 + }, + "id": 
"1Kn8IODXssyV", + "outputId": "0790c56b-f1a5-4fb0-f656-a5ed3c014188" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Shape of the tensor: torch.Size([1, 512])\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAEMCAYAAABZZbUfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAANbElEQVR4nO3dyW4cZJsF4K/ieCrPMXEcQkgCYoGCWCCxYMWlcFXcCjvuAomQQEBJsIWHeIrnofoC/u6m9B11e/E+z7qOju0aclKbdzAajUYNACjrzm3/AADA7TIGAKA4YwAAijMGAKA4YwAAijMGAKA4YwAAijMGAKA4YwAAirs77gPfvXvXXbKxsdGdba21tbW17uzp6WnUfXV11Z2dmZmJund3d7uzz549i7pfv37dnb1//37UPTk52Z1Nnq/WWlteXu7OJs9Xa61NTU11Z+fm5qLuy8vLW8m21tr19XV39uTkJOo+Ojrqzqav84uLi+7s4eFh1L24uNidTZ6v1rLXS/o6Hw6H3dn379/fWvf5+XnU/dVXX/3rY3wzAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUJwxAADFGQMAUNzYJ4xHo1F3ycTERHe2tda2tra6s8mpztayE8izs7NRd3LS9s2bN1H3vXv3urPp6eb9/f3ubHL+uLXsDPE///wTdT958qQ7m57qPjs7686mz3d6Ejfx6NGj7mz6cyfdq6urUff29nZ3Nv1cGwwG3dnkM7G11m5ubrqza2trUXdyujk9YTwO3wwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUN/YJ4729ve6S5Bxua61dXV11Zw8ODm6tOz0ru7Cw0J1Nz4weHx93Zzc3N6Pu5FTozs5O1J38zVdWVqLu5Oz0cDiMupPfOzk53VprS0tLt9ad/N6p5GdPf+/k/PL8/HzUnXw2pZ+pz58/786mn2uj0ag7m76/x+GbAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAo7u64D0zufh8dHXVnW8vuXx8eHkbdT5486c4eHx9H3Xfvjv30/IfkdnZrra2vr3dnV1dXo+6ffvqpO/vtt99G3RcXF93Z8/PzqPvp06fd2fR1vrOz051N79snf7fks6G11gaDQXc2fY9dXl52Z9P3WPJ6mZmZibqT19rk5GTU/eeff3Znt7a2ou6rq6vu7HA4jLrH4ZsBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4sa+kZucdl1aWurOttba1NRUd/bRo0dR98nJSXf2iy++iLr/+OOP7mzyN2uttevr6+5scqK0tda+/vrr7mx66jM5K5uerE5ea3fuZLv+2bNn3dnkDHBrrb1586Y7++DBg6g7OeW7vLx8a93pWfjkPfbq1auo++bmpjs7MTERdSdnhNPX+crKSnc2fb7H4ZsBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACjOGACA4owBACju7rgPXFpa6i7Z2trqzraW3ZFO7le3lv3ev//+e9SduLi4iPLT09Pd2eRud2ut3bnTv1EvLy+j7vX19e7s+/fvo+7kOZudnY26k3vp8/PzUXfysx8eHkbdDx8+7M5ub29H3clrdWZmJup++fJldzZ9j83NzXVn9/b2ou7ktTo1NRV1T05OdmfT1/k4fDMAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQ3NgnjDc3N
7tLkjPArbU2HA67szs7O1F3crYyOcXbWmtnZ2fd2eRMaGvZCeO3b99G3Z988kl3Nj3d/OLFi+7s4uJi1J0839fX11H3xMREd/bVq1dRd/J8p6eb9/f3u7Ppeyz5XEzPJyfv7/SU7+rqapRPJCeMk38DW8teL5999lnUPQ7fDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAcYPRaDQa54EfPnzoLvn777+7s6n0vn1yLz29+5387GdnZ1H3zc1Nd/by8jLqfvbsWXd2Y2Mj6k5ujl9fX0fdyfOdPF+ttXZ6etqdTd4jrbU2HA67s1dXV1H37u5ud3bMj87/UfJam56ejrrv3On/f2D62ZJ0T0xMRN3b29vd2cXFxah7ZmamO5u+x548efKvj/HNAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHHGAAAUZwwAQHF3x33g27dv/y9/jv/Vl19+2Z395Zdfou7BYNCd3d/fj7qTs5XJSdrWshOpjx8/jrqTM8TJedTWst/75OQk6v7000+7s3/99VfUnZzTTc+rHh8fR/nE8vJydzY9p5ucX05PGCd/8/Q9lnSvra1F3cl59IODg6g7+Td0fX096h6HbwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoLi74z4wuVm+u7vbnW2ttZcvX3Znr6+vo+7k9va9e/ei7tFo1J1Nb8yfn593Z6empqLus7Oz7uzk5GTUfXJy0p09PDyMupPn++OPP466NzY2urOXl5dR94MHD7qzOzs7UXfyfKev85mZme7s1dVV1P355593Z9PP88XFxe7s3t5e1L2wsNCdTV4rrbX26NGj7uz29nbUPQ7fDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAccYAABRnDABAcXfHfeCdO/27YWtrqzubdk9PT0fds7Oz3dnr6+uoO7ndPTExEXWvra11Z09PT6Pu+fn57mzyfLXW2mAwiPKJs7Oz7mx65z2Rvs53d3e7szMzM1H3yspKd/b4+DjqTt4ny8vLUfeHDx+ifOLm5qY7m36eJ6/V9fX1qHtubq47OzU1FXWPwzcDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxRkDAFCcMQAAxY19wnhhYaG75OHDh93Z1lq7e3fsH/M/JOdwW2ttf3+/O5uc6mwtO5l5cnISdY9Go+7swcFB1J2cpU1PN//888/d2e+++y7qTs4nLy4uRt3D4bA7e3h4GHVPTk52Z5Ozz2k+Paeb/N7pZ8ubN2+6s8kp3tZaOzo66s4m/xa0ln0+JOeuW8s+k6+urqLucfhmAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKG/s49OvXr7tL0lvr8/Pz3dm9vb2oO7GxsRHlP3z40J1dXl6Ouqemprqzo9Eo6r6+vu7OJn+z1lr7/vvvu7Obm5tRd3LffjAYRN3JvfTZ2dmo++Liojs7HA6j7uPj4+5s+lpL3mNp99OnT7uz6Wfq6elpd3ZpaSnqvnOn//+/u7u7UffJyUl3NvlsGJdvBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIobjMa8N/vbb791l9zc3HRnW2vt/Py8O5ucR20tO8ebnldNTj8fHBxE3clZ2rOzs6g7+b3Tc7o//vhjd/aHH36IuldWVrqz+/v7UfdHH33Un
X337l3UnZysnpmZibqT93dygri17Jxu+v5OPhfv378fdSd/8/TfkuQ5u7y8jLqTE8jJ+7O11j799NN/fYxvBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCgOGMAAIozBgCguMFozOPSv/76a3dJcq+8tdbOzs66s+m98+R+9uzsbNR9fn7enZ2eno66k1vrExMTUffR0VF3Nv29B4NBdza9tb64uNidTW6lt9ba48ePu7M7OztR99zc3K11X1xcdGeXl5ej7qmpqe5s8h5pLXufJO+R1rKffWlpKepO/j3Y29uLupPP8/Tf0G+++eZfH+ObAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOKMAQAozhgAgOLu/n+UrK2tRfnkzGhyNrK11sa88PzfWlhYiLpPT09vJdtadn55Y2Mj6l5dXe3Opr93crr56uoq6k5eq8PhMOre3Nzszs7Pz0fd7969686m57KT9+jJyUnUneSTz8TWsvPJh4eHUXfyN0/fY8nr/Pnz51H3ixcvurPp59o4fDMAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUZAwBQnDEAAMUNRqPR6LZ/CADg9vhmAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACKMwYAoDhjAACK+y+mwhg82Q54rwAAAABJRU5ErkJggg==\n" + }, + "metadata": {} + } + ] + } + ] +} \ No newline at end of file