{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyMPh4RbxOzA/0Wh6s+3gc9P", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "7a469b6821ed458d99a1ed57e72b3d68": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_8c556c8c8ce941c6b433780fd4a6ae54", "IPY_MODEL_626b1ba98c374987913a7a4384f19fa1", "IPY_MODEL_a4fad4d11a8941f8b90abb3099e9a090" ], "layout": "IPY_MODEL_c3a4b958e4814294801495226697bce2" } }, "8c556c8c8ce941c6b433780fd4a6ae54": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2e939db189424ab7b5f9095932f2c99f", "placeholder": "​", "style": "IPY_MODEL_fd6a36e947ec451a938d266117dab12e", "value": "Parsing nodes: 100%" } }, "626b1ba98c374987913a7a4384f19fa1": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": 
"ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e4413564a300469d86c3abc567f24701", "max": 14, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_64167ae99cd24c729435aefc1ea13519", "value": 14 } }, "a4fad4d11a8941f8b90abb3099e9a090": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2634e510d3c844d88891a98661beb6a9", "placeholder": "​", "style": "IPY_MODEL_6b3d2afb949f4de691ceac601bd96d0e", "value": " 14/14 [00:00<00:00, 34.02it/s]" } }, "c3a4b958e4814294801495226697bce2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": 
null, "visibility": null, "width": null } }, "2e939db189424ab7b5f9095932f2c99f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fd6a36e947ec451a938d266117dab12e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e4413564a300469d86c3abc567f24701": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": 
"LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "64167ae99cd24c729435aefc1ea13519": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "2634e510d3c844d88891a98661beb6a9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": 
null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6b3d2afb949f4de691ceac601bd96d0e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8cc800fbe6bc4f4da5dd6b93d4a5143a": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_812d5d9b04f74592b850b3eb32f88c04", "IPY_MODEL_ed22c91e813c4351ab1d3eb7e174796c", "IPY_MODEL_de2088a425104f05b52b7a3236c7baa9" ], "layout": "IPY_MODEL_6f9f666836084de7894aa2e65c8dbe07" } }, "812d5d9b04f74592b850b3eb32f88c04": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_63a3dcff335349deacf4abb9b68d76ab", "placeholder": "​", "style": 
"IPY_MODEL_99eb83f4b8904e20b45573bab84aa5f4", "value": "Generating embeddings: 100%" } }, "ed22c91e813c4351ab1d3eb7e174796c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2c8aef5e8ec848c0a23c72581e5f4b1e", "max": 108, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_7d54abb8f3784a789fd042c2ed2dd685", "value": 108 } }, "de2088a425104f05b52b7a3236c7baa9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a1a88448b188407b8e4aa2af86fb9345", "placeholder": "​", "style": "IPY_MODEL_6a4cc229f5774cb0b4d3def7eee8b56e", "value": " 108/108 [00:04<00:00, 22.53it/s]" } }, "6f9f666836084de7894aa2e65c8dbe07": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": 
null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "63a3dcff335349deacf4abb9b68d76ab": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "99eb83f4b8904e20b45573bab84aa5f4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2c8aef5e8ec848c0a23c72581e5f4b1e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7d54abb8f3784a789fd042c2ed2dd685": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a1a88448b188407b8e4aa2af86fb9345": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6a4cc229f5774cb0b4d3def7eee8b56e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b10233c49dcc4a2f89de5389309d4fb4": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c617a0bc420b453693bb697a235e50d7", "IPY_MODEL_f14f74d98f824013b562c82fb251ac26", "IPY_MODEL_19f8baa6c24e4c7a8888f73f3cb7e3f8" ], "layout": "IPY_MODEL_19c0bf2b745640b3adf6478738ba02ea" } }, "c617a0bc420b453693bb697a235e50d7": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0258a4a4bdc24404aa005c3b4d1235ee", "placeholder": "​", "style": "IPY_MODEL_8da878f475de494fac3f7acf29e4e7f0", "value": "Parsing nodes: 100%" } }, "f14f74d98f824013b562c82fb251ac26": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dc5b9ea6aeea42dfae978e4a8961b03a", "max": 14, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_aefce46940904fce9c4e439784cbc28c", "value": 14 } }, "19f8baa6c24e4c7a8888f73f3cb7e3f8": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dcfeadeb1cc2483399e8194ec43f2eee", "placeholder": "​", "style": "IPY_MODEL_7cec42608a51413796ec41250e0eed6d", "value": " 14/14 [00:00<00:00, 22.42it/s]" } }, "19c0bf2b745640b3adf6478738ba02ea": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", 
"model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0258a4a4bdc24404aa005c3b4d1235ee": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, 
"object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8da878f475de494fac3f7acf29e4e7f0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "dc5b9ea6aeea42dfae978e4a8961b03a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "aefce46940904fce9c4e439784cbc28c": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "dcfeadeb1cc2483399e8194ec43f2eee": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7cec42608a51413796ec41250e0eed6d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "036ae37776684a46a1a1f9e3c018a87e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_de5e18d6629d4cd0abf9e5c72d07ac73", "IPY_MODEL_29ff5f2d9c114e8bb1b7461dbae2fdb8", "IPY_MODEL_0f79e4f5836f4ebf80af47c8e100b012" ], "layout": "IPY_MODEL_99a5712bb6b64f68b30b9a1dbbc803fb" } }, "de5e18d6629d4cd0abf9e5c72d07ac73": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_24fe3fb4e04546b3a17377d3e6ff61d6", "placeholder": "​", "style": "IPY_MODEL_931b9be975234aa79ae55aa12629f661", "value": "Generating embeddings: 100%" } }, "29ff5f2d9c114e8bb1b7461dbae2fdb8": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_63bf1ccee3ad4101920f74bb2410bfe6", "max": 94, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8f353fecd64a4e18be6fe2eb4fea3f9d", "value": 94 } }, "0f79e4f5836f4ebf80af47c8e100b012": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d190edde40f04461ba066bc7f10b9d31", "placeholder": "​", "style": "IPY_MODEL_3114d5176097487bb1313cd49867680f", "value": " 94/94 [00:13<00:00, 8.05it/s]" } }, "99a5712bb6b64f68b30b9a1dbbc803fb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "24fe3fb4e04546b3a17377d3e6ff61d6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, 
"border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "931b9be975234aa79ae55aa12629f661": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "63bf1ccee3ad4101920f74bb2410bfe6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8f353fecd64a4e18be6fe2eb4fea3f9d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d190edde40f04461ba066bc7f10b9d31": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3114d5176097487bb1313cd49867680f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", 
"model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "source": [ "# Install Packages and Setup Variables" ], "metadata": { "id": "-zE1h0uQV7uT" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QPJzr-I9XQ7l", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "34a040a3-c044-4348-ef4c-d8cc61364c90" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.4/225.4 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m39.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m508.6/508.6 kB\u001b[0m \u001b[31m29.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.9/79.9 MB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.7/51.7 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.1/92.1 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.7/60.7 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.1/41.1 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m70.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m65.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.9/57.9 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.6/105.6 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m698.9/698.9 kB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m71.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m73.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" ] } ], "source": [ "!pip install -q llama-index==0.9.21 openai==1.6.0 tiktoken==0.5.2 chromadb==0.4.21 kaleido==0.2.1 python-multipart==0.0.6 cohere==4.39" ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "# Set the \"OPENAI_API_KEY\" in the Python environment. Will be used by OpenAI client later.\n", "os.environ[\"OPENAI_API_KEY\"] = \"\"" ], "metadata": { "id": "riuXwpSPcvWC" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import nest_asyncio\n", "\n", "nest_asyncio.apply()" ], "metadata": { "id": "jIEeZzqLbz0J" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Load a Model" ], "metadata": { "id": "Bkgi2OrYzF7q" } }, { "cell_type": "code", "source": [ "from llama_index.llms import OpenAI\n", "\n", "llm = OpenAI(temperature=0.9, model=\"gpt-3.5-turbo\", max_tokens=512)" ], "metadata": { "id": "9oGT6crooSSj" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Create a Vector Store" ], "metadata": { "id": "0BwVuJXlzHVL" } }, { "cell_type": "code", "source": [ "import chromadb\n", "\n", "# Create a persistent client (data is stored on disk under the given path) and a new collection.\n", "# Use chromadb.EphemeralClient instead to keep the data in memory only.\n", "chroma_client = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n", "chroma_collection = chroma_client.create_collection(\"mini-llama-articles\")" ], 
"metadata": { "id": "SQP87lHczHKc" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from llama_index.vector_stores import ChromaVectorStore\n", "\n", "# Wrap the Chroma collection in a vector store object that LlamaIndex can use.\n", "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)" ], "metadata": { "id": "zAaGcYMJzHAN" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Load the Dataset (CSV)" ], "metadata": { "id": "I9JbAzFcjkpn" } }, { "cell_type": "markdown", "source": [ "## Download" ], "metadata": { "id": "ceveDuYdWCYk" } }, { "cell_type": "markdown", "source": [ "The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. It is stored as a CSV file, which we download here and read row by row in the next section." ], "metadata": { "id": "eZwf6pv7WFmD" } }, { "cell_type": "code", "source": [ "!wget https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wl_pbPvMlv1h", "outputId": "02651edb-4a76-4bf4-e72f-92219f994292" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2024-02-06 19:59:09-- https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 173646 (170K) [text/plain]\n", "Saving to: ‘mini-llama-articles.csv’\n", "\n", "\rmini-llama-articles 0%[ ] 0 --.-KB/s \rmini-llama-articles 100%[===================>] 169.58K --.-KB/s in 0.02s \n", "\n", "2024-02-06 19:59:09 (7.18 MB/s) - ‘mini-llama-articles.csv’ saved [173646/173646]\n", "\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Read File" ], "metadata": { "id": "VWBLtDbUWJfA" } }, { "cell_type": "code", "source": [ "import csv\n", "\n", "rows = []\n", "\n", "# Load the CSV file; every row becomes a list of column values.\n", "# newline=\"\" lets the csv module handle line breaks inside quoted fields itself.\n", "with open(\"./mini-llama-articles.csv\", mode=\"r\", encoding=\"utf-8\", newline=\"\") as file:\n", "    csv_reader = csv.reader(file)\n", "    next(csv_reader, None)  # Skip header row\n", "    rows.extend(csv_reader)\n", "\n", "# The number of rows (articles) in the dataset.\n", "len(rows)" ], "metadata": { "id": "0Q9sxuW0g3Gd", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "b74eb24b-a956-404a-b343-4f961aca883f" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "14" ] }, "metadata": {}, "execution_count": 19 } ] }, { "cell_type": "markdown", "source": [ "# Convert to Document obj" ], "metadata": { "id": "S17g2RYOjmf2" } }, { "cell_type": "code", "source": [ "from llama_index import Document\n", "\n", "# Convert the rows (one article each) to Document objects so the LlamaIndex framework can process them.\n", "documents = [Document(text=row[1], metadata={\"title\": row[0], \"url\": row[2], \"source_name\": row[3]}) for row in rows]" ], "metadata": { "id": "YizvmXPejkJE" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Transforming" ], "metadata": { "id": "qjuLbmFuWsyl" } }, { "cell_type": "code", "source": [ "from llama_index.text_splitter import TokenTextSplitter\n", "\n", "# Define the splitter object that splits the text into segments of 512 tokens,\n", "# with a 128-token overlap between the segments.\n", "text_splitter = 
TokenTextSplitter(\n", " separator=\" \", chunk_size=512, chunk_overlap=128\n", ")" ], "metadata": { "id": "9z3t70DGWsjO" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from llama_index.extractors import (\n", " SummaryExtractor,\n", " QuestionsAnsweredExtractor,\n", " KeywordExtractor,\n", ")\n", "from llama_index.embeddings import OpenAIEmbedding\n", "from llama_index.ingestion import IngestionPipeline\n", "\n", "# Create the pipeline to apply the transformation on each chunk,\n", "# and store the transformed text in the chroma vector store.\n", "pipeline = IngestionPipeline(\n", " transformations=[\n", " text_splitter,\n", " QuestionsAnsweredExtractor(questions=3, llm=llm),\n", " SummaryExtractor(summaries=[\"prev\", \"self\"], llm=llm),\n", " KeywordExtractor(keywords=10, llm=llm),\n", " OpenAIEmbedding(),\n", " ],\n", " vector_store=vector_store\n", ")\n", "\n", "# Run the transformation pipeline.\n", "nodes = pipeline.run(documents=documents, show_progress=True);" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 385, "referenced_widgets": [ "7a469b6821ed458d99a1ed57e72b3d68", "8c556c8c8ce941c6b433780fd4a6ae54", "626b1ba98c374987913a7a4384f19fa1", "a4fad4d11a8941f8b90abb3099e9a090", "c3a4b958e4814294801495226697bce2", "2e939db189424ab7b5f9095932f2c99f", "fd6a36e947ec451a938d266117dab12e", "e4413564a300469d86c3abc567f24701", "64167ae99cd24c729435aefc1ea13519", "2634e510d3c844d88891a98661beb6a9", "6b3d2afb949f4de691ceac601bd96d0e", "8cc800fbe6bc4f4da5dd6b93d4a5143a", "812d5d9b04f74592b850b3eb32f88c04", "ed22c91e813c4351ab1d3eb7e174796c", "de2088a425104f05b52b7a3236c7baa9", "6f9f666836084de7894aa2e65c8dbe07", "63a3dcff335349deacf4abb9b68d76ab", "99eb83f4b8904e20b45573bab84aa5f4", "2c8aef5e8ec848c0a23c72581e5f4b1e", "7d54abb8f3784a789fd042c2ed2dd685", "a1a88448b188407b8e4aa2af86fb9345", "6a4cc229f5774cb0b4d3def7eee8b56e" ] }, "id": "P9LDJ7o-Wsc-", "outputId": "2e27e965-fd4c-4754-94f5-3a6e33a72dea" }, 
"execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Parsing nodes: 0%| | 0/14 [00:00>\\nYou are a helpful Question Answering Assistant. Please only answer from this reference Source:8989REF\" However, that turned out to be a very naive attempt. Also, note that the generated QA missed transforming training data related to Professor Thiersch's method to a proper QA dataset. These and other improvements need to be experimented with, as well as to train with some completely new data that the model has not seen to test more effectively. Update: Training with new data was done by writing an imaginary story with ChatGPT help and then creating an instruction tuning data set (colab notebook). The model was then trained and tested (colab notebook) with this generated instruct dataset. The results confirm that the model learns via Instruct tuning, not only the fed questions but other details and relations of the domain. Problems with hallucinations remain (Bordor, Lila characters who are\n", "Score\t 0.7046179109299758\n", "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n", "Node ID\t 6906e3b8-4c42-453c-9b60-9f5e4b1d3304\n", "Title\t LLaMA by Meta leaked by an anonymous forum: Questions Arises on Meta\n", "Text\t LLaMA: Meta's new AI tool According to the official release, LLaMA is a foundational language model developed to assist 'researchers and academics' in their work (as opposed to the average web user) to understand and study these NLP models. Leveraging AI in such a way could give researchers an edge in terms of time spent. You may not know this, but this would be Meta's third LLM after Blender Bot 3 and Galactica. However, the two LLMs were shut down soon, and Meta stopped their further development, as it produced erroneous results. Before moving further, it is important to emphasize that LLaMA is NOT a chatbot like ChatGPT. As I mentioned before, it is a 'research tool' for researchers. 
We can expect the initial versions of LLaMA to be a bit more technical and indirect to use as opposed to the case with ChatGPT, which was very direct, interactive, and a lot easy to use. \"Smaller, more performant models such as LLaMA enable ... research community who don't have access to large amounts of infrastructure to study these models.. further democratizing access in this important, fast-changing field,\" said Meta in its official blog. Meta's effort of \"democratizing\" access to the public could shed light on one of the critical issues of Generative AI - toxicity and bias. ChatGPT and other LLMs (obviously, I am referring to Bing) have a track record of responding in a way that is toxic and, well... evil. The Verge and major critics have covered it in much detail. Oh and the community did get the access, but not in the way Meta anticipated. On March 3rd, a downloadable torrent of the LLaMA system was posted on 4chan. 4chan is an anonymous online forum known for its controversial content and diverse range of discussions, which has nearly 222 million unique monthly visitors. LLaMA is currently not in use on any of Meta's products. But Meta has plans to make it available to researchers before they can use them in their own products. It's worth mentioning that Meta did not release\n", "Score\t 0.6920492401808848\n", "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n" ] } ] }, { "cell_type": "markdown", "source": [ "From the articles:\n", " \n", "> [...]The 7 billion model of Llama2 has sufficient NLU (Natural Language Understanding) to create output based on a particular format[...]\n", "\n" ], "metadata": { "id": "TmkI8BV8rATi" } }, { "cell_type": "markdown", "source": [ "# No Metadata" ], "metadata": { "id": "6Wx-IPSMbSwC" } }, { "cell_type": "markdown", "source": [ "Now, let's evaluate the ability of the query engine independently of the generated metadata, like keyword extraction or summarization." 
], "metadata": { "id": "h8QUcGEgeNsD" } }, { "cell_type": "code", "source": [ "from llama_index import Document\n", "\n", "documents_no_meta = [Document(text=row[1]) for row in rows]" ], "metadata": { "id": "oGunPKGRbT6H" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from llama_index.embeddings import OpenAIEmbedding\n", "from llama_index.ingestion import IngestionPipeline\n", "\n", "pipeline = IngestionPipeline(\n", " transformations=[\n", " text_splitter,\n", " OpenAIEmbedding(),\n", " ]\n", ")\n", "\n", "nodes_no_meta = pipeline.run(documents=documents_no_meta, show_progress=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 331, "referenced_widgets": [ "b10233c49dcc4a2f89de5389309d4fb4", "c617a0bc420b453693bb697a235e50d7", "f14f74d98f824013b562c82fb251ac26", "19f8baa6c24e4c7a8888f73f3cb7e3f8", "19c0bf2b745640b3adf6478738ba02ea", "0258a4a4bdc24404aa005c3b4d1235ee", "8da878f475de494fac3f7acf29e4e7f0", "dc5b9ea6aeea42dfae978e4a8961b03a", "aefce46940904fce9c4e439784cbc28c", "dcfeadeb1cc2483399e8194ec43f2eee", "7cec42608a51413796ec41250e0eed6d", "036ae37776684a46a1a1f9e3c018a87e", "de5e18d6629d4cd0abf9e5c72d07ac73", "29ff5f2d9c114e8bb1b7461dbae2fdb8", "0f79e4f5836f4ebf80af47c8e100b012", "99a5712bb6b64f68b30b9a1dbbc803fb", "24fe3fb4e04546b3a17377d3e6ff61d6", "931b9be975234aa79ae55aa12629f661", "63bf1ccee3ad4101920f74bb2410bfe6", "8f353fecd64a4e18be6fe2eb4fea3f9d", "d190edde40f04461ba066bc7f10b9d31", "3114d5176097487bb1313cd49867680f" ] }, "id": "Hxf4jT6afiZt", "outputId": "48b34670-17cf-494f-9d39-58ae9c47822a" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Parsing nodes: 0%| | 0/14 [00:00>\\nYou are a helpful Question Answering Assistant. Please only answer from this reference Source:8989REF\" However, that turned out to be a very naive attempt. 
Also, note that the generated QA missed transforming training data related to Professor Thiersch's method to a proper QA dataset. These and other improvements need to be experimented with, as well as to train with some completely new data that the model has not seen\n", "Score\t 0.8218537826347032\n", "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n", "Node ID\t fb383f04-a605-4e26-87dc-8fd5d6090334\n", "Text\t run the 7 billion Lamma2 pre-trained model open-sourced recently by Meta Research. Imagine the compressed knowledge and an NLU (Natural Language Understanding) model running on your local laptop. This is still a smallish model, but it's still capable of understanding and has sufficient world knowledge embedded in it to be quite useful. Imagine what a model like this or better models in the future could do if it could run in small servers or in cars, and leverage its causal reasoning and world model knowledge to supervise lower-level/specialist AI/ML systems. So we have now a way to fit reasonably large models (7B or more) in a single GPU, via Quantisation and then train them in a parameter-efficient way via LoRa/QLoRa. Take 1: Un-supervised Training Fine-tuning with QLoRa Using the small training data and QLoRA, I first tried to train a large 7B Lamma2 model by feeding in the training text as is (Causal LM model training via UnSupervised learning). Note that this model was loaded in 4-bit, making it runnable on a single T4 GPU and trained with QLoRa. With QLoRA, only a fraction of the adapter weights are trained and summed with the existing frozen pre-trained weights of the model during inference. Here is an illustrative Colab notebook. You can see that training the model with just the text as is, does not result in proper output to questions. The answers are not affected by the training data. 
Take 2: Instruct Fine-tuning with QLoRa Instruction Tuning concept is a higher-level training concept introduced by this paper FineTuned Language Models Are Zero shot Learners (FLAN) We leverage the intuition that NLP tasks can be described via natural language instructions, such as \"Is the sentiment of this movie review positive or negative?\" or \"Translate 'how are you' into Chinese.\" We take a pre-trained language model of 137B parameters and perform instruction tuning ... Since we use QLoRa we are effectively closely following this paper - QLORA: Efficient Finetuning of Quantized LLMs concerning the training data set, the format that the authors used to train their Gauanco model This is the format for the Llama2 model and will be different for others. One of the hardest problems of training is finding or creating a good quality data set to train. In our case, converting the available training data set to the instruction data set.\n", "Score\t 0.8203676280171278\n", "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Evaluate" ], "metadata": { "id": "iMkpzH7vvb09" } }, { "cell_type": "code", "source": [ "from llama_index.evaluation import generate_question_context_pairs\n", "from llama_index.llms import OpenAI\n", "\n", "# Create questions for each segment. 
These questions will be used to\n", "# assess whether the retriever can accurately identify and return the\n", "# corresponding segment when queried.\n", "llm = OpenAI(model=\"gpt-3.5-turbo\")\n", "rag_eval_dataset = generate_question_context_pairs(\n", "    nodes,\n", "    llm=llm,\n", "    num_questions_per_chunk=1\n", ")\n", "\n", "# We can save the evaluation dataset as a json file for later use.\n", "rag_eval_dataset.save_json(\"./rag_eval_dataset.json\")" ], "metadata": { "id": "H8a3eKgKvckU" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "If you have uploaded the generated question JSON file, please uncomment the code in the next cell block. This will avoid the need to generate the questions again, saving you time and effort." ], "metadata": { "id": "eNP3cmiOe_xS" } }, { "cell_type": "code", "source": [ "# from llama_index.finetuning.embeddings.common import (\n", "# EmbeddingQAFinetuneDataset,\n", "# )\n", "# rag_eval_dataset = EmbeddingQAFinetuneDataset.from_json(\n", "# \"./rag_eval_dataset.json\"\n", "# )" ], "metadata": { "id": "3sA1K84U254o" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "\n", "# A simple function to show the evaluation result.\n", "def display_results_retriever(name, eval_results):\n", "    \"\"\"Summarize retriever evaluation results as a one-row DataFrame.\n", "\n", "    Args:\n", "        name: Label placed in the `Retriever Name` column.\n", "        eval_results: Iterable of results exposing `metric_vals_dict`,\n", "            each holding `hit_rate` and `mrr` entries.\n", "\n", "    Returns:\n", "        A DataFrame with the mean hit rate and the mean MRR over all results.\n", "    \"\"\"\n", "    metric_dicts = [eval_result.metric_vals_dict for eval_result in eval_results]\n", "    full_df = pd.DataFrame(metric_dicts)\n", "\n", "    hit_rate = full_df[\"hit_rate\"].mean()\n", "    mrr = full_df[\"mrr\"].mean()\n", "\n", "    return pd.DataFrame(\n", "        {\"Retriever Name\": [name], \"Hit Rate\": [hit_rate], \"MRR\": [mrr]}\n", "    )" ], "metadata": { "id": "H7ubvcbk27vr" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from llama_index.evaluation import RetrieverEvaluator\n", "\n", 
"# We can evaluate the retrievers with different top_k values.\n", "for i in [2, 4, 6, 8, 10]:\n", "    retriever = index.as_retriever(similarity_top_k=i)\n", "    retriever_evaluator = RetrieverEvaluator.from_metric_names(\n", "        [\"mrr\", \"hit_rate\"], retriever=retriever\n", "    )\n", "    eval_results = await retriever_evaluator.aevaluate_dataset(rag_eval_dataset)\n", "    print(display_results_retriever(f\"Retriever top_{i}\", eval_results))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uNLxDxoc2-Ac", "outputId": "4084d5d0-21b6-4f0e-aec3-4aab1c8c8c44" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " Retriever Name Hit Rate MRR\n", "0 Retriever top_2 0.650589 0.538049\n", " Retriever Name Hit Rate MRR\n", "0 Retriever top_4 0.765273 0.572615\n", " Retriever Name Hit Rate MRR\n", "0 Retriever top_6 0.81672 0.58278\n", " Retriever Name Hit Rate MRR\n", "0 Retriever top_8 0.846731 0.586084\n", " Retriever Name Hit Rate MRR\n", "0 Retriever top_10 0.861736 0.587795\n" ] } ] }, { "cell_type": "code", "source": [ "from llama_index.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n", "from llama_index import ServiceContext\n", "from llama_index.llms import OpenAI\n", "\n", "for i in [2, 4, 6, 8, 10]:\n", " # Set Faithfulness and Relevancy evaluators\n", " query_engine = index.as_query_engine(similarity_top_k=i)\n", "\n", " # While we use GPT3.5-Turbo to answer questions, we can use GPT4 to evaluate the answers.\n", " llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n", " service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)\n", "\n", " faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_gpt4)\n", " relevancy_evaluator = RelevancyEvaluator(service_context=service_context_gpt4)\n", "\n", " # Run evaluation\n", " queries = list(rag_eval_dataset.queries.values())\n", " batch_eval_queries = queries[:20]\n", "\n", " runner = 
BatchEvalRunner(\n", " {\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n", " workers=8,\n", " )\n", " eval_results = await runner.aevaluate_queries(\n", " query_engine, queries=batch_eval_queries\n", " )\n", " faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])\n", " print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n", "\n", " # Relevancy is computed from the relevancy results, not the faithfulness ones.\n", " relevancy_score = sum(result.passing for result in eval_results['relevancy']) / len(eval_results['relevancy'])\n", " print(f\"top_{i} relevancy_score: {relevancy_score}\")\n", " print(\"-_\"*10)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3ukkWC9R2_0J", "outputId": "ccde96d4-e431-4f9a-f83c-63678de56a93" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "top_2 faithfulness_score: 1.0\n", "top_2 relevancy_score: 1.0\n", "-_-_-_-_-_-_-_-_-_-_\n", "top_4 faithfulness_score: 1.0\n", "top_4 relevancy_score: 1.0\n", "-_-_-_-_-_-_-_-_-_-_\n", "top_6 faithfulness_score: 1.0\n", "top_6 relevancy_score: 1.0\n", "-_-_-_-_-_-_-_-_-_-_\n", "top_8 faithfulness_score: 1.0\n", "top_8 relevancy_score: 1.0\n", "-_-_-_-_-_-_-_-_-_-_\n", "top_10 faithfulness_score: 1.0\n", "top_10 relevancy_score: 1.0\n", "-_-_-_-_-_-_-_-_-_-_\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "1MB1YD1E3EKM" }, "execution_count": null, "outputs": [] } ] }