diff --git "a/finetune.ipynb" "b/finetune.ipynb" new file mode 100644--- /dev/null +++ "b/finetune.ipynb" @@ -0,0 +1,4022 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "76eba77b0fc9499e9b4015393156153e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c42123767d4849ffbcdd48f09580ea13", + "IPY_MODEL_e89f07c1f5744328b071f96a3bdc9532", + "IPY_MODEL_da36c71ba5bc469eb32bde730c5038d3" + ], + "layout": "IPY_MODEL_6cc6b7c564fe48f086152c78f8a9a915" + } + }, + "c42123767d4849ffbcdd48f09580ea13": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c6a3adfa76dd4bbd8cc5e75ee28ef3e8", + "placeholder": "​", + "style": "IPY_MODEL_0c65f0e913b74de88ab712d65b9027f7", + "value": "Downloading data files: 100%" + } + }, + "e89f07c1f5744328b071f96a3bdc9532": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b07c7f3b6bc24f10821b88e254e88e43", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_42ea2834ee074029abe8cf8f9ff79a16", + "value": 1 + } + }, + "da36c71ba5bc469eb32bde730c5038d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ec4205b0491a4f53ad1cba42689d59c4", + "placeholder": "​", + "style": "IPY_MODEL_f8b9df09c8c54d53a22310e64e30772f", + "value": " 1/1 [00:00<00:00, 39.39it/s]" + } + }, + "6cc6b7c564fe48f086152c78f8a9a915": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6a3adfa76dd4bbd8cc5e75ee28ef3e8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0c65f0e913b74de88ab712d65b9027f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b07c7f3b6bc24f10821b88e254e88e43": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "42ea2834ee074029abe8cf8f9ff79a16": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ec4205b0491a4f53ad1cba42689d59c4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f8b9df09c8c54d53a22310e64e30772f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e69b8912d3384180842ff0ead6096803": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_550a2ef0612e4d8396462d1b1c43608d", + "IPY_MODEL_f8d82b4d016f46ca8bab1a2fe591861e", + "IPY_MODEL_975d908a1874425198e0d62ae884a3ef" + ], + "layout": "IPY_MODEL_152aa8231b3f4b7db4e6f27bb61065b9" + } + }, + "550a2ef0612e4d8396462d1b1c43608d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_27b48982ae244c66bccd855e7e33a00d", + "placeholder": "​", + "style": "IPY_MODEL_a896cb94d1fc4641ad224c7a9ca619b1", + "value": "Extracting data files: 100%" + } + }, + "f8d82b4d016f46ca8bab1a2fe591861e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_be5d02d7001a4a25a848d6b0c582b42b", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_609437910022418e83543fe6c3d16501", + "value": 1 + } + }, + "975d908a1874425198e0d62ae884a3ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c3b5ef42dae64f95a2ef055db7e50fa8", + "placeholder": "​", + "style": "IPY_MODEL_ee0f0aff0c254b82854d1a6f4b367161", + "value": " 1/1 [00:01<00:00, 1.73s/it]" + } + }, + "152aa8231b3f4b7db4e6f27bb61065b9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "27b48982ae244c66bccd855e7e33a00d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a896cb94d1fc4641ad224c7a9ca619b1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "be5d02d7001a4a25a848d6b0c582b42b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "609437910022418e83543fe6c3d16501": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c3b5ef42dae64f95a2ef055db7e50fa8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ee0f0aff0c254b82854d1a6f4b367161": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fd90085d416440579f409cdd9b29e053": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_fdd55d56775a4641925fc88e43f946b8", + "IPY_MODEL_a54ebe30a12b4c21b67b2f8d43aea2d3", + "IPY_MODEL_6f041f053ff240cbbb52f6d117ebfcf1" + ], + "layout": "IPY_MODEL_b7d5f8aaee0047178661e58aa59433ee" + } + }, + "fdd55d56775a4641925fc88e43f946b8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_563ca6cac1594ba0a7c168ee31ccc982", + "placeholder": "​", + "style": "IPY_MODEL_e10e7830d1404baea72e81e0df295239", + "value": "Generating train split: " + } + }, + "a54ebe30a12b4c21b67b2f8d43aea2d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "info", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_acc07aa077fd49d38d74e0e10fa3991a", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_441dfdd43ef0442894c19cad81a478db", + "value": 1 + } + }, + "6f041f053ff240cbbb52f6d117ebfcf1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9efe7ac9ee0946388eeb85f9a4603ffe", + "placeholder": "​", + "style": "IPY_MODEL_ae9996dcefcf4cfc8ea2f7488629c5ec", + "value": " 15011/0 [00:01<00:00, 14307.31 examples/s]" + } + }, + "b7d5f8aaee0047178661e58aa59433ee": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "563ca6cac1594ba0a7c168ee31ccc982": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e10e7830d1404baea72e81e0df295239": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "acc07aa077fd49d38d74e0e10fa3991a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "441dfdd43ef0442894c19cad81a478db": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9efe7ac9ee0946388eeb85f9a4603ffe": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae9996dcefcf4cfc8ea2f7488629c5ec": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "db463b34f98c488e8282d7a59c421347": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2ba524e48f35421c9afc9cc81e9ce3fb", + "IPY_MODEL_1829c96471c64a03a4872e1b7a16551c", + "IPY_MODEL_f639a121fab0407595ebdf04015d9267" + ], + "layout": "IPY_MODEL_fa5ff44ae5f64ce4bbf9b59e300b7ba3" + } + }, + "2ba524e48f35421c9afc9cc81e9ce3fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e0d701b8c09744858db4fda6011b69df", + "placeholder": "​", + "style": "IPY_MODEL_7ef61952e0f2463bad3dd1a814bfb95a", + "value": "100%" + } + }, + "1829c96471c64a03a4872e1b7a16551c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_75263b1d19ce4cb0b95cacb6138b6fcb", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c3f35ce818e34b05b247fae76200c02e", + "value": 1 + } + }, + "f639a121fab0407595ebdf04015d9267": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_be8dddb19dde441da1193d5f8738413d", + "placeholder": "​", + "style": "IPY_MODEL_4be8d304ddce49319488f20ab36e7f53", + "value": " 1/1 [00:00<00:00, 22.58it/s]" + } + }, + "fa5ff44ae5f64ce4bbf9b59e300b7ba3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e0d701b8c09744858db4fda6011b69df": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7ef61952e0f2463bad3dd1a814bfb95a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "75263b1d19ce4cb0b95cacb6138b6fcb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c3f35ce818e34b05b247fae76200c02e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "be8dddb19dde441da1193d5f8738413d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4be8d304ddce49319488f20ab36e7f53": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "984896bacc39446091e2b6573facfa3e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f15179aad6ad44dfa2dfbd703dc7c319", + "IPY_MODEL_f92c68902b984e648337da7e63fcc775", + "IPY_MODEL_5ce279d7dca74e55acae1c70efae6cd5" + ], + "layout": "IPY_MODEL_b187d889a6654fd7ad948d66a8bce197" + } + }, + "f15179aad6ad44dfa2dfbd703dc7c319": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e131a64ebaa84a809754b3c762525c37", + "placeholder": "​", + "style": "IPY_MODEL_72819a3c606c41f19d1493539d64aff5", + "value": "Downloading (…)lve/main/config.json: 100%" + } + }, + "f92c68902b984e648337da7e63fcc775": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6761867d6e2b415b932bebd33a3144f3", + "max": 506, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_72c9aa3bb9614dfc8f316bc45a3455a0", + "value": 506 + } + }, + "5ce279d7dca74e55acae1c70efae6cd5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_58e5e895215d4910b55d8cbe1417a111", + "placeholder": "​", + "style": "IPY_MODEL_b6bad7f690c54cb5814d09081e306393", + "value": " 506/506 [00:00<00:00, 31.8kB/s]" + } + }, + "b187d889a6654fd7ad948d66a8bce197": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e131a64ebaa84a809754b3c762525c37": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "72819a3c606c41f19d1493539d64aff5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6761867d6e2b415b932bebd33a3144f3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "72c9aa3bb9614dfc8f316bc45a3455a0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "58e5e895215d4910b55d8cbe1417a111": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b6bad7f690c54cb5814d09081e306393": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ae3921fcb8e44862a42031f284f608c3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aaac19cd3f524821a656ceffa57fb337", + "IPY_MODEL_a77af2c0771b4a20b42dade07c40021c", + "IPY_MODEL_7e5ced10225544399add5e68dda80595" + ], + "layout": "IPY_MODEL_ccf73891ca1941ca8b77c87ba84058f8" + } + }, + "aaac19cd3f524821a656ceffa57fb337": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bfc1e1979f0741a09e6c9f4181ae88e7", + "placeholder": "​", + "style": "IPY_MODEL_8ce25329fdf04a6e8c4ad8097975111e", + "value": "Downloading pytorch_model.bin: 100%" + } + }, + "a77af2c0771b4a20b42dade07c40021c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_479d18fc0f9448a49b07b93ac608ae1d", + "max": 6853038093, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_85ffc3306cb4474aa5bfac22f6356352", + "value": 6853038093 + } + }, + "7e5ced10225544399add5e68dda80595": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0d5765bde1fa470eb087df5fe1e45e56", + "placeholder": "​", + "style": "IPY_MODEL_6873f0a521cd4e4390563e27046c1aa6", + "value": " 6.85G/6.85G [00:35<00:00, 230MB/s]" + } + }, + "ccf73891ca1941ca8b77c87ba84058f8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bfc1e1979f0741a09e6c9f4181ae88e7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8ce25329fdf04a6e8c4ad8097975111e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "479d18fc0f9448a49b07b93ac608ae1d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "85ffc3306cb4474aa5bfac22f6356352": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0d5765bde1fa470eb087df5fe1e45e56": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6873f0a521cd4e4390563e27046c1aa6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0832a0aa65ac4f70ba2f303f9b867d25": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_436e9deafae04955a13f2ff0fc0e6348", + "IPY_MODEL_83427a912e3740008c0a7ffb9d20a1bd", + "IPY_MODEL_084f196c105c4c428253e0ce6dce8a21" + ], + "layout": "IPY_MODEL_403990397a4041abb8aff7bdcc5d8e8a" + } + }, + "436e9deafae04955a13f2ff0fc0e6348": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f957482962954f03b383d7c4f93c7b04", + "placeholder": "​", + "style": "IPY_MODEL_19a731496867404b8888986f3d9c83ef", + "value": "Downloading (…)neration_config.json: 100%" + } + }, + "83427a912e3740008c0a7ffb9d20a1bd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e0b09d3aa9ae4719bcdfad8e75ad86c5", + "max": 137, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9d41c197843644858919a396dc927936", + "value": 137 + } + }, + "084f196c105c4c428253e0ce6dce8a21": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_67f9f877dee04d538ffbe2de6c4dd181", + "placeholder": "​", + "style": "IPY_MODEL_001bb332cdb94a0c96dd2a322ff8718f", + "value": " 137/137 [00:00<00:00, 9.29kB/s]" + } + }, + "403990397a4041abb8aff7bdcc5d8e8a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f957482962954f03b383d7c4f93c7b04": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "19a731496867404b8888986f3d9c83ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e0b09d3aa9ae4719bcdfad8e75ad86c5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9d41c197843644858919a396dc927936": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "67f9f877dee04d538ffbe2de6c4dd181": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "001bb332cdb94a0c96dd2a322ff8718f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "310970764a4a4c9382e2334a4abbbd77": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2cd59f5acc4948988814eb9b3d909aed", + "IPY_MODEL_8939309032ca4787a9d3c7b54b889926", + "IPY_MODEL_fcd31f8d1e5a482d9267ab63fd723e86" + ], + "layout": "IPY_MODEL_2d126272a4284ba9af39de65e9680fc8" + } + }, + "2cd59f5acc4948988814eb9b3d909aed": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d8a0e85fa9fc4a9291f9303861fc9efc", + "placeholder": "​", + "style": "IPY_MODEL_d21f1cfb275b4bb38baf7022665e0208", + "value": "Map: 100%" + } + }, + "8939309032ca4787a9d3c7b54b889926": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_81e25933b9af43a492a6da4a5e3560f6", + "max": 15011, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5fe27494333b4e07a632769696744210", + "value": 15011 + } + }, + "fcd31f8d1e5a482d9267ab63fd723e86": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_35ca13104a3e4263a192889362dbc05e", + "placeholder": "​", + "style": "IPY_MODEL_85ae96c237754c4db014e5c09898e162", + "value": " 14994/15011 [00:44<00:00, 638.52 examples/s]" + } + }, + "2d126272a4284ba9af39de65e9680fc8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "d8a0e85fa9fc4a9291f9303861fc9efc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d21f1cfb275b4bb38baf7022665e0208": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "81e25933b9af43a492a6da4a5e3560f6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5fe27494333b4e07a632769696744210": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "35ca13104a3e4263a192889362dbc05e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "85ae96c237754c4db014e5c09898e162": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YSW4FUQPwIYu", + "outputId": "421da5b9-be5e-4661-bf98-6c57fbf1f6fd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'alpaca-lora'...\n", + "remote: Enumerating objects: 607, done.\u001b[K\n", + "remote: Counting objects: 100% (51/51), done.\u001b[K\n", + "remote: Compressing objects: 100% (32/32), done.\u001b[K\n", + "remote: Total 607 (delta 28), reused 33 (delta 19), pack-reused 556\u001b[K\n", + "Receiving objects: 100% (607/607), 27.78 MiB | 5.67 MiB/s, done.\n", + "Resolving deltas: 100% (360/360), done.\n" + ] + } + ], + "source": [ + "!git clone https://github.com/tloen/alpaca-lora.git" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Alpaca OpenLLaMa 3B LoRA" + ], + "metadata": { + "id": "Gzg8SopX8EWH" + } + }, + { + "cell_type": "code", + "source": [ + "%cd alpaca-lora/" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s1xm2uERx_st", + "outputId": "c77b42e1-202b-45ac-aa2c-3fdf2bcde155" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/alpaca-lora\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -q datasets loralib sentencepiece\n", + "\n", + "!pip install -q git+https://github.com/huggingface/transformers.git\n", + "!pip install -q git+https://github.com/huggingface/peft.git" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JCB9UzMVwsSM", + "outputId": "7f0688e4-f360-4da6-a4d3-0d59d4135649" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install bitsandbytes" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qCnXTszZxE2T", + "outputId": "e619573c-3ac2-4ac3-bed5-caab57d00f9a" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Collecting bitsandbytes\n", + " Downloading bitsandbytes-0.39.0-py3-none-any.whl (92.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.2/92.2 MB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: bitsandbytes\n", + "Successfully installed bitsandbytes-0.39.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NyxvpDjcrUvv", + "outputId": "abe581a2-70ce-4956-b3dd-593fa4f3d8ef" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Data Check" + ], + "metadata": { + "id": "9w0aSCzhxxQf" + } + }, + { + "cell_type": "code", + "source": [ + "from datasets import load_dataset\n", + "from transformers import LlamaTokenizer\n", + "\n", + "\n", + "tokenizer = LlamaTokenizer.from_pretrained(\"openlm-research/open_llama_3b_600bt_preview\", add_eos_token=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.pad_token_id = tokenizer.eos_token_id\n", + "\n", + "data = load_dataset(\"json\", data_files=\"/content/drive/MyDrive/alpaca-data.json\")\n", + "\n", + "\n", + "def generate_prompt(data_point):\n", + " # sorry about the formatting disaster gotta move fast\n", + " if data_point[\"input\"]:\n", + " return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n", + "\n", + "### Instruction:\n", + "{data_point[\"instruction\"]}\n", + "\n", + "### Input:\n", + "{data_point[\"input\"]}\n", + "\n", + "### Response:\n", + "{data_point[\"output\"]}\"\"\"\n", + " else:\n", + " return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n", + "\n", + "### Instruction:\n", + "{data_point[\"instruction\"]}\n", + "\n", + "### Response:\n", + "{data_point[\"output\"]}\"\"\"" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 150, + "referenced_widgets": [ + "76eba77b0fc9499e9b4015393156153e", + "c42123767d4849ffbcdd48f09580ea13", + "e89f07c1f5744328b071f96a3bdc9532", + "da36c71ba5bc469eb32bde730c5038d3", + "6cc6b7c564fe48f086152c78f8a9a915", + "c6a3adfa76dd4bbd8cc5e75ee28ef3e8", + "0c65f0e913b74de88ab712d65b9027f7", + "b07c7f3b6bc24f10821b88e254e88e43", + "42ea2834ee074029abe8cf8f9ff79a16", + "ec4205b0491a4f53ad1cba42689d59c4", + "f8b9df09c8c54d53a22310e64e30772f", + "e69b8912d3384180842ff0ead6096803", + "550a2ef0612e4d8396462d1b1c43608d", + "f8d82b4d016f46ca8bab1a2fe591861e", + "975d908a1874425198e0d62ae884a3ef", + "152aa8231b3f4b7db4e6f27bb61065b9", + "27b48982ae244c66bccd855e7e33a00d", + "a896cb94d1fc4641ad224c7a9ca619b1", + "be5d02d7001a4a25a848d6b0c582b42b", + "609437910022418e83543fe6c3d16501", + "c3b5ef42dae64f95a2ef055db7e50fa8", + "ee0f0aff0c254b82854d1a6f4b367161", + "fd90085d416440579f409cdd9b29e053", + "fdd55d56775a4641925fc88e43f946b8", + "a54ebe30a12b4c21b67b2f8d43aea2d3", + "6f041f053ff240cbbb52f6d117ebfcf1", + "b7d5f8aaee0047178661e58aa59433ee", + "563ca6cac1594ba0a7c168ee31ccc982", + "e10e7830d1404baea72e81e0df295239", + "acc07aa077fd49d38d74e0e10fa3991a", + "441dfdd43ef0442894c19cad81a478db", + "9efe7ac9ee0946388eeb85f9a4603ffe", + "ae9996dcefcf4cfc8ea2f7488629c5ec", + "db463b34f98c488e8282d7a59c421347", + "2ba524e48f35421c9afc9cc81e9ce3fb", + "1829c96471c64a03a4872e1b7a16551c", + "f639a121fab0407595ebdf04015d9267", + "fa5ff44ae5f64ce4bbf9b59e300b7ba3", + "e0d701b8c09744858db4fda6011b69df", + "7ef61952e0f2463bad3dd1a814bfb95a", + "75263b1d19ce4cb0b95cacb6138b6fcb", + "c3f35ce818e34b05b247fae76200c02e", + "be8dddb19dde441da1193d5f8738413d", + "4be8d304ddce49319488f20ab36e7f53" + ] + }, + "id": "OdgRTo5YxyRL", + "outputId": "e6e64014-796b-4fa9-9635-3cb29f2dab7d" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-e726d3f1eee28f16/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading data files: 0%| | 0/1 [00:00" + ], + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [ 3/119 01:48 < 3:29:02, 0.01 it/s, Epoch 0.02/1]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining Loss
12.421700

" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", + "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m27\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1696\u001b[0m in \u001b[92mtrain\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1693 \u001b[0m\u001b[2m│ │ \u001b[0minner_training_loop = find_executable_batch_size( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1694 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m._inner_training_loop, \u001b[96mself\u001b[0m._train_batch_size, args.auto_find_batch_size \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1695 \u001b[0m\u001b[2m│ │ \u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1696 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m inner_training_loop( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1697 \u001b[0m\u001b[2m│ │ │ \u001b[0margs=args, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1698 \u001b[0m\u001b[2m│ │ │ \u001b[0mresume_from_checkpoint=resume_from_checkpoint, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1699 \u001b[0m\u001b[2m│ │ │ \u001b[0mtrial=trial, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1973\u001b[0m in \u001b[92m_inner_training_loop\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1970 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mwith\u001b[0m model.no_sync(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1971 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1972 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1973 \u001b[2m│ │ │ │ │ \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1974 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1975 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mif\u001b[0m ( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1976 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0margs.logging_nan_inf_filter \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2787\u001b[0m in \u001b[92mtraining_step\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2784 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m loss_mb.reduce_mean().detach().to(\u001b[96mself\u001b[0m.args.device) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2785 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2786 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mwith\u001b[0m \u001b[96mself\u001b[0m.compute_loss_context_manager(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2787 \u001b[2m│ │ │ \u001b[0mloss = \u001b[96mself\u001b[0m.compute_loss(model, inputs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2788 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2789 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.n_gpu > \u001b[94m1\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2790 \u001b[0m\u001b[2m│ │ │ \u001b[0mloss = loss.mean() \u001b[2m# mean() to average on multi-gpu parallel training\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2819\u001b[0m in \u001b[92mcompute_loss\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2816 \u001b[0m\u001b[2m│ │ │ \u001b[0mlabels = inputs.pop(\u001b[33m\"\u001b[0m\u001b[33mlabels\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2817 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2818 \u001b[0m\u001b[2m│ │ │ \u001b[0mlabels = \u001b[94mNone\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2819 \u001b[2m│ │ \u001b[0moutputs = model(**inputs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2820 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Save past state if it exists\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2821 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# TODO: this needs to be fixed and made cleaner later.\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2822 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.past_index >= \u001b[94m0\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/\u001b[0m\u001b[1;33mpeft_model.py\u001b[0m:\u001b[94m686\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 683 \u001b[0m\u001b[2m│ \u001b[0m): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 684 \u001b[0m\u001b[2m│ │ \u001b[0mpeft_config = \u001b[96mself\u001b[0m.active_peft_config \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 685 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96misinstance\u001b[0m(peft_config, PromptLearningConfig): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 686 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m.base_model( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 687 \u001b[0m\u001b[2m│ │ │ │ \u001b[0minput_ids=input_ids, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 688 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 689 \u001b[0m\u001b[2m│ │ │ │ \u001b[0minputs_embeds=inputs_embeds, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m687\u001b[0m in \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m684 \u001b[0m\u001b[2m│ │ \u001b[0mreturn_dict = return_dict \u001b[94mif\u001b[0m return_dict \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[94melse\u001b[0m \u001b[96mself\u001b[0m.config.use_return \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m685 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m686 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m687 \u001b[2m│ │ \u001b[0moutputs = \u001b[96mself\u001b[0m.model( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m688 \u001b[0m\u001b[2m│ │ │ \u001b[0minput_ids=input_ids, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m689 \u001b[0m\u001b[2m│ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m690 \u001b[0m\u001b[2m│ │ │ \u001b[0mposition_ids=position_ids, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m569\u001b[0m in \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m566 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m567 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m custom_forward \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m568 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m569 \u001b[2m│ │ │ │ \u001b[0mlayer_outputs = torch.utils.checkpoint.checkpoint( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m570 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mcreate_custom_forward(decoder_layer), \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m571 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mhidden_states, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m572 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mattention_mask, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/\u001b[0m\u001b[1;33mcheckpoint.py\u001b[0m:\u001b[94m249\u001b[0m in \u001b[92mcheckpoint\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m246 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mValueError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mUnexpected keyword arguments: \u001b[0m\u001b[33m\"\u001b[0m + \u001b[33m\"\u001b[0m\u001b[33m,\u001b[0m\u001b[33m\"\u001b[0m.join(arg \u001b[94mfor\u001b[0m arg \u001b[95min\u001b[0m kwar \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m247 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m248 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mif\u001b[0m use_reentrant: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m249 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m CheckpointFunction.apply(function, preserve, *args) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m250 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m251 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m _checkpoint_without_reentrant( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m252 \u001b[0m\u001b[2m│ │ │ \u001b[0mfunction, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m503 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m504 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m505 \u001b[0m\u001b[2m│ │ │ \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs) \u001b[2m# type: ignore[misc]\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m507 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m508 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m509 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/\u001b[0m\u001b[1;33mcheckpoint.py\u001b[0m:\u001b[94m107\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m104 \u001b[0m\u001b[2m│ │ \u001b[0mctx.save_for_backward(*tensor_inputs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m105 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m106 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m107 \u001b[2m│ │ │ \u001b[0moutputs = run_function(*args) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m108 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m outputs \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m109 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m110 \u001b[0m\u001b[2m│ \u001b[0m\u001b[1;95m@staticmethod\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m565\u001b[0m in \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[92mcustom_forward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m562 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcreate_custom_forward\u001b[0m(module): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m563 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcustom_forward\u001b[0m(*inputs): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m564 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[2m# None for past_key_value\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m565 \u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m module(*inputs, output_attentions, \u001b[94mNone\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m566 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m567 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m custom_forward \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m568 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m292\u001b[0m in \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m289 \u001b[0m\u001b[2m│ │ \u001b[0mhidden_states = \u001b[96mself\u001b[0m.input_layernorm(hidden_states) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m290 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m291 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Self Attention\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m292 \u001b[2m│ │ \u001b[0mhidden_states, self_attn_weights, present_key_value = \u001b[96mself\u001b[0m.self_attn( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m293 \u001b[0m\u001b[2m│ │ │ \u001b[0mhidden_states=hidden_states, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m294 \u001b[0m\u001b[2m│ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m295 \u001b[0m\u001b[2m│ │ │ \u001b[0mposition_ids=position_ids, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m194\u001b[0m in \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m191 \u001b[0m\u001b[2m│ \u001b[0m) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m192 \u001b[0m\u001b[2m│ │ \u001b[0mbsz, q_len, _ = hidden_states.size() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m193 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m194 \u001b[2m│ │ \u001b[0mquery_states = \u001b[96mself\u001b[0m.q_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m. \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m195 \u001b[0m\u001b[2m│ │ \u001b[0mkey_states = \u001b[96mself\u001b[0m.k_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m.he \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m196 \u001b[0m\u001b[2m│ │ \u001b[0mvalue_states = \u001b[96mself\u001b[0m.v_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m. \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m197 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/tuners/\u001b[0m\u001b[1;33mlora.py\u001b[0m:\u001b[94m709\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m706 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.active_adapter = adapter_name \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m707 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m708 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mforward\u001b[0m(\u001b[96mself\u001b[0m, x: torch.Tensor): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m709 \u001b[2m│ │ │ \u001b[0mresult = \u001b[96msuper\u001b[0m().forward(x) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m710 \u001b[0m\u001b[2m│ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m711 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.disable_adapters \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m.active_adapter \u001b[95mnot\u001b[0m \u001b[95min\u001b[0m \u001b[96mself\u001b[0m.lora_A.keys(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m712 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m result \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/\u001b[0m\u001b[1;33mmodules.py\u001b[0m:\u001b[94m388\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m385 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.bias \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.bias.dtype != x.dtype: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m386 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.bias.data = \u001b[96mself\u001b[0m.bias.data.to(x.dtype) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m387 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m388 \u001b[2m│ │ \u001b[0mout = bnb.matmul(x, \u001b[96mself\u001b[0m.weight, bias=\u001b[96mself\u001b[0m.bias, state=\u001b[96mself\u001b[0m.state) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m389 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m390 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96mself\u001b[0m.state.has_fp16_weights: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m391 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.state.CB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.state.CxB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m559\u001b[0m in \u001b[92mmatmul\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m556 \u001b[0m\u001b[2m│ \u001b[0mstate = state \u001b[95mor\u001b[0m MatmulLtState() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m557 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mif\u001b[0m threshold > \u001b[94m0.0\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m558 \u001b[0m\u001b[2m│ │ \u001b[0mstate.threshold = threshold \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m559 \u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m MatMul8bitLt.apply(A, B, out, bias, state) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m560 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m561 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m562 \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mmatmul_4bit\u001b[0m(A: tensor, B: tensor, quant_state: List, out: tensor = \u001b[94mNone\u001b[0m, bias=\u001b[94mNone\u001b[0m): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m503 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active(): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m504 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m505 \u001b[0m\u001b[2m│ │ │ \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs) \u001b[2m# type: ignore[misc]\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m507 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m508 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m509 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m323\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m320 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# 1. Quantize A\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m321 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(A.shape) == \u001b[94m3\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m322 \u001b[0m\u001b[2m│ │ │ \u001b[0mA = A.view(-\u001b[94m1\u001b[0m, A.shape[-\u001b[94m1\u001b[0m]).contiguous() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m323 \u001b[2m│ │ \u001b[0mCA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold= \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m324 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m325 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m state.threshold > \u001b[94m0.0\u001b[0m \u001b[95mand\u001b[0m coo_tensorA \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m326 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m state.has_fp16_weights: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/\u001b[0m\u001b[1;33mfunctional.py\u001b[0m:\u001b[94m2029\u001b[0m in \u001b[92mdouble_quant\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2026 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mct.c_int32(rows), \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2027 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mct.c_int32(cols), \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2028 \u001b[0m\u001b[2m│ │ │ \u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2029 \u001b[2m│ │ │ \u001b[0mval, idx = torch.sort(coo_tensor.rowidx) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2030 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.rowidx = val \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2031 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.colidx = coo_tensor.colidx[idx] \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2032 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.values = coo_tensor.values[idx] \u001b[31m│\u001b[0m\n", + "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", + "\u001b[1;91mKeyboardInterrupt\u001b[0m\n" + ], + "text/html": [ + "

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
+              " in <cell line: 27>:27                                                                            \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1696 in train                    \n",
+              "                                                                                                  \n",
+              "   1693 │   │   inner_training_loop = find_executable_batch_size(                                 \n",
+              "   1694 │   │   │   self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size  \n",
+              "   1695 │   │   )                                                                                 \n",
+              " 1696 │   │   return inner_training_loop(                                                       \n",
+              "   1697 │   │   │   args=args,                                                                    \n",
+              "   1698 │   │   │   resume_from_checkpoint=resume_from_checkpoint,                                \n",
+              "   1699 │   │   │   trial=trial,                                                                  \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1973 in _inner_training_loop     \n",
+              "                                                                                                  \n",
+              "   1970 │   │   │   │   │   with model.no_sync():                                                 \n",
+              "   1971 │   │   │   │   │   │   tr_loss_step = self.training_step(model, inputs)                  \n",
+              "   1972 │   │   │   │   else:                                                                     \n",
+              " 1973 │   │   │   │   │   tr_loss_step = self.training_step(model, inputs)                      \n",
+              "   1974 │   │   │   │                                                                             \n",
+              "   1975 │   │   │   │   if (                                                                      \n",
+              "   1976 │   │   │   │   │   args.logging_nan_inf_filter                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:2787 in training_step            \n",
+              "                                                                                                  \n",
+              "   2784 │   │   │   return loss_mb.reduce_mean().detach().to(self.args.device)                    \n",
+              "   2785 │   │                                                                                     \n",
+              "   2786 │   │   with self.compute_loss_context_manager():                                         \n",
+              " �� 2787 │   │   │   loss = self.compute_loss(model, inputs)                                       \n",
+              "   2788 │   │                                                                                     \n",
+              "   2789 │   │   if self.args.n_gpu > 1:                                                           \n",
+              "   2790 │   │   │   loss = loss.mean()  # mean() to average on multi-gpu parallel training        \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:2819 in compute_loss             \n",
+              "                                                                                                  \n",
+              "   2816 │   │   │   labels = inputs.pop(\"labels\")                                                 \n",
+              "   2817 │   │   else:                                                                             \n",
+              "   2818 │   │   │   labels = None                                                                 \n",
+              " 2819 │   │   outputs = model(**inputs)                                                         \n",
+              "   2820 │   │   # Save past state if it exists                                                    \n",
+              "   2821 │   │   # TODO: this needs to be fixed and made cleaner later.                            \n",
+              "   2822 │   │   if self.args.past_index >= 0:                                                     \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
+              "                                                                                                  \n",
+              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
+              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
+              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
+              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
+              "   1502 │   │   # Do not call functions when jit is used                                          \n",
+              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
+              "   1504 │   │   backward_pre_hooks = []                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:686 in forward                        \n",
+              "                                                                                                  \n",
+              "    683 │   ):                                                                                    \n",
+              "    684 │   │   peft_config = self.active_peft_config                                             \n",
+              "    685 │   │   if not isinstance(peft_config, PromptLearningConfig):                             \n",
+              "  686 │   │   │   return self.base_model(                                                       \n",
+              "    687 │   │   │   │   input_ids=input_ids,                                                      \n",
+              "    688 │   │   │   │   attention_mask=attention_mask,                                            \n",
+              "    689 │   │   │   │   inputs_embeds=inputs_embeds,                                              \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
+              "                                                                                                  \n",
+              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
+              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
+              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
+              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
+              "   1502 │   │   # Do not call functions when jit is used                                          \n",
+              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
+              "   1504 │   │   backward_pre_hooks = []                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
+              "                                                                                                  \n",
+              "   162 │   │   │   with torch.no_grad():                                                          \n",
+              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
+              "   164 │   │   else:                                                                              \n",
+              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
+              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
+              "   167 │                                                                                          \n",
+              "   168 │   module.forward = new_forward                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:687 in       \n",
+              " forward                                                                                          \n",
+              "                                                                                                  \n",
+              "   684 │   │   return_dict = return_dict if return_dict is not None else self.config.use_return   \n",
+              "   685 │   │                                                                                      \n",
+              "   686 │   │   # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)    \n",
+              " 687 │   │   outputs = self.model(                                                              \n",
+              "   688 │   │   │   input_ids=input_ids,                                                           \n",
+              "   689 │   │   │   attention_mask=attention_mask,                                                 \n",
+              "   690 │   │   │   position_ids=position_ids,                                                     \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
+              "                                                                                                  \n",
+              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
+              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
+              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
+              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
+              "   1502 │   │   # Do not call functions when jit is used                                          \n",
+              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
+              "   1504 │   │   backward_pre_hooks = []                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
+              "                                                                                                  \n",
+              "   162 │   │   │   with torch.no_grad():                                                          \n",
+              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
+              "   164 │   │   else:                                                                              \n",
+              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
+              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
+              "   167 │                                                                                          \n",
+              "   168 │   module.forward = new_forward                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:569 in       \n",
+              " forward                                                                                          \n",
+              "                                                                                                  \n",
+              "   566 │   │   │   │   │                                                                          \n",
+              "   567 │   │   │   │   │   return custom_forward                                                  \n",
+              "   568 │   │   │   │                                                                              \n",
+              " 569 │   │   │   │   layer_outputs = torch.utils.checkpoint.checkpoint(                         \n",
+              "   570 │   │   │   │   │   create_custom_forward(decoder_layer),                                  \n",
+              "   571 │   │   │   │   │   hidden_states,                                                         \n",
+              "   572 │   │   │   │   │   attention_mask,                                                        \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:249 in checkpoint              \n",
+              "                                                                                                  \n",
+              "   246 │   │   raise ValueError(\"Unexpected keyword arguments: \" + \",\".join(arg for arg in kwar   \n",
+              "   247 │                                                                                          \n",
+              "   248 │   if use_reentrant:                                                                      \n",
+              " 249 │   │   return CheckpointFunction.apply(function, preserve, *args)                         \n",
+              "   250 │   else:                                                                                  \n",
+              "   251 │   │   return _checkpoint_without_reentrant(                                              \n",
+              "   252 │   │   │   function,                                                                      \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/autograd/function.py:506 in apply                  \n",
+              "                                                                                                  \n",
+              "   503 │   │   if not torch._C._are_functorch_transforms_active():                                \n",
+              "   504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]                           \n",
+              "   505 │   │   │   args = _functorch.utils.unwrap_dead_wrappers(args)                             \n",
+              " 506 │   │   │   return super().apply(*args, **kwargs)  # type: ignore[misc]                    \n",
+              "   507 │   │                                                                                      \n",
+              "   508 │   │   if cls.setup_context == _SingleLevelFunction.setup_context:                        \n",
+              "   509 │   │   │   raise RuntimeError(                                                            \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:107 in forward                 \n",
+              "                                                                                                  \n",
+              "   104 │   │   ctx.save_for_backward(*tensor_inputs)                                              \n",
+              "   105 │   │                                                                                      \n",
+              "   106 │   │   with torch.no_grad():                                                              \n",
+              " 107 │   │   │   outputs = run_function(*args)                                                  \n",
+              "   108 │   │   return outputs                                                                     \n",
+              "   109 │                                                                                          \n",
+              "   110 │   @staticmethod                                                                          \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:565 in       \n",
+              " custom_forward                                                                                   \n",
+              "                                                                                                  \n",
+              "   562 │   │   │   │   def create_custom_forward(module):                                         \n",
+              "   563 │   │   │   │   │   def custom_forward(*inputs):                                           \n",
+              "   564 │   │   │   │   │   │   # None for past_key_value                                          \n",
+              " 565 │   │   │   │   │   │   return module(*inputs, output_attentions, None)                    \n",
+              "   566 │   │   │   │   │                                                                          \n",
+              "   567 │   │   │   │   │   return custom_forward                                                  \n",
+              "   568                                                                                            \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
+              "                                                                                                  \n",
+              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
+              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
+              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
+              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
+              "   1502 │   │   # Do not call functions when jit is used                                          \n",
+              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
+              "   1504 │   │   backward_pre_hooks = []                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
+              "                                                                                                  \n",
+              "   162 │   │   │   with torch.no_grad():                                                          \n",
+              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
+              "   164 │   │   else:                                                                              \n",
+              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
+              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
+              "   167 │                                                                                          \n",
+              "   168 │   module.forward = new_forward                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:292 in       \n",
+              " forward                                                                                          \n",
+              "                                                                                                  \n",
+              "   289 │   │   hidden_states = self.input_layernorm(hidden_states)                                \n",
+              "   290 │   │                                                                                      \n",
+              "   291 │   │   # Self Attention                                                                   \n",
+              " 292 │   │   hidden_states, self_attn_weights, present_key_value = self.self_attn(              \n",
+              "   293 │   │   │   hidden_states=hidden_states,                                                   \n",
+              "   294 │   │   │   attention_mask=attention_mask,                                                 \n",
+              "   295 │   │   │   position_ids=position_ids,                                                     \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
+              "                                                                                                  \n",
+              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
+              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
+              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
+              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
+              "   1502 │   │   # Do not call functions when jit is used                                          \n",
+              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
+              "   1504 │   │   backward_pre_hooks = []                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
+              "                                                                                                  \n",
+              "   162 │   │   │   with torch.no_grad():                                                          \n",
+              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
+              "   164 │   │   else:                                                                              \n",
+              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
+              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
+              "   167 │                                                                                          \n",
+              "   168 │   module.forward = new_forward                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:194 in       \n",
+              " forward                                                                                          \n",
+              "                                                                                                  \n",
+              "   191 │   ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:       \n",
+              "   192 │   │   bsz, q_len, _ = hidden_states.size()                                               \n",
+              "   193 │   │                                                                                      \n",
+              " 194 │   │   query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.   \n",
+              "   195 │   │   key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.he   \n",
+              "   196 │   │   value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.   \n",
+              "   197                                                                                            \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
+              "                                                                                                  \n",
+              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
+              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
+              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
+              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
+              "   1502 │   │   # Do not call functions when jit is used                                          \n",
+              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
+              "   1504 │   │   backward_pre_hooks = []                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/peft/tuners/lora.py:709 in forward                       \n",
+              "                                                                                                  \n",
+              "   706 │   │   │   self.active_adapter = adapter_name                                             \n",
+              "   707 │   │                                                                                      \n",
+              "   708 │   │   def forward(self, x: torch.Tensor):                                                \n",
+              " 709 │   │   │   result = super().forward(x)                                                    \n",
+              "   710 │   │   │                                                                                  \n",
+              "   711 │   │   │   if self.disable_adapters or self.active_adapter not in self.lora_A.keys():     \n",
+              "   712 │   │   │   │   return result                                                              \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/modules.py:388 in forward                \n",
+              "                                                                                                  \n",
+              "   385 │   │   if self.bias is not None and self.bias.dtype != x.dtype:                           \n",
+              "   386 │   │   │   self.bias.data = self.bias.data.to(x.dtype)                                    \n",
+              "   387 │   │                                                                                      \n",
+              " 388 │   │   out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)                 \n",
+              "   389 │   │                                                                                      \n",
+              "   390 │   │   if not self.state.has_fp16_weights:                                                \n",
+              "   391 │   │   │   if self.state.CB is not None and self.state.CxB is not None:                   \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:559 in matmul        \n",
+              "                                                                                                  \n",
+              "   556 │   state = state or MatmulLtState()                                                       \n",
+              "   557 │   if threshold > 0.0:                                                                    \n",
+              "   558 │   │   state.threshold = threshold                                                        \n",
+              " 559 return MatMul8bitLt.apply(A, B, out, bias, state)                                      \n",
+              "   560                                                                                            \n",
+              "   561                                                                                            \n",
+              "   562 def matmul_4bit(A: tensor, B: tensor, quant_state: List, out: tensor = None, bias=None):   \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/torch/autograd/function.py:506 in apply                  \n",
+              "                                                                                                  \n",
+              "   503 │   │   if not torch._C._are_functorch_transforms_active():                                \n",
+              "   504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]                           \n",
+              "   505 │   │   │   args = _functorch.utils.unwrap_dead_wrappers(args)                             \n",
+              " 506 │   │   │   return super().apply(*args, **kwargs)  # type: ignore[misc]                    \n",
+              "   507 │   │                                                                                      \n",
+              "   508 │   │   if cls.setup_context == _SingleLevelFunction.setup_context:                        \n",
+              "   509 │   │   │   raise RuntimeError(                                                            \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:323 in forward       \n",
+              "                                                                                                  \n",
+              "   320 │   │   # 1. Quantize A                                                                    \n",
+              "   321 │   │   if len(A.shape) == 3:                                                              \n",
+              "   322 │   │   │   A = A.view(-1, A.shape[-1]).contiguous()                                       \n",
+              " 323 │   │   CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold=   \n",
+              "   324 │   │                                                                                      \n",
+              "   325 │   │   if state.threshold > 0.0 and coo_tensorA is not None:                              \n",
+              "   326 │   │   │   if state.has_fp16_weights:                                                     \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/functional.py:2029 in double_quant          \n",
+              "                                                                                                  \n",
+              "   2026 │   │   │   │   ct.c_int32(rows),                                                         \n",
+              "   2027 │   │   │   │   ct.c_int32(cols),                                                         \n",
+              "   2028 │   │   │   )                                                                             \n",
+              " 2029 │   │   │   val, idx = torch.sort(coo_tensor.rowidx)                                      \n",
+              "   2030 │   │   │   coo_tensor.rowidx = val                                                       \n",
+              "   2031 │   │   │   coo_tensor.colidx = coo_tensor.colidx[idx]                                    \n",
+              "   2032 │   │   │   coo_tensor.values = coo_tensor.values[idx]                                    \n",
+              "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+              "KeyboardInterrupt\n",
+              "
\n" + ] + }, + "metadata": {} + } + ] + } + ] +}