diff --git "a/finetune.ipynb" "b/finetune.ipynb"
new file mode 100644--- /dev/null
+++ "b/finetune.ipynb"
@@ -0,0 +1,4022 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "76eba77b0fc9499e9b4015393156153e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_c42123767d4849ffbcdd48f09580ea13",
+              "IPY_MODEL_e89f07c1f5744328b071f96a3bdc9532",
+              "IPY_MODEL_da36c71ba5bc469eb32bde730c5038d3"
+            ],
+            "layout": "IPY_MODEL_6cc6b7c564fe48f086152c78f8a9a915"
+          }
+        },
+        "c42123767d4849ffbcdd48f09580ea13": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c6a3adfa76dd4bbd8cc5e75ee28ef3e8",
+            "placeholder": "​",
+            "style": "IPY_MODEL_0c65f0e913b74de88ab712d65b9027f7",
+            "value": "Downloading data files: 100%"
+          }
+        },
+        "e89f07c1f5744328b071f96a3bdc9532": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_b07c7f3b6bc24f10821b88e254e88e43",
+            "max": 1,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_42ea2834ee074029abe8cf8f9ff79a16",
+            "value": 1
+          }
+        },
+        "da36c71ba5bc469eb32bde730c5038d3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ec4205b0491a4f53ad1cba42689d59c4",
+            "placeholder": "​",
+            "style": "IPY_MODEL_f8b9df09c8c54d53a22310e64e30772f",
+            "value": " 1/1 [00:00&lt;00:00, 39.39it/s]"
+          }
+        },
+        "6cc6b7c564fe48f086152c78f8a9a915": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c6a3adfa76dd4bbd8cc5e75ee28ef3e8": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0c65f0e913b74de88ab712d65b9027f7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "b07c7f3b6bc24f10821b88e254e88e43": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "42ea2834ee074029abe8cf8f9ff79a16": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "ec4205b0491a4f53ad1cba42689d59c4": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f8b9df09c8c54d53a22310e64e30772f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "e69b8912d3384180842ff0ead6096803": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_550a2ef0612e4d8396462d1b1c43608d",
+              "IPY_MODEL_f8d82b4d016f46ca8bab1a2fe591861e",
+              "IPY_MODEL_975d908a1874425198e0d62ae884a3ef"
+            ],
+            "layout": "IPY_MODEL_152aa8231b3f4b7db4e6f27bb61065b9"
+          }
+        },
+        "550a2ef0612e4d8396462d1b1c43608d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_27b48982ae244c66bccd855e7e33a00d",
+            "placeholder": "​",
+            "style": "IPY_MODEL_a896cb94d1fc4641ad224c7a9ca619b1",
+            "value": "Extracting data files: 100%"
+          }
+        },
+        "f8d82b4d016f46ca8bab1a2fe591861e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_be5d02d7001a4a25a848d6b0c582b42b",
+            "max": 1,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_609437910022418e83543fe6c3d16501",
+            "value": 1
+          }
+        },
+        "975d908a1874425198e0d62ae884a3ef": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_c3b5ef42dae64f95a2ef055db7e50fa8",
+            "placeholder": "​",
+            "style": "IPY_MODEL_ee0f0aff0c254b82854d1a6f4b367161",
+            "value": " 1/1 [00:01&lt;00:00,  1.73s/it]"
+          }
+        },
+        "152aa8231b3f4b7db4e6f27bb61065b9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "27b48982ae244c66bccd855e7e33a00d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a896cb94d1fc4641ad224c7a9ca619b1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "be5d02d7001a4a25a848d6b0c582b42b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "609437910022418e83543fe6c3d16501": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "c3b5ef42dae64f95a2ef055db7e50fa8": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ee0f0aff0c254b82854d1a6f4b367161": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "fd90085d416440579f409cdd9b29e053": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_fdd55d56775a4641925fc88e43f946b8",
+              "IPY_MODEL_a54ebe30a12b4c21b67b2f8d43aea2d3",
+              "IPY_MODEL_6f041f053ff240cbbb52f6d117ebfcf1"
+            ],
+            "layout": "IPY_MODEL_b7d5f8aaee0047178661e58aa59433ee"
+          }
+        },
+        "fdd55d56775a4641925fc88e43f946b8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_563ca6cac1594ba0a7c168ee31ccc982",
+            "placeholder": "​",
+            "style": "IPY_MODEL_e10e7830d1404baea72e81e0df295239",
+            "value": "Generating train split: "
+          }
+        },
+        "a54ebe30a12b4c21b67b2f8d43aea2d3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "info",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_acc07aa077fd49d38d74e0e10fa3991a",
+            "max": 1,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_441dfdd43ef0442894c19cad81a478db",
+            "value": 1
+          }
+        },
+        "6f041f053ff240cbbb52f6d117ebfcf1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_9efe7ac9ee0946388eeb85f9a4603ffe",
+            "placeholder": "​",
+            "style": "IPY_MODEL_ae9996dcefcf4cfc8ea2f7488629c5ec",
+            "value": " 15011/0 [00:01&lt;00:00, 14307.31 examples/s]"
+          }
+        },
+        "b7d5f8aaee0047178661e58aa59433ee": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": "hidden",
+            "width": null
+          }
+        },
+        "563ca6cac1594ba0a7c168ee31ccc982": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e10e7830d1404baea72e81e0df295239": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "acc07aa077fd49d38d74e0e10fa3991a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": "20px"
+          }
+        },
+        "441dfdd43ef0442894c19cad81a478db": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "9efe7ac9ee0946388eeb85f9a4603ffe": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ae9996dcefcf4cfc8ea2f7488629c5ec": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "db463b34f98c488e8282d7a59c421347": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_2ba524e48f35421c9afc9cc81e9ce3fb",
+              "IPY_MODEL_1829c96471c64a03a4872e1b7a16551c",
+              "IPY_MODEL_f639a121fab0407595ebdf04015d9267"
+            ],
+            "layout": "IPY_MODEL_fa5ff44ae5f64ce4bbf9b59e300b7ba3"
+          }
+        },
+        "2ba524e48f35421c9afc9cc81e9ce3fb": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e0d701b8c09744858db4fda6011b69df",
+            "placeholder": "​",
+            "style": "IPY_MODEL_7ef61952e0f2463bad3dd1a814bfb95a",
+            "value": "100%"
+          }
+        },
+        "1829c96471c64a03a4872e1b7a16551c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_75263b1d19ce4cb0b95cacb6138b6fcb",
+            "max": 1,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_c3f35ce818e34b05b247fae76200c02e",
+            "value": 1
+          }
+        },
+        "f639a121fab0407595ebdf04015d9267": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_be8dddb19dde441da1193d5f8738413d",
+            "placeholder": "​",
+            "style": "IPY_MODEL_4be8d304ddce49319488f20ab36e7f53",
+            "value": " 1/1 [00:00&lt;00:00, 22.58it/s]"
+          }
+        },
+        "fa5ff44ae5f64ce4bbf9b59e300b7ba3": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e0d701b8c09744858db4fda6011b69df": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "7ef61952e0f2463bad3dd1a814bfb95a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "75263b1d19ce4cb0b95cacb6138b6fcb": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c3f35ce818e34b05b247fae76200c02e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "be8dddb19dde441da1193d5f8738413d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4be8d304ddce49319488f20ab36e7f53": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "984896bacc39446091e2b6573facfa3e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_f15179aad6ad44dfa2dfbd703dc7c319",
+              "IPY_MODEL_f92c68902b984e648337da7e63fcc775",
+              "IPY_MODEL_5ce279d7dca74e55acae1c70efae6cd5"
+            ],
+            "layout": "IPY_MODEL_b187d889a6654fd7ad948d66a8bce197"
+          }
+        },
+        "f15179aad6ad44dfa2dfbd703dc7c319": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e131a64ebaa84a809754b3c762525c37",
+            "placeholder": "​",
+            "style": "IPY_MODEL_72819a3c606c41f19d1493539d64aff5",
+            "value": "Downloading (…)lve/main/config.json: 100%"
+          }
+        },
+        "f92c68902b984e648337da7e63fcc775": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_6761867d6e2b415b932bebd33a3144f3",
+            "max": 506,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_72c9aa3bb9614dfc8f316bc45a3455a0",
+            "value": 506
+          }
+        },
+        "5ce279d7dca74e55acae1c70efae6cd5": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_58e5e895215d4910b55d8cbe1417a111",
+            "placeholder": "​",
+            "style": "IPY_MODEL_b6bad7f690c54cb5814d09081e306393",
+            "value": " 506/506 [00:00&lt;00:00, 31.8kB/s]"
+          }
+        },
+        "b187d889a6654fd7ad948d66a8bce197": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e131a64ebaa84a809754b3c762525c37": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "72819a3c606c41f19d1493539d64aff5": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "6761867d6e2b415b932bebd33a3144f3": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "72c9aa3bb9614dfc8f316bc45a3455a0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "58e5e895215d4910b55d8cbe1417a111": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b6bad7f690c54cb5814d09081e306393": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ae3921fcb8e44862a42031f284f608c3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_aaac19cd3f524821a656ceffa57fb337",
+              "IPY_MODEL_a77af2c0771b4a20b42dade07c40021c",
+              "IPY_MODEL_7e5ced10225544399add5e68dda80595"
+            ],
+            "layout": "IPY_MODEL_ccf73891ca1941ca8b77c87ba84058f8"
+          }
+        },
+        "aaac19cd3f524821a656ceffa57fb337": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_bfc1e1979f0741a09e6c9f4181ae88e7",
+            "placeholder": "​",
+            "style": "IPY_MODEL_8ce25329fdf04a6e8c4ad8097975111e",
+            "value": "Downloading pytorch_model.bin: 100%"
+          }
+        },
+        "a77af2c0771b4a20b42dade07c40021c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_479d18fc0f9448a49b07b93ac608ae1d",
+            "max": 6853038093,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_85ffc3306cb4474aa5bfac22f6356352",
+            "value": 6853038093
+          }
+        },
+        "7e5ced10225544399add5e68dda80595": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_0d5765bde1fa470eb087df5fe1e45e56",
+            "placeholder": "​",
+            "style": "IPY_MODEL_6873f0a521cd4e4390563e27046c1aa6",
+            "value": " 6.85G/6.85G [00:35&lt;00:00, 230MB/s]"
+          }
+        },
+        "ccf73891ca1941ca8b77c87ba84058f8": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "bfc1e1979f0741a09e6c9f4181ae88e7": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8ce25329fdf04a6e8c4ad8097975111e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "479d18fc0f9448a49b07b93ac608ae1d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "85ffc3306cb4474aa5bfac22f6356352": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "0d5765bde1fa470eb087df5fe1e45e56": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "6873f0a521cd4e4390563e27046c1aa6": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "0832a0aa65ac4f70ba2f303f9b867d25": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_436e9deafae04955a13f2ff0fc0e6348",
+              "IPY_MODEL_83427a912e3740008c0a7ffb9d20a1bd",
+              "IPY_MODEL_084f196c105c4c428253e0ce6dce8a21"
+            ],
+            "layout": "IPY_MODEL_403990397a4041abb8aff7bdcc5d8e8a"
+          }
+        },
+        "436e9deafae04955a13f2ff0fc0e6348": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_f957482962954f03b383d7c4f93c7b04",
+            "placeholder": "​",
+            "style": "IPY_MODEL_19a731496867404b8888986f3d9c83ef",
+            "value": "Downloading (…)neration_config.json: 100%"
+          }
+        },
+        "83427a912e3740008c0a7ffb9d20a1bd": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e0b09d3aa9ae4719bcdfad8e75ad86c5",
+            "max": 137,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_9d41c197843644858919a396dc927936",
+            "value": 137
+          }
+        },
+        "084f196c105c4c428253e0ce6dce8a21": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_67f9f877dee04d538ffbe2de6c4dd181",
+            "placeholder": "​",
+            "style": "IPY_MODEL_001bb332cdb94a0c96dd2a322ff8718f",
+            "value": " 137/137 [00:00&lt;00:00, 9.29kB/s]"
+          }
+        },
+        "403990397a4041abb8aff7bdcc5d8e8a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f957482962954f03b383d7c4f93c7b04": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "19a731496867404b8888986f3d9c83ef": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "e0b09d3aa9ae4719bcdfad8e75ad86c5": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "9d41c197843644858919a396dc927936": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "67f9f877dee04d538ffbe2de6c4dd181": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "001bb332cdb94a0c96dd2a322ff8718f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "310970764a4a4c9382e2334a4abbbd77": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_2cd59f5acc4948988814eb9b3d909aed",
+              "IPY_MODEL_8939309032ca4787a9d3c7b54b889926",
+              "IPY_MODEL_fcd31f8d1e5a482d9267ab63fd723e86"
+            ],
+            "layout": "IPY_MODEL_2d126272a4284ba9af39de65e9680fc8"
+          }
+        },
+        "2cd59f5acc4948988814eb9b3d909aed": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_d8a0e85fa9fc4a9291f9303861fc9efc",
+            "placeholder": "​",
+            "style": "IPY_MODEL_d21f1cfb275b4bb38baf7022665e0208",
+            "value": "Map: 100%"
+          }
+        },
+        "8939309032ca4787a9d3c7b54b889926": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_81e25933b9af43a492a6da4a5e3560f6",
+            "max": 15011,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_5fe27494333b4e07a632769696744210",
+            "value": 15011
+          }
+        },
+        "fcd31f8d1e5a482d9267ab63fd723e86": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_35ca13104a3e4263a192889362dbc05e",
+            "placeholder": "​",
+            "style": "IPY_MODEL_85ae96c237754c4db014e5c09898e162",
+            "value": " 14994/15011 [00:44&lt;00:00, 638.52 examples/s]"
+          }
+        },
+        "2d126272a4284ba9af39de65e9680fc8": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": "hidden",
+            "width": null
+          }
+        },
+        "d8a0e85fa9fc4a9291f9303861fc9efc": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d21f1cfb275b4bb38baf7022665e0208": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "81e25933b9af43a492a6da4a5e3560f6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "5fe27494333b4e07a632769696744210": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "35ca13104a3e4263a192889362dbc05e": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "85ae96c237754c4db014e5c09898e162": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "YSW4FUQPwIYu",
+        "outputId": "421da5b9-be5e-4661-bf98-6c57fbf1f6fd"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Cloning into 'alpaca-lora'...\n",
+            "remote: Enumerating objects: 607, done.\u001b[K\n",
+            "remote: Counting objects: 100% (51/51), done.\u001b[K\n",
+            "remote: Compressing objects: 100% (32/32), done.\u001b[K\n",
+            "remote: Total 607 (delta 28), reused 33 (delta 19), pack-reused 556\u001b[K\n",
+            "Receiving objects: 100% (607/607), 27.78 MiB | 5.67 MiB/s, done.\n",
+            "Resolving deltas: 100% (360/360), done.\n"
+          ]
+        }
+      ],
+      "source": [
+        "!git clone https://github.com/tloen/alpaca-lora.git"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Alpaca OpenLLaMa 3B LoRA"
+      ],
+      "metadata": {
+        "id": "Gzg8SopX8EWH"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%cd alpaca-lora/"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "s1xm2uERx_st",
+        "outputId": "c77b42e1-202b-45ac-aa2c-3fdf2bcde155"
+      },
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "/content/alpaca-lora\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install -q datasets loralib sentencepiece\n",
+        "\n",
+        "!pip install -q git+https://github.com/huggingface/transformers.git\n",
+        "!pip install -q git+https://github.com/huggingface/peft.git"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "JCB9UzMVwsSM",
+        "outputId": "7f0688e4-f360-4da6-a4d3-0d59d4135649"
+      },
+      "execution_count": 5,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25h  Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install bitsandbytes"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "qCnXTszZxE2T",
+        "outputId": "e619573c-3ac2-4ac3-bed5-caab57d00f9a"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+            "Collecting bitsandbytes\n",
+            "  Downloading bitsandbytes-0.39.0-py3-none-any.whl (92.2 MB)\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.2/92.2 MB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hInstalling collected packages: bitsandbytes\n",
+            "Successfully installed bitsandbytes-0.39.0\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "NyxvpDjcrUvv",
+        "outputId": "abe581a2-70ce-4956-b3dd-593fa4f3d8ef"
+      },
+      "execution_count": 8,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Data Check"
+      ],
+      "metadata": {
+        "id": "9w0aSCzhxxQf"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from datasets import load_dataset\n",
+        "from transformers import LlamaTokenizer\n",
+        "\n",
+        "\n",
+        "tokenizer = LlamaTokenizer.from_pretrained(\"openlm-research/open_llama_3b_600bt_preview\", add_eos_token=True)\n",
+        "tokenizer.pad_token = tokenizer.eos_token\n",
+        "tokenizer.pad_token_id = tokenizer.eos_token_id\n",
+        "\n",
+        "data = load_dataset(\"json\", data_files=\"/content/drive/MyDrive/alpaca-data.json\")\n",
+        "\n",
+        "\n",
+        "def generate_prompt(data_point):\n",
+        "    # sorry about the formatting disaster gotta move fast\n",
+        "    if data_point[\"input\"]:\n",
+        "        return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+        "\n",
+        "### Instruction:\n",
+        "{data_point[\"instruction\"]}\n",
+        "\n",
+        "### Input:\n",
+        "{data_point[\"input\"]}\n",
+        "\n",
+        "### Response:\n",
+        "{data_point[\"output\"]}\"\"\"\n",
+        "    else:\n",
+        "        return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
+        "\n",
+        "### Instruction:\n",
+        "{data_point[\"instruction\"]}\n",
+        "\n",
+        "### Response:\n",
+        "{data_point[\"output\"]}\"\"\""
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 150,
+          "referenced_widgets": [
+            "76eba77b0fc9499e9b4015393156153e",
+            "c42123767d4849ffbcdd48f09580ea13",
+            "e89f07c1f5744328b071f96a3bdc9532",
+            "da36c71ba5bc469eb32bde730c5038d3",
+            "6cc6b7c564fe48f086152c78f8a9a915",
+            "c6a3adfa76dd4bbd8cc5e75ee28ef3e8",
+            "0c65f0e913b74de88ab712d65b9027f7",
+            "b07c7f3b6bc24f10821b88e254e88e43",
+            "42ea2834ee074029abe8cf8f9ff79a16",
+            "ec4205b0491a4f53ad1cba42689d59c4",
+            "f8b9df09c8c54d53a22310e64e30772f",
+            "e69b8912d3384180842ff0ead6096803",
+            "550a2ef0612e4d8396462d1b1c43608d",
+            "f8d82b4d016f46ca8bab1a2fe591861e",
+            "975d908a1874425198e0d62ae884a3ef",
+            "152aa8231b3f4b7db4e6f27bb61065b9",
+            "27b48982ae244c66bccd855e7e33a00d",
+            "a896cb94d1fc4641ad224c7a9ca619b1",
+            "be5d02d7001a4a25a848d6b0c582b42b",
+            "609437910022418e83543fe6c3d16501",
+            "c3b5ef42dae64f95a2ef055db7e50fa8",
+            "ee0f0aff0c254b82854d1a6f4b367161",
+            "fd90085d416440579f409cdd9b29e053",
+            "fdd55d56775a4641925fc88e43f946b8",
+            "a54ebe30a12b4c21b67b2f8d43aea2d3",
+            "6f041f053ff240cbbb52f6d117ebfcf1",
+            "b7d5f8aaee0047178661e58aa59433ee",
+            "563ca6cac1594ba0a7c168ee31ccc982",
+            "e10e7830d1404baea72e81e0df295239",
+            "acc07aa077fd49d38d74e0e10fa3991a",
+            "441dfdd43ef0442894c19cad81a478db",
+            "9efe7ac9ee0946388eeb85f9a4603ffe",
+            "ae9996dcefcf4cfc8ea2f7488629c5ec",
+            "db463b34f98c488e8282d7a59c421347",
+            "2ba524e48f35421c9afc9cc81e9ce3fb",
+            "1829c96471c64a03a4872e1b7a16551c",
+            "f639a121fab0407595ebdf04015d9267",
+            "fa5ff44ae5f64ce4bbf9b59e300b7ba3",
+            "e0d701b8c09744858db4fda6011b69df",
+            "7ef61952e0f2463bad3dd1a814bfb95a",
+            "75263b1d19ce4cb0b95cacb6138b6fcb",
+            "c3f35ce818e34b05b247fae76200c02e",
+            "be8dddb19dde441da1193d5f8738413d",
+            "4be8d304ddce49319488f20ab36e7f53"
+          ]
+        },
+        "id": "OdgRTo5YxyRL",
+        "outputId": "e6e64014-796b-4fa9-9635-3cb29f2dab7d"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-e726d3f1eee28f16/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "76eba77b0fc9499e9b4015393156153e"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "e69b8912d3384180842ff0ead6096803"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Generating train split: 0 examples [00:00, ? examples/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "fd90085d416440579f409cdd9b29e053"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/json/default-e726d3f1eee28f16/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4. Subsequent calls will reuse this data.\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "  0%|          | 0/1 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "db463b34f98c488e8282d7a59c421347"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Fine Tuning"
+      ],
+      "metadata": {
+        "id": "j7qZqe0YxG2c"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "\n",
+        "# os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "import bitsandbytes as bnb\n",
+        "import transformers\n",
+        "from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM\n",
+        "from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "EQyg3MygxKCW",
+        "outputId": "82c54601-2cf6-48f1-e3ca-994ae2b8b405"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\n",
+            "===================================BUG REPORT===================================\n",
+            "Welcome to bitsandbytes. For bug reports, please run\n",
+            "\n",
+            "python -m bitsandbytes\n",
+            "\n",
+            " and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
+            "================================================================================\n",
+            "bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so\n",
+            "CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...\n",
+            "CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so\n",
+            "CUDA SETUP: Highest compute capability among GPUs detected: 7.5\n",
+            "CUDA SETUP: Detected CUDA version 118\n",
+            "CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: /usr/lib64-nvidia did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...\n",
+            "  warn(msg)\n",
+            "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/sys/fs/cgroup/memory.events /var/colab/cgroup/jupyter-children/memory.events')}\n",
+            "  warn(msg)\n",
+            "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('//172.28.0.1'), PosixPath('http'), PosixPath('8013')}\n",
+            "  warn(msg)\n",
+            "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('--logtostderr --listen_host=172.28.0.12 --target_host=172.28.0.12 --tunnel_background_save_url=https'), PosixPath('//colab.research.google.com/tun/m/cc48301118ce562b961b3c22d803539adc1e0c19/gpu-t4-s-fy1dvkcgaqf0 --tunnel_background_save_delay=10s --tunnel_periodic_background_save_frequency=30m0s --enable_output_coalescing=true --output_coalescing_required=true')}\n",
+            "  warn(msg)\n",
+            "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/env/python')}\n",
+            "  warn(msg)\n",
+            "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('module'), PosixPath('//ipykernel.pylab.backend_inline')}\n",
+            "  warn(msg)\n",
+            "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: Found duplicate ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] files: {PosixPath('/usr/local/cuda/lib64/libcudart.so'), PosixPath('/usr/local/cuda/lib64/libcudart.so.11.0')}.. We'll flip a coin and try one of these, in order to fail forward.\n",
+            "Either way, this might cause trouble in the future:\n",
+            "If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.\n",
+            "  warn(msg)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Setting for A100 - For 3090 \n",
+        "MICRO_BATCH_SIZE = 3  # change to 4 for 3090\n",
+        "BATCH_SIZE = 128\n",
+        "GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE\n",
+        "EPOCHS = 1  # paper uses 3\n",
+        "LEARNING_RATE = 2e-5  # from the original paper\n",
+        "CUTOFF_LEN = 256  # 256 accounts for about 96% of the data\n",
+        "LORA_R = 4\n",
+        "LORA_ALPHA = 16\n",
+        "LORA_DROPOUT = 0.05"
+      ],
+      "metadata": {
+        "id": "76iZTtGJy26x"
+      },
+      "execution_count": 11,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model = LlamaForCausalLM.from_pretrained(\n",
+        "    \"openlm-research/open_llama_3b_600bt_preview\",\n",
+        "    load_in_8bit=True,\n",
+        "    device_map=\"auto\",\n",
+        ")\n",
+        "\n",
+        "model = prepare_model_for_int8_training(model)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 150,
+          "referenced_widgets": [
+            "984896bacc39446091e2b6573facfa3e",
+            "f15179aad6ad44dfa2dfbd703dc7c319",
+            "f92c68902b984e648337da7e63fcc775",
+            "5ce279d7dca74e55acae1c70efae6cd5",
+            "b187d889a6654fd7ad948d66a8bce197",
+            "e131a64ebaa84a809754b3c762525c37",
+            "72819a3c606c41f19d1493539d64aff5",
+            "6761867d6e2b415b932bebd33a3144f3",
+            "72c9aa3bb9614dfc8f316bc45a3455a0",
+            "58e5e895215d4910b55d8cbe1417a111",
+            "b6bad7f690c54cb5814d09081e306393",
+            "ae3921fcb8e44862a42031f284f608c3",
+            "aaac19cd3f524821a656ceffa57fb337",
+            "a77af2c0771b4a20b42dade07c40021c",
+            "7e5ced10225544399add5e68dda80595",
+            "ccf73891ca1941ca8b77c87ba84058f8",
+            "bfc1e1979f0741a09e6c9f4181ae88e7",
+            "8ce25329fdf04a6e8c4ad8097975111e",
+            "479d18fc0f9448a49b07b93ac608ae1d",
+            "85ffc3306cb4474aa5bfac22f6356352",
+            "0d5765bde1fa470eb087df5fe1e45e56",
+            "6873f0a521cd4e4390563e27046c1aa6",
+            "0832a0aa65ac4f70ba2f303f9b867d25",
+            "436e9deafae04955a13f2ff0fc0e6348",
+            "83427a912e3740008c0a7ffb9d20a1bd",
+            "084f196c105c4c428253e0ce6dce8a21",
+            "403990397a4041abb8aff7bdcc5d8e8a",
+            "f957482962954f03b383d7c4f93c7b04",
+            "19a731496867404b8888986f3d9c83ef",
+            "e0b09d3aa9ae4719bcdfad8e75ad86c5",
+            "9d41c197843644858919a396dc927936",
+            "67f9f877dee04d538ffbe2de6c4dd181",
+            "001bb332cdb94a0c96dd2a322ff8718f"
+          ]
+        },
+        "id": "tI5Ta-gQy56c",
+        "outputId": "a07d8bea-809e-44e8-f3ea-28345af97157"
+      },
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Downloading (…)lve/main/config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "984896bacc39446091e2b6573facfa3e"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Downloading pytorch_model.bin:   0%|          | 0.00/6.85G [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "ae3921fcb8e44862a42031f284f608c3"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "0832a0aa65ac4f70ba2f303f9b867d25"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/peft/utils/other.py:76: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.\n",
+            "  warnings.warn(\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "config = LoraConfig(\n",
+        "    r=LORA_R,\n",
+        "    lora_alpha=LORA_ALPHA,\n",
+        "    target_modules=[\"q_proj\", \"v_proj\"],\n",
+        "    lora_dropout=LORA_DROPOUT,\n",
+        "    bias=\"none\",\n",
+        "    task_type=\"CAUSAL_LM\",\n",
+        ")\n",
+        "model = get_peft_model(model, config)\n",
+        "tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token"
+      ],
+      "metadata": {
+        "id": "Oh-jJFKNzBpw"
+      },
+      "execution_count": 13,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "data = data.shuffle().map(\n",
+        "    lambda data_point: tokenizer(\n",
+        "        generate_prompt(data_point),\n",
+        "        truncation=True,\n",
+        "        max_length=CUTOFF_LEN,\n",
+        "        padding=\"max_length\",\n",
+        "    )\n",
+        ")\n",
+        "\n",
+        "trainer = transformers.Trainer(\n",
+        "    model=model,\n",
+        "    train_dataset=data[\"train\"],\n",
+        "    args=transformers.TrainingArguments(\n",
+        "        per_device_train_batch_size=MICRO_BATCH_SIZE,\n",
+        "        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n",
+        "        warmup_steps=100,\n",
+        "        num_train_epochs=EPOCHS,\n",
+        "        learning_rate=LEARNING_RATE,\n",
+        "        fp16=True,\n",
+        "        logging_steps=1,\n",
+        "        output_dir=\"lora-alpaca\",\n",
+        "        save_total_limit=3,\n",
+        "    ),\n",
+        "    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
+        ")\n",
+        "model.config.use_cache = False\n",
+        "trainer.train(resume_from_checkpoint=False)\n",
+        "\n",
+        "model.save_pretrained(\"lora-alpaca\")\n",
+        "\n",
+        "!cp -rv lora-alpaca /content/drive/MyDrive/"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000,
+          "referenced_widgets": [
+            "310970764a4a4c9382e2334a4abbbd77",
+            "2cd59f5acc4948988814eb9b3d909aed",
+            "8939309032ca4787a9d3c7b54b889926",
+            "fcd31f8d1e5a482d9267ab63fd723e86",
+            "2d126272a4284ba9af39de65e9680fc8",
+            "d8a0e85fa9fc4a9291f9303861fc9efc",
+            "d21f1cfb275b4bb38baf7022665e0208",
+            "81e25933b9af43a492a6da4a5e3560f6",
+            "5fe27494333b4e07a632769696744210",
+            "35ca13104a3e4263a192889362dbc05e",
+            "85ae96c237754c4db014e5c09898e162"
+          ]
+        },
+        "id": "kWP89TPIwRkK",
+        "outputId": "fb4d3012-795b-46c2-90a0-f72fc8185a11"
+      },
+      "execution_count": 16,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Map:   0%|          | 0/15011 [00:00<?, ? examples/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "310970764a4a4c9382e2334a4abbbd77"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:407: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:318: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
+            "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "\n",
+              "    <div>\n",
+              "      \n",
+              "      <progress value='3' max='119' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+              "      [  3/119 01:48 < 3:29:02, 0.01 it/s, Epoch 0.02/1]\n",
+              "    </div>\n",
+              "    <table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              " <tr style=\"text-align: left;\">\n",
+              "      <th>Step</th>\n",
+              "      <th>Training Loss</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <td>1</td>\n",
+              "      <td>2.421700</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table><p>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
+              "\u001b[31m│\u001b[0m in \u001b[92m<cell line: 27>\u001b[0m:\u001b[94m27\u001b[0m                                                                            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1696\u001b[0m in \u001b[92mtrain\u001b[0m                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1693 \u001b[0m\u001b[2m│   │   \u001b[0minner_training_loop = find_executable_batch_size(                                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1694 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m._inner_training_loop, \u001b[96mself\u001b[0m._train_batch_size, args.auto_find_batch_size  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1695 \u001b[0m\u001b[2m│   │   \u001b[0m)                                                                                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1696 \u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m inner_training_loop(                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1697 \u001b[0m\u001b[2m│   │   │   \u001b[0margs=args,                                                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1698 \u001b[0m\u001b[2m│   │   │   \u001b[0mresume_from_checkpoint=resume_from_checkpoint,                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1699 \u001b[0m\u001b[2m│   │   │   \u001b[0mtrial=trial,                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1973\u001b[0m in \u001b[92m_inner_training_loop\u001b[0m     \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1970 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m\u001b[94mwith\u001b[0m model.no_sync():                                                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1971 \u001b[0m\u001b[2m│   │   │   │   │   │   \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs)                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1972 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                     \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1973 \u001b[2m│   │   │   │   │   \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs)                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1974 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m                                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1975 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94mif\u001b[0m (                                                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1976 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0margs.logging_nan_inf_filter                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2787\u001b[0m in \u001b[92mtraining_step\u001b[0m            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2784 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m loss_mb.reduce_mean().detach().to(\u001b[96mself\u001b[0m.args.device)                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2785 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2786 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mwith\u001b[0m \u001b[96mself\u001b[0m.compute_loss_context_manager():                                         \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2787 \u001b[2m│   │   │   \u001b[0mloss = \u001b[96mself\u001b[0m.compute_loss(model, inputs)                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2788 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2789 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.n_gpu > \u001b[94m1\u001b[0m:                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2790 \u001b[0m\u001b[2m│   │   │   \u001b[0mloss = loss.mean()  \u001b[2m# mean() to average on multi-gpu parallel training\u001b[0m        \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2819\u001b[0m in \u001b[92mcompute_loss\u001b[0m             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2816 \u001b[0m\u001b[2m│   │   │   \u001b[0mlabels = inputs.pop(\u001b[33m\"\u001b[0m\u001b[33mlabels\u001b[0m\u001b[33m\"\u001b[0m)                                                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2817 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2818 \u001b[0m\u001b[2m│   │   │   \u001b[0mlabels = \u001b[94mNone\u001b[0m                                                                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2819 \u001b[2m│   │   \u001b[0moutputs = model(**inputs)                                                         \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2820 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Save past state if it exists\u001b[0m                                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2821 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# TODO: this needs to be fixed and made cleaner later.\u001b[0m                            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2822 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.past_index >= \u001b[94m0\u001b[0m:                                                     \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/\u001b[0m\u001b[1;33mpeft_model.py\u001b[0m:\u001b[94m686\u001b[0m in \u001b[92mforward\u001b[0m                        \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m 683 \u001b[0m\u001b[2m│   \u001b[0m):                                                                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m 684 \u001b[0m\u001b[2m│   │   \u001b[0mpeft_config = \u001b[96mself\u001b[0m.active_peft_config                                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m 685 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96misinstance\u001b[0m(peft_config, PromptLearningConfig):                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 686 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m.base_model(                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m 687 \u001b[0m\u001b[2m│   │   │   │   \u001b[0minput_ids=input_ids,                                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m 688 \u001b[0m\u001b[2m│   │   │   │   \u001b[0mattention_mask=attention_mask,                                            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m 689 \u001b[0m\u001b[2m│   │   │   │   \u001b[0minputs_embeds=inputs_embeds,                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m687\u001b[0m in       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m                                                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m684 \u001b[0m\u001b[2m│   │   \u001b[0mreturn_dict = return_dict \u001b[94mif\u001b[0m return_dict \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[94melse\u001b[0m \u001b[96mself\u001b[0m.config.use_return   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m685 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m686 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[0m    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m687 \u001b[2m│   │   \u001b[0moutputs = \u001b[96mself\u001b[0m.model(                                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m688 \u001b[0m\u001b[2m│   │   │   \u001b[0minput_ids=input_ids,                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m689 \u001b[0m\u001b[2m│   │   │   \u001b[0mattention_mask=attention_mask,                                                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m690 \u001b[0m\u001b[2m│   │   │   \u001b[0mposition_ids=position_ids,                                                     \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m569\u001b[0m in       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m                                                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m566 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m                                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m567 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m\u001b[94mreturn\u001b[0m custom_forward                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m568 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m                                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m569 \u001b[2m│   │   │   │   \u001b[0mlayer_outputs = torch.utils.checkpoint.checkpoint(                         \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m570 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mcreate_custom_forward(decoder_layer),                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m571 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mhidden_states,                                                         \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m572 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0mattention_mask,                                                        \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/\u001b[0m\u001b[1;33mcheckpoint.py\u001b[0m:\u001b[94m249\u001b[0m in \u001b[92mcheckpoint\u001b[0m              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m246 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mValueError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mUnexpected keyword arguments: \u001b[0m\u001b[33m\"\u001b[0m + \u001b[33m\"\u001b[0m\u001b[33m,\u001b[0m\u001b[33m\"\u001b[0m.join(arg \u001b[94mfor\u001b[0m arg \u001b[95min\u001b[0m kwar   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m247 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m248 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mif\u001b[0m use_reentrant:                                                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m249 \u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m CheckpointFunction.apply(function, preserve, *args)                         \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m250 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94melse\u001b[0m:                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m251 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m _checkpoint_without_reentrant(                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m252 \u001b[0m\u001b[2m│   │   │   \u001b[0mfunction,                                                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m503 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active():                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m504 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m505 \u001b[0m\u001b[2m│   │   │   \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args)                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs)  \u001b[2m# type: ignore[misc]\u001b[0m                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m507 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m508 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context:                        \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m509 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m(                                                            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/\u001b[0m\u001b[1;33mcheckpoint.py\u001b[0m:\u001b[94m107\u001b[0m in \u001b[92mforward\u001b[0m                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m104 \u001b[0m\u001b[2m│   │   \u001b[0mctx.save_for_backward(*tensor_inputs)                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m105 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m106 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m107 \u001b[2m│   │   │   \u001b[0moutputs = run_function(*args)                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m108 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m outputs                                                                     \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m109 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m110 \u001b[0m\u001b[2m│   \u001b[0m\u001b[1;95m@staticmethod\u001b[0m                                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m565\u001b[0m in       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[92mcustom_forward\u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m562 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcreate_custom_forward\u001b[0m(module):                                         \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m563 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcustom_forward\u001b[0m(*inputs):                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m564 \u001b[0m\u001b[2m│   │   │   │   │   │   \u001b[0m\u001b[2m# None for past_key_value\u001b[0m                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m565 \u001b[2m│   │   │   │   │   │   \u001b[0m\u001b[94mreturn\u001b[0m module(*inputs, output_attentions, \u001b[94mNone\u001b[0m)                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m566 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m                                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m567 \u001b[0m\u001b[2m│   │   │   │   │   \u001b[0m\u001b[94mreturn\u001b[0m custom_forward                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m568 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m292\u001b[0m in       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m                                                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m289 \u001b[0m\u001b[2m│   │   \u001b[0mhidden_states = \u001b[96mself\u001b[0m.input_layernorm(hidden_states)                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m290 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m291 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Self Attention\u001b[0m                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m292 \u001b[2m│   │   \u001b[0mhidden_states, self_attn_weights, present_key_value = \u001b[96mself\u001b[0m.self_attn(              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m293 \u001b[0m\u001b[2m│   │   │   \u001b[0mhidden_states=hidden_states,                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m294 \u001b[0m\u001b[2m│   │   │   \u001b[0mattention_mask=attention_mask,                                                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m295 \u001b[0m\u001b[2m│   │   │   \u001b[0mposition_ids=position_ids,                                                     \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m162 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad():                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m163 \u001b[0m\u001b[2m│   │   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m164 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94melse\u001b[0m:                                                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│   │   │   \u001b[0moutput = old_forward(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m166 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output)                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m167 \u001b[0m\u001b[2m│   \u001b[0m                                                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m168 \u001b[0m\u001b[2m│   \u001b[0mmodule.forward = new_forward                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m194\u001b[0m in       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m                                                                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m191 \u001b[0m\u001b[2m│   \u001b[0m) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m192 \u001b[0m\u001b[2m│   │   \u001b[0mbsz, q_len, _ = hidden_states.size()                                               \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m193 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m194 \u001b[2m│   │   \u001b[0mquery_states = \u001b[96mself\u001b[0m.q_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m.   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m195 \u001b[0m\u001b[2m│   │   \u001b[0mkey_states = \u001b[96mself\u001b[0m.k_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m.he   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m196 \u001b[0m\u001b[2m│   │   \u001b[0mvalue_states = \u001b[96mself\u001b[0m.v_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m.   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m197 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1498 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1499 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1500 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks):                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs)                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1502 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m                                          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1503 \u001b[0m\u001b[2m│   │   \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], []                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m1504 \u001b[0m\u001b[2m│   │   \u001b[0mbackward_pre_hooks = []                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/tuners/\u001b[0m\u001b[1;33mlora.py\u001b[0m:\u001b[94m709\u001b[0m in \u001b[92mforward\u001b[0m                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m706 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m.active_adapter = adapter_name                                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m707 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m708 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mforward\u001b[0m(\u001b[96mself\u001b[0m, x: torch.Tensor):                                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m709 \u001b[2m│   │   │   \u001b[0mresult = \u001b[96msuper\u001b[0m().forward(x)                                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m710 \u001b[0m\u001b[2m│   │   │   \u001b[0m                                                                               \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m711 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.disable_adapters \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m.active_adapter \u001b[95mnot\u001b[0m \u001b[95min\u001b[0m \u001b[96mself\u001b[0m.lora_A.keys():     \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m712 \u001b[0m\u001b[2m│   │   │   │   \u001b[0m\u001b[94mreturn\u001b[0m result                                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/\u001b[0m\u001b[1;33mmodules.py\u001b[0m:\u001b[94m388\u001b[0m in \u001b[92mforward\u001b[0m                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m385 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.bias \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.bias.dtype != x.dtype:                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m386 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[96mself\u001b[0m.bias.data = \u001b[96mself\u001b[0m.bias.data.to(x.dtype)                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m387 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m388 \u001b[2m│   │   \u001b[0mout = bnb.matmul(x, \u001b[96mself\u001b[0m.weight, bias=\u001b[96mself\u001b[0m.bias, state=\u001b[96mself\u001b[0m.state)                 \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m389 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m390 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96mself\u001b[0m.state.has_fp16_weights:                                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m391 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.state.CB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.state.CxB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m:                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m559\u001b[0m in \u001b[92mmatmul\u001b[0m        \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m556 \u001b[0m\u001b[2m│   \u001b[0mstate = state \u001b[95mor\u001b[0m MatmulLtState()                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m557 \u001b[0m\u001b[2m│   \u001b[0m\u001b[94mif\u001b[0m threshold > \u001b[94m0.0\u001b[0m:                                                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m558 \u001b[0m\u001b[2m│   │   \u001b[0mstate.threshold = threshold                                                        \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m559 \u001b[2m│   \u001b[0m\u001b[94mreturn\u001b[0m MatMul8bitLt.apply(A, B, out, bias, state)                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m560 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m561 \u001b[0m                                                                                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m562 \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mmatmul_4bit\u001b[0m(A: tensor, B: tensor, quant_state: List, out: tensor = \u001b[94mNone\u001b[0m, bias=\u001b[94mNone\u001b[0m):   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m503 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active():                                \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m504 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m                           \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m505 \u001b[0m\u001b[2m│   │   │   \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args)                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│   │   │   \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs)  \u001b[2m# type: ignore[misc]\u001b[0m                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m507 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m508 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context:                        \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m509 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m(                                                            \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m323\u001b[0m in \u001b[92mforward\u001b[0m       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m320 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[2m# 1. Quantize A\u001b[0m                                                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m321 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(A.shape) == \u001b[94m3\u001b[0m:                                                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m322 \u001b[0m\u001b[2m│   │   │   \u001b[0mA = A.view(-\u001b[94m1\u001b[0m, A.shape[-\u001b[94m1\u001b[0m]).contiguous()                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m323 \u001b[2m│   │   \u001b[0mCA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold=   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m324 \u001b[0m\u001b[2m│   │   \u001b[0m                                                                                   \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m325 \u001b[0m\u001b[2m│   │   \u001b[0m\u001b[94mif\u001b[0m state.threshold > \u001b[94m0.0\u001b[0m \u001b[95mand\u001b[0m coo_tensorA \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m:                              \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m326 \u001b[0m\u001b[2m│   │   │   \u001b[0m\u001b[94mif\u001b[0m state.has_fp16_weights:                                                     \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/\u001b[0m\u001b[1;33mfunctional.py\u001b[0m:\u001b[94m2029\u001b[0m in \u001b[92mdouble_quant\u001b[0m          \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2026 \u001b[0m\u001b[2m│   │   │   │   \u001b[0mct.c_int32(rows),                                                         \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2027 \u001b[0m\u001b[2m│   │   │   │   \u001b[0mct.c_int32(cols),                                                         \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2028 \u001b[0m\u001b[2m│   │   │   \u001b[0m)                                                                             \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2029 \u001b[2m│   │   │   \u001b[0mval, idx = torch.sort(coo_tensor.rowidx)                                      \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2030 \u001b[0m\u001b[2m│   │   │   \u001b[0mcoo_tensor.rowidx = val                                                       \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2031 \u001b[0m\u001b[2m│   │   │   \u001b[0mcoo_tensor.colidx = coo_tensor.colidx[idx]                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m│\u001b[0m   \u001b[2m2032 \u001b[0m\u001b[2m│   │   │   \u001b[0mcoo_tensor.values = coo_tensor.values[idx]                                    \u001b[31m│\u001b[0m\n",
+              "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
+              "\u001b[1;91mKeyboardInterrupt\u001b[0m\n"
+            ],
+            "text/html": [
+              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">&lt;cell line: 27&gt;</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">27</span>                                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1696</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">train</span>                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1693 │   │   </span>inner_training_loop = find_executable_batch_size(                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1694 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._inner_training_loop, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._train_batch_size, args.auto_find_batch_size  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1695 │   │   </span>)                                                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1696 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> inner_training_loop(                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1697 │   │   │   </span>args=args,                                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1698 │   │   │   </span>resume_from_checkpoint=resume_from_checkpoint,                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1699 │   │   │   </span>trial=trial,                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1973</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_inner_training_loop</span>     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1970 │   │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> model.no_sync():                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1971 │   │   │   │   │   │   </span>tr_loss_step = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.training_step(model, inputs)                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1972 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1973 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   │   │   </span>tr_loss_step = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.training_step(model, inputs)                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1974 │   │   │   │   </span>                                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1975 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> (                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1976 │   │   │   │   │   </span>args.logging_nan_inf_filter                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2787</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">training_step</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2784 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> loss_mb.reduce_mean().detach().to(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.args.device)                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2785 │   │   </span>                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2786 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.compute_loss_context_manager():                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">�� </span>2787 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>loss = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.compute_loss(model, inputs)                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2788 │   │   </span>                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2789 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.args.n_gpu &gt; <span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span>:                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2790 │   │   │   </span>loss = loss.mean()  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># mean() to average on multi-gpu parallel training</span>        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">trainer.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2819</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">compute_loss</span>             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2816 │   │   │   </span>labels = inputs.pop(<span style=\"color: #808000; text-decoration-color: #808000\">\"labels\"</span>)                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2817 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2818 │   │   │   </span>labels = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>                                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>2819 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>outputs = model(**inputs)                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2820 │   │   # Save past state if it exists</span>                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2821 │   │   # TODO: this needs to be fixed and made cleaner later.</span>                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2822 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.args.past_index &gt;= <span style=\"color: #0000ff; text-decoration-color: #0000ff\">0</span>:                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/peft/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">peft_model.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">686</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 683 │   </span>):                                                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 684 │   │   </span>peft_config = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.active_peft_config                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 685 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">isinstance</span>(peft_config, PromptLearningConfig):                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 686 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.base_model(                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 687 │   │   │   │   </span>input_ids=input_ids,                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 688 │   │   │   │   </span>attention_mask=attention_mask,                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 689 │   │   │   │   </span>inputs_embeds=inputs_embeds,                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/llama/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_llama.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">687</span> in       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                                                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">684 │   │   </span>return_dict = return_dict <span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> return_dict <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.config.use_return   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">685 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">686 │   │   # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)</span>    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>687 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>outputs = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.model(                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">688 │   │   │   </span>input_ids=input_ids,                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">689 │   │   │   </span>attention_mask=attention_mask,                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">690 │   │   │   </span>position_ids=position_ids,                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/llama/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_llama.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">569</span> in       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                                                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">566 │   │   │   │   │   </span>                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">567 │   │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> custom_forward                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">568 │   │   │   │   </span>                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>569 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   │   </span>layer_outputs = torch.utils.checkpoint.checkpoint(                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">570 │   │   │   │   │   </span>create_custom_forward(decoder_layer),                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">571 │   │   │   │   │   </span>hidden_states,                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">572 │   │   │   │   │   </span>attention_mask,                                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/utils/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">checkpoint.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">249</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">checkpoint</span>              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">246 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">ValueError</span>(<span style=\"color: #808000; text-decoration-color: #808000\">\"Unexpected keyword arguments: \"</span> + <span style=\"color: #808000; text-decoration-color: #808000\">\",\"</span>.join(arg <span style=\"color: #0000ff; text-decoration-color: #0000ff\">for</span> arg <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> kwar   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">247 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">248 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> use_reentrant:                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>249 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> CheckpointFunction.apply(function, preserve, *args)                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">250 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">251 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> _checkpoint_without_reentrant(                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">252 │   │   │   </span>function,                                                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/autograd/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">function.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">506</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">apply</span>                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">503 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> torch._C._are_functorch_transforms_active():                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]</span>                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">505 │   │   │   </span>args = _functorch.utils.unwrap_dead_wrappers(args)                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>506 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">super</span>().apply(*args, **kwargs)  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># type: ignore[misc]</span>                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">507 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">508 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">cls</span>.setup_context == _SingleLevelFunction.setup_context:                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">509 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">RuntimeError</span>(                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/utils/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">checkpoint.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">107</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">104 │   │   </span>ctx.save_for_backward(*tensor_inputs)                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">105 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">106 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>107 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>outputs = run_function(*args)                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">108 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> outputs                                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">109 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">110 │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">@staticmethod</span>                                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/llama/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_llama.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">565</span> in       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">custom_forward</span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">562 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">create_custom_forward</span>(module):                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">563 │   │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">custom_forward</span>(*inputs):                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">564 │   │   │   │   │   │   # None for past_key_value</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>565 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module(*inputs, output_attentions, <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>)                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">566 │   │   │   │   │   </span>                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">567 │   │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> custom_forward                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">568 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/llama/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_llama.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">292</span> in       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                                                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">289 │   │   </span>hidden_states = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.input_layernorm(hidden_states)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">290 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">291 │   │   # Self Attention</span>                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>292 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>hidden_states, self_attn_weights, present_key_value = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.self_attn(              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">293 │   │   │   </span>hidden_states=hidden_states,                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">294 │   │   │   </span>attention_mask=attention_mask,                                                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">295 │   │   │   </span>position_ids=position_ids,                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/accelerate/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">hooks.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">165</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">new_forward</span>                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">162 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">with</span> torch.no_grad():                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">163 │   │   │   │   </span>output = old_forward(*args, **kwargs)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">164 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>:                                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>165 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>output = old_forward(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">166 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> module._hf_hook.post_forward(module, output)                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">167 │   </span>                                                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">168 │   </span>module.forward = new_forward                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/transformers/models/llama/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modeling_llama.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">194</span> in       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                                                                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">191 │   </span>) -&gt; Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">192 │   │   </span>bsz, q_len, _ = hidden_states.size()                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">193 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>194 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>query_states = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.q_proj(hidden_states).view(bsz, q_len, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.num_heads, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">195 │   │   </span>key_states = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.k_proj(hidden_states).view(bsz, q_len, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.num_heads, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.he   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">196 │   │   </span>value_states = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.v_proj(hidden_states).view(bsz, q_len, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.num_heads, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">197 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/nn/modules/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">module.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1501</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_call_impl</span>            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1498 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> (<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._forward_hooks   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1499 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_pre_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_backward_hooks                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1500 │   │   │   │   </span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_hooks <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> _global_forward_pre_hooks):                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>1501 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> forward_call(*args, **kwargs)                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1502 │   │   # Do not call functions when jit is used</span>                                          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1503 │   │   </span>full_backward_hooks, non_full_backward_hooks = [], []                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">1504 │   │   </span>backward_pre_hooks = []                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/peft/tuners/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">lora.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">709</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">706 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.active_adapter = adapter_name                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">707 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">708 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>(<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>, x: torch.Tensor):                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>709 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>result = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">super</span>().forward(x)                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">710 │   │   │   </span>                                                                               <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">711 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.disable_adapters <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.active_adapter <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">in</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.lora_A.keys():     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">712 │   │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> result                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">modules.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">388</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">385 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">and</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias.dtype != x.dtype:                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">386 │   │   │   </span><span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias.data = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias.data.to(x.dtype)                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">387 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>388 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>out = bnb.matmul(x, <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.weight, bias=<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.bias, state=<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state)                 <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">389 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">390 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state.has_fp16_weights:                                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">391 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state.CB <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">and</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>.state.CxB <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>:                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">_functions.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">559</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">matmul</span>        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">556 │   </span>state = state <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">or</span> MatmulLtState()                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">557 │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> threshold &gt; <span style=\"color: #0000ff; text-decoration-color: #0000ff\">0.0</span>:                                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">558 │   │   </span>state.threshold = threshold                                                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>559 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> MatMul8bitLt.apply(A, B, out, bias, state)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">560 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">561 </span>                                                                                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">562 </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">def</span> <span style=\"color: #00ff00; text-decoration-color: #00ff00\">matmul_4bit</span>(A: tensor, B: tensor, quant_state: List, out: tensor = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>, bias=<span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>):   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/torch/autograd/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">function.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">506</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">apply</span>                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">503 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> torch._C._are_functorch_transforms_active():                                <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]</span>                           <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">505 │   │   │   </span>args = _functorch.utils.unwrap_dead_wrappers(args)                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>506 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">return</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">super</span>().apply(*args, **kwargs)  <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># type: ignore[misc]</span>                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">507 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">508 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">cls</span>.setup_context == _SingleLevelFunction.setup_context:                        <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">509 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">raise</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">RuntimeError</span>(                                                            <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">_functions.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">323</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">forward</span>       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">320 │   │   # 1. Quantize A</span>                                                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">321 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">len</span>(A.shape) == <span style=\"color: #0000ff; text-decoration-color: #0000ff\">3</span>:                                                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">322 │   │   │   </span>A = A.view(-<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span>, A.shape[-<span style=\"color: #0000ff; text-decoration-color: #0000ff\">1</span>]).contiguous()                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>323 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   </span>CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold=   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">324 │   │   </span>                                                                                   <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">325 │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> state.threshold &gt; <span style=\"color: #0000ff; text-decoration-color: #0000ff\">0.0</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">and</span> coo_tensorA <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>:                              <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">326 │   │   │   </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> state.has_fp16_weights:                                                     <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/usr/local/lib/python3.10/dist-packages/bitsandbytes/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">functional.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">2029</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">double_quant</span>          <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>                                                                                                  <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2026 │   │   │   │   </span>ct.c_int32(rows),                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2027 │   │   │   │   </span>ct.c_int32(cols),                                                         <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2028 │   │   │   </span>)                                                                             <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>2029 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│   │   │   </span>val, idx = torch.sort(coo_tensor.rowidx)                                      <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2030 │   │   │   </span>coo_tensor.rowidx = val                                                       <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2031 │   │   │   </span>coo_tensor.colidx = coo_tensor.colidx[idx]                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">│</span>   <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">2032 │   │   │   </span>coo_tensor.values = coo_tensor.values[idx]                                    <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
+              "<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
+              "<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">KeyboardInterrupt</span>\n",
+              "</pre>\n"
+            ]
+          },
+          "metadata": {}
+        }
+      ]
+    }
+  ]
+}