{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "114bfeb2ab21477397ff356a8d50d678": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_6c2a683959b74f06b71a21f62dc9d732", "IPY_MODEL_bfd850f32c4748cb9fc21e3f67a1e079", "IPY_MODEL_5405a44a0740459c9b2dc317f7e32af2" ], "layout": "IPY_MODEL_5ced55e2613f4ca09c576df4936d444d" } }, "6c2a683959b74f06b71a21f62dc9d732": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3d205afcaa37416dbafa166e7f6bb6ba", "placeholder": "​", "style": "IPY_MODEL_25969d182a7a4b569625efbb6e619bab", "value": "Downloading (…)okenizer_config.json: 100%" } }, "bfd850f32c4748cb9fc21e3f67a1e079": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_29c82e0054084c54bb58af9c3518017e", "max": 234, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_f969ec4a051d4a5ba29d285ed3f7f8f2", "value": 234 } }, "5405a44a0740459c9b2dc317f7e32af2": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c3f0dd842604416bb3fdfb40d79d57c1", "placeholder": "​", "style": "IPY_MODEL_85d7256713914254b5ba39ef2eee37bb", "value": " 234/234 [00:00<00:00, 8.59kB/s]" } }, "5ced55e2613f4ca09c576df4936d444d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, 
"grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3d205afcaa37416dbafa166e7f6bb6ba": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "25969d182a7a4b569625efbb6e619bab": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "29c82e0054084c54bb58af9c3518017e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f969ec4a051d4a5ba29d285ed3f7f8f2": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "c3f0dd842604416bb3fdfb40d79d57c1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "85d7256713914254b5ba39ef2eee37bb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9826e15254ca413eb37dc7f8837f6827": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_8d9500746f61474e81bcf51beaba54e1", "IPY_MODEL_41b89216ec9544af91b8b7f7e2f4664e", "IPY_MODEL_1538103aa26d41459b67843f917c5a1a" ], "layout": "IPY_MODEL_48bd08d79550483f84b241127e31878c" } }, "8d9500746f61474e81bcf51beaba54e1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b4f2ddbbf9de4898a7830c79ca2e20d7", "placeholder": "​", "style": "IPY_MODEL_c9ddb75f29af47b38f79b09792116673", "value": "Downloading (…)olve/main/vocab.json: 100%" } }, "41b89216ec9544af91b8b7f7e2f4664e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6f3eba08a72348df89642e7eee1fec30", "max": 1042301, "min": 0, 
"orientation": "horizontal", "style": "IPY_MODEL_4ae0e50ecd7f424dac6c9c2c5aa62696", "value": 1042301 } }, "1538103aa26d41459b67843f917c5a1a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a71e6379e60c46b5915f0ecd097a2a33", "placeholder": "​", "style": "IPY_MODEL_0a780d58219d46978fad719257430d5f", "value": " 1.04M/1.04M [00:00<00:00, 3.15MB/s]" } }, "48bd08d79550483f84b241127e31878c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b4f2ddbbf9de4898a7830c79ca2e20d7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c9ddb75f29af47b38f79b09792116673": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6f3eba08a72348df89642e7eee1fec30": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4ae0e50ecd7f424dac6c9c2c5aa62696": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a71e6379e60c46b5915f0ecd097a2a33": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0a780d58219d46978fad719257430d5f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e685a0136ffa4280b0a49263c4f842d9": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ 
"IPY_MODEL_845a84b8f7344a468e0b0b72f0708378", "IPY_MODEL_a56823a90ee64b8fb87b2d2ba8857e53", "IPY_MODEL_b6698a0e0ca2409daf771b0d0a419e3a" ], "layout": "IPY_MODEL_c8ac228076fa4fb88ff37e2f3c34be93" } }, "845a84b8f7344a468e0b0b72f0708378": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f16775e4439a4c4f886c4c4027d30832", "placeholder": "​", "style": "IPY_MODEL_3b40ee2e5b60499e81f8c79a6383b2ec", "value": "Downloading (…)olve/main/merges.txt: 100%" } }, "a56823a90ee64b8fb87b2d2ba8857e53": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9a1f2210fd1e47d3ba66cf0a99f937b8", "max": 456318, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_fc2aa788745848c8b363c44e7d07116b", "value": 456318 } }, "b6698a0e0ca2409daf771b0d0a419e3a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d9ea0947db244309b8effbfe53267042", "placeholder": "​", "style": "IPY_MODEL_f52cccc762dc4c419671b2aade6a3506", "value": " 456k/456k [00:00<00:00, 2.84MB/s]" } }, "c8ac228076fa4fb88ff37e2f3c34be93": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f16775e4439a4c4f886c4c4027d30832": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": 
"LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3b40ee2e5b60499e81f8c79a6383b2ec": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9a1f2210fd1e47d3ba66cf0a99f937b8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fc2aa788745848c8b363c44e7d07116b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d9ea0947db244309b8effbfe53267042": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, 
"grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f52cccc762dc4c419671b2aade6a3506": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9c7c0a7882964d8ca8031ac18af93253": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_f5f57b4e23174d0fa8142dcb57207720", "IPY_MODEL_50f474df9ab147f7ba094c1cdb2ba69e", "IPY_MODEL_35c204e247044a11a9ee871cfbcbe430" ], "layout": "IPY_MODEL_8596f49e4d594ae4a0e848608c7d122f" } }, "f5f57b4e23174d0fa8142dcb57207720": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_874be31d42ce487697133b07065f815d", "placeholder": "​", "style": "IPY_MODEL_41d87a1c6a6a4d32ac20f35f779207d5", "value": "Downloading (…)cial_tokens_map.json: 100%" } }, "50f474df9ab147f7ba094c1cdb2ba69e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b7ab4be64f8b487893997a5d57eb9f09", "max": 99, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8620138609a24a3aae22da774fb32bdb", "value": 99 } }, "35c204e247044a11a9ee871cfbcbe430": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a76b7ee703ed4ed08a1e0e1a708aa117", "placeholder": "​", "style": "IPY_MODEL_44aa877b3a844922a1ef50809713fe32", "value": " 99.0/99.0 [00:00<00:00, 2.91kB/s]" } }, 
"8596f49e4d594ae4a0e848608c7d122f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "874be31d42ce487697133b07065f815d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "41d87a1c6a6a4d32ac20f35f779207d5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b7ab4be64f8b487893997a5d57eb9f09": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": 
null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8620138609a24a3aae22da774fb32bdb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a76b7ee703ed4ed08a1e0e1a708aa117": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "44aa877b3a844922a1ef50809713fe32": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "21f3adc7a34149d88b0e67b537eec488": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_61c7dc85009b4ab0968b8222a49297a1", "IPY_MODEL_194d14e6d61f47a28f7615f0c0071831", "IPY_MODEL_1e5eea17dab34a18a3e8fafe6988ad90" ], "layout": "IPY_MODEL_630b9cb70bed4900b97cfa3c476d6ca0" } }, "61c7dc85009b4ab0968b8222a49297a1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7d0e80dbc0dc4431ba3eacc2ed719e2e", "placeholder": "​", "style": "IPY_MODEL_46561fc96ed645e1bfc4039a222b2f66", 
"value": "Downloading (…)lve/main/config.json: 100%" } }, "194d14e6d61f47a28f7615f0c0071831": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_162e24639a1d462eaa67e8e8040ca879", "max": 1242, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9ab05a9fc9134fe19ef600b8cde42b78", "value": 1242 } }, "1e5eea17dab34a18a3e8fafe6988ad90": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b80761f44a4e4c889e3acfa06852c064", "placeholder": "​", "style": "IPY_MODEL_713e206a8c894bcf965cb96a97f30be7", "value": " 1.24k/1.24k [00:00<00:00, 35.4kB/s]" } }, "630b9cb70bed4900b97cfa3c476d6ca0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7d0e80dbc0dc4431ba3eacc2ed719e2e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": 
null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "46561fc96ed645e1bfc4039a222b2f66": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "162e24639a1d462eaa67e8e8040ca879": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9ab05a9fc9134fe19ef600b8cde42b78": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b80761f44a4e4c889e3acfa06852c064": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "713e206a8c894bcf965cb96a97f30be7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": 
"DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "85a1329fbf9e44c8b708a1ad868691d7": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9be2d523ce7e4c2daeed4f4846f16cff", "IPY_MODEL_3321f3d71e684ca4afa48a1e0b889ebd", "IPY_MODEL_adb1ff19806c4d9abfef424270408759" ], "layout": "IPY_MODEL_d7a5976a0b6b4a62913bdf148d06a6bf" } }, "9be2d523ce7e4c2daeed4f4846f16cff": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_efed82b2f6f84516989d07f9515a102d", "placeholder": "​", "style": "IPY_MODEL_4f6b53cc236b448093d23240bd28a6d8", "value": "Downloading (…)onfiguration_btlm.py: 100%" } }, "3321f3d71e684ca4afa48a1e0b889ebd": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b7821089bfe94594982736d3005b82e9", "max": 7584, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d6f016012139482b9ffc48407a0e7cb1", "value": 7584 } }, "adb1ff19806c4d9abfef424270408759": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_eee7b8b660f74f0e9e0634a17b29ecb1", "placeholder": "​", "style": "IPY_MODEL_6296781e8cc94316a44c5a0a0a77f46c", "value": " 7.58k/7.58k [00:00<00:00, 221kB/s]" } }, "d7a5976a0b6b4a62913bdf148d06a6bf": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": 
null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "efed82b2f6f84516989d07f9515a102d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4f6b53cc236b448093d23240bd28a6d8": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b7821089bfe94594982736d3005b82e9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d6f016012139482b9ffc48407a0e7cb1": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, 
"eee7b8b660f74f0e9e0634a17b29ecb1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6296781e8cc94316a44c5a0a0a77f46c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0c4007f367744ccf80f3c9af09af53de": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_87a60648650a41fa9b3573be98e4adad", "IPY_MODEL_0cbd3f72373544b8a946c70627e515f9", "IPY_MODEL_d0f8fa0a331b4da98c0b55311ea073a0" ], "layout": "IPY_MODEL_a312c5e9461d470ca8c3cd2184a7ee09" } }, "87a60648650a41fa9b3573be98e4adad": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8980f61b2d4b4164a21b596ce515f6e3", "placeholder": "​", "style": "IPY_MODEL_d58f9dcf2c7b4fa887702b231ba0ddd1", "value": "Downloading (…)ain/modeling_btlm.py: 100%" } }, "0cbd3f72373544b8a946c70627e515f9": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_36cafb73c9fd4bb486853771b146fb44", "max": 71508, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_a31d4ba8182a41f7a164a87e0492bfe2", "value": 71508 } }, 
"d0f8fa0a331b4da98c0b55311ea073a0": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_48584e07c5074502acc46680e17b6dd8", "placeholder": "​", "style": "IPY_MODEL_7731d185d2d0401c9ce8dd045ffc187a", "value": " 71.5k/71.5k [00:00<00:00, 935kB/s]" } }, "a312c5e9461d470ca8c3cd2184a7ee09": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8980f61b2d4b4164a21b596ce515f6e3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d58f9dcf2c7b4fa887702b231ba0ddd1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "36cafb73c9fd4bb486853771b146fb44": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", 
"_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a31d4ba8182a41f7a164a87e0492bfe2": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "48584e07c5074502acc46680e17b6dd8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7731d185d2d0401c9ce8dd045ffc187a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b5f78d5cabff40ca8749376141ea8d23": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_8e4700723e894b7380862d08a34afe9d", "IPY_MODEL_3b1d6cbf8b2b4332a3fdbe2a5a755116", "IPY_MODEL_5f52ddd679124bbfb141e10103b5ac42" ], 
"layout": "IPY_MODEL_289ac602411348498cd757b2a6b53a49" } }, "8e4700723e894b7380862d08a34afe9d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_fdfb46b541434685b1b0cf11c2fb8597", "placeholder": "​", "style": "IPY_MODEL_c45db1178d344416b5a107048f482a3e", "value": "Downloading pytorch_model.bin: 100%" } }, "3b1d6cbf8b2b4332a3fdbe2a5a755116": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b8ffc780321f405592a9112487316d31", "max": 5292652837, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b24b1532289f4ed99e3b645248aff802", "value": 5292652837 } }, "5f52ddd679124bbfb141e10103b5ac42": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6d924d2d6820449daf637270470fa50b", "placeholder": "​", "style": "IPY_MODEL_ca9129ae04054d419c5a2c5b8f5b8364", "value": " 5.29G/5.29G [00:33<00:00, 215MB/s]" } }, "289ac602411348498cd757b2a6b53a49": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fdfb46b541434685b1b0cf11c2fb8597": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", 
"align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c45db1178d344416b5a107048f482a3e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b8ffc780321f405592a9112487316d31": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b24b1532289f4ed99e3b645248aff802": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "6d924d2d6820449daf637270470fa50b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, 
"height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ca9129ae04054d419c5a2c5b8f5b8364": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8bf00751b22e4aae8bb5e715940dea8f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_b996c1623b584cdeaa79a68fcb3f6535", "IPY_MODEL_53b6219000d541a2ab09b807084e33da", "IPY_MODEL_2714c5ad04fb4621baf8affc9795027a" ], "layout": "IPY_MODEL_d6200cca0bab43e9b24a86be582833d9" } }, "b996c1623b584cdeaa79a68fcb3f6535": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a8085708e61f4f08a9f90b881d957e88", "placeholder": "​", "style": "IPY_MODEL_cc839ad647d94e2fac9bffb8e3e1c421", "value": "Downloading (…)neration_config.json: 100%" } }, "53b6219000d541a2ab09b807084e33da": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0eab7e2aafd14b96bb601b11fbf6af59", "max": 119, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_bff03d7ae4f04a55b0ba015fe9b70c54", "value": 119 } }, "2714c5ad04fb4621baf8affc9795027a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c95ee59f813e4ccaad4c845fe2c67512", "placeholder": "​", "style": "IPY_MODEL_823526b63b3d4598a456955884ba7293", "value": " 119/119 [00:00<00:00, 5.35kB/s]" } }, "d6200cca0bab43e9b24a86be582833d9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", 
"state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a8085708e61f4f08a9f90b881d957e88": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cc839ad647d94e2fac9bffb8e3e1c421": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0eab7e2aafd14b96bb601b11fbf6af59": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": 
null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bff03d7ae4f04a55b0ba015fe9b70c54": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "c95ee59f813e4ccaad4c845fe2c67512": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "823526b63b3d4598a456955884ba7293": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "427689dd96c94e9e8706872de004600c": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_a4174533851147d0b5452428cfbcb4d0", "IPY_MODEL_e4cc83f17f3941a69be2c5a328c1df7c", "IPY_MODEL_9f696bef605146c088cd39c3e2d41fa7", "IPY_MODEL_4b995377bec241c6b61b7e68518403bb" ], "layout": "IPY_MODEL_59aec4ac781a42709f49465aaa8beda8" } }, "f3d6c5dcb2674419a9cc9d0a78a417ea": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_96c57c6e5c5a446d96664912b2a1f923", "placeholder": "​", "style": "IPY_MODEL_acf63c0070b346f8ba0833bf9d6dbf76", "value": "
Copy a token from your Hugging Face\ntokens page and paste it below.\nImmediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, "1f0e6e0047dc471a91a581505031dcf4": { "model_module": "@jupyter-widgets/controls", "model_name": "PasswordModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "PasswordModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "PasswordView", "continuous_update": true, "description": "Token:", "description_tooltip": null, "disabled": false, "layout": "IPY_MODEL_72da3e460812424db18da699f1adc68f", "placeholder": "​", "style": "IPY_MODEL_6b9996976aeb46a781c2827c7eb80400", "value": "" } }, "63e88741fa824103a38134ca7f92efc4": { "model_module": "@jupyter-widgets/controls", "model_name": "CheckboxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "CheckboxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "CheckboxView", "description": "Add token as git credential?", "description_tooltip": null, "disabled": false, "indent": true, "layout": "IPY_MODEL_0f5aa3f5a3fd4869ad22a7d7eb3f4a01", "style": "IPY_MODEL_1a4f383e5b6e4edea22cf8269ac3e136", "value": true } }, "760f19938d4a420fba6b604413bd75c5": { "model_module": "@jupyter-widgets/controls", "model_name": "ButtonModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ButtonView", "button_style": "", "description": "Login", "disabled": false, "icon": "", "layout": "IPY_MODEL_5e880b14816c40e1ade44bb4e8e3e7a0", "style": "IPY_MODEL_c214e6421b8c444eb91f01729650dfc3", "tooltip": "" } }, "1dc3baf2eb7a4d3981a7a38d574545ea": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5ce845d6ca8e40c5ad7c9d07f7ad271c", "placeholder": "​", "style": "IPY_MODEL_4b7fc4873cc44b9eaf915d4e8bd5e134", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" } }, "59aec4ac781a42709f49465aaa8beda8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": "center", "align_self": null, "border": null, "bottom": null, "display": "flex", "flex": null, "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "50%" } }, "96c57c6e5c5a446d96664912b2a1f923": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "acf63c0070b346f8ba0833bf9d6dbf76": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "72da3e460812424db18da699f1adc68f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6b9996976aeb46a781c2827c7eb80400": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0f5aa3f5a3fd4869ad22a7d7eb3f4a01": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1a4f383e5b6e4edea22cf8269ac3e136": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5e880b14816c40e1ade44bb4e8e3e7a0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c214e6421b8c444eb91f01729650dfc3": { "model_module": 
"@jupyter-widgets/controls", "model_name": "ButtonStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "button_color": null, "font_weight": "" } }, "5ce845d6ca8e40c5ad7c9d07f7ad271c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4b7fc4873cc44b9eaf915d4e8bd5e134": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c8fcc29ea48f44b49ec3547067348bc2": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_665705e3373b4a2093bd921a2341bf99", "IPY_MODEL_e890ec5fafb344609f506f16be57d799", "IPY_MODEL_92302f6d76014a778c6cb7370d2f114c" ], "layout": "IPY_MODEL_a1652fbe06094b51994fc22f16f9615a" } }, "665705e3373b4a2093bd921a2341bf99": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a122a0fc52604974a77fe18f7dd43c2f", "placeholder": "​", "style": "IPY_MODEL_dfa55c882b3d44c395d89c53af5f68d7", "value": "btlm-3b.ggml.bin: 100%" } }, "e890ec5fafb344609f506f16be57d799": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_fac01390a0634c0caa885e4626fda033", "max": 5557636513, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_6c8e3fe6bca1438a879a0ee18abc5294", "value": 5557636513 } }, "92302f6d76014a778c6cb7370d2f114c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d9ffd07e99cb4a2da0966f653351e6cf", "placeholder": "​", "style": "IPY_MODEL_0005a3238bb84c619e657100845f9c84", "value": " 5.56G/5.56G [03:45<00:00, 20.9MB/s]" } }, "a1652fbe06094b51994fc22f16f9615a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a122a0fc52604974a77fe18f7dd43c2f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dfa55c882b3d44c395d89c53af5f68d7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": 
"DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "fac01390a0634c0caa885e4626fda033": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6c8e3fe6bca1438a879a0ee18abc5294": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d9ffd07e99cb4a2da0966f653351e6cf": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0005a3238bb84c619e657100845f9c84": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b7ace70b606943059acf751b89b53291": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b6e56ddf9da04a52b99de60c9e5a3f57", "placeholder": "​", "style": "IPY_MODEL_55b11341f930432282c18b908e8c7c4a", "value": "Connecting..." } }, "b6e56ddf9da04a52b99de60c9e5a3f57": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "55b11341f930432282c18b908e8c7c4a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a4174533851147d0b5452428cfbcb4d0": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b7e2a45a24724bc79db6b055ff3c6e90", "placeholder": "​", "style": "IPY_MODEL_1b2fb7402b6d4bdcbe38e3e1e9179b74", "value": "Token is valid (permission: write)." } }, "e4cc83f17f3941a69be2c5a328c1df7c": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_51921dbb33f445f7ab72666ecac3f85c", "placeholder": "​", "style": "IPY_MODEL_e804d48983884eb7bfd85123482317f7", "value": "Your token has been saved in your configured git credential helpers (store)." 
} }, "9f696bef605146c088cd39c3e2d41fa7": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a15221a1538c43c7ac162324644f2c2d", "placeholder": "​", "style": "IPY_MODEL_baac4e68e10c4e7bbffeff26b066f308", "value": "Your token has been saved to /root/.cache/huggingface/token" } }, "4b995377bec241c6b61b7e68518403bb": { "model_module": "@jupyter-widgets/controls", "model_name": "LabelModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_04ed18f2ff804865af93d4d73b347d77", "placeholder": "​", "style": "IPY_MODEL_603ecce9c961468eb90510fe0587bc32", "value": "Login successful" } }, "b7e2a45a24724bc79db6b055ff3c6e90": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1b2fb7402b6d4bdcbe38e3e1e9179b74": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "51921dbb33f445f7ab72666ecac3f85c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, 
"grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e804d48983884eb7bfd85123482317f7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a15221a1538c43c7ac162324644f2c2d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "baac4e68e10c4e7bbffeff26b066f308": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "04ed18f2ff804865af93d4d73b347d77": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, 
"right": null, "top": null, "visibility": null, "width": null } }, "603ecce9c961468eb90510fe0587bc32": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5tqWiYIiWl96", "outputId": "07b855ab-a412-4af7-d36c-de2cc77810c3" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting transformers\n", " Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", "Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)\n", " Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n", " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m83.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n", " Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m70.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in 
/usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", "Installing collected packages: tokenizers, safetensors, huggingface-hub, transformers\n", "Successfully installed huggingface-hub-0.16.4 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.31.0\n", "Collecting accelerate\n", " Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m244.2/244.2 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.22.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.1)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.0.1+cu118)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.12.2)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (4.7.1)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.11.1)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n", "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.0.0)\n", "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (3.25.2)\n", "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (16.0.6)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n", "Installing collected packages: accelerate\n", "Successfully installed accelerate-0.21.0\n", "Collecting bitsandbytes\n", " Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.6/92.6 MB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: bitsandbytes\n", "Successfully installed bitsandbytes-0.41.1\n" ] } ], "source": [ "!pip install -U transformers\n", "!pip install -U accelerate #git+https://github.com/huggingface/accelerate.git\n", "!pip install -U bitsandbytes #git+ https://github.com/timdettmers/bitsandbytes.git" ] }, { "cell_type": "code", "source": [ "from transformers import AutoTokenizer, AutoModelForCausalLM\n", "\n", "# Load the tokenizer and model\n", "tokenizer = AutoTokenizer.from_pretrained(\"cerebras/btlm-3b-8k-base\")\n", "model = 
AutoModelForCausalLM.from_pretrained(\n", " \"cerebras/btlm-3b-8k-base\",\n", " trust_remote_code=True,\n", " torch_dtype=\"auto\",\n", " load_in_8bit=True,\n", " offload_folder=\"offload\",\n", " )\n", "\n", "# Set the prompt for generating text\n", "prompt = \"Albert Einstein was known for \"\n", "\n", "# Tokenize the prompt and convert to PyTorch tensors\n", "inputs = tokenizer(prompt, return_tensors=\"pt\")\n", "\n", "# Generate text using the model\n", "outputs = model.generate(\n", " **inputs,\n", " num_beams=5,\n", " max_new_tokens=50,\n", " early_stopping=True,\n", " no_repeat_ngram_size=2\n", ")\n", "\n", "# Convert the generated token IDs back to text\n", "generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)\n", "\n", "# Print the generated text\n", "print(generated_text[0])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 603, "referenced_widgets": [ "114bfeb2ab21477397ff356a8d50d678", "6c2a683959b74f06b71a21f62dc9d732", "bfd850f32c4748cb9fc21e3f67a1e079", "5405a44a0740459c9b2dc317f7e32af2", "5ced55e2613f4ca09c576df4936d444d", "3d205afcaa37416dbafa166e7f6bb6ba", "25969d182a7a4b569625efbb6e619bab", "29c82e0054084c54bb58af9c3518017e", "f969ec4a051d4a5ba29d285ed3f7f8f2", "c3f0dd842604416bb3fdfb40d79d57c1", "85d7256713914254b5ba39ef2eee37bb", "9826e15254ca413eb37dc7f8837f6827", "8d9500746f61474e81bcf51beaba54e1", "41b89216ec9544af91b8b7f7e2f4664e", "1538103aa26d41459b67843f917c5a1a", "48bd08d79550483f84b241127e31878c", "b4f2ddbbf9de4898a7830c79ca2e20d7", "c9ddb75f29af47b38f79b09792116673", "6f3eba08a72348df89642e7eee1fec30", "4ae0e50ecd7f424dac6c9c2c5aa62696", "a71e6379e60c46b5915f0ecd097a2a33", "0a780d58219d46978fad719257430d5f", "e685a0136ffa4280b0a49263c4f842d9", "845a84b8f7344a468e0b0b72f0708378", "a56823a90ee64b8fb87b2d2ba8857e53", "b6698a0e0ca2409daf771b0d0a419e3a", "c8ac228076fa4fb88ff37e2f3c34be93", "f16775e4439a4c4f886c4c4027d30832", "3b40ee2e5b60499e81f8c79a6383b2ec", "9a1f2210fd1e47d3ba66cf0a99f937b8", "fc2aa788745848c8b363c44e7d07116b", "d9ea0947db244309b8effbfe53267042", "f52cccc762dc4c419671b2aade6a3506", "9c7c0a7882964d8ca8031ac18af93253", "f5f57b4e23174d0fa8142dcb57207720", "50f474df9ab147f7ba094c1cdb2ba69e", "35c204e247044a11a9ee871cfbcbe430", "8596f49e4d594ae4a0e848608c7d122f", "874be31d42ce487697133b07065f815d", "41d87a1c6a6a4d32ac20f35f779207d5", "b7ab4be64f8b487893997a5d57eb9f09", "8620138609a24a3aae22da774fb32bdb", "a76b7ee703ed4ed08a1e0e1a708aa117", "44aa877b3a844922a1ef50809713fe32", "21f3adc7a34149d88b0e67b537eec488", "61c7dc85009b4ab0968b8222a49297a1", "194d14e6d61f47a28f7615f0c0071831", "1e5eea17dab34a18a3e8fafe6988ad90", "630b9cb70bed4900b97cfa3c476d6ca0", "7d0e80dbc0dc4431ba3eacc2ed719e2e", "46561fc96ed645e1bfc4039a222b2f66", "162e24639a1d462eaa67e8e8040ca879", "9ab05a9fc9134fe19ef600b8cde42b78", "b80761f44a4e4c889e3acfa06852c064", "713e206a8c894bcf965cb96a97f30be7", "85a1329fbf9e44c8b708a1ad868691d7", "9be2d523ce7e4c2daeed4f4846f16cff", "3321f3d71e684ca4afa48a1e0b889ebd", "adb1ff19806c4d9abfef424270408759", "d7a5976a0b6b4a62913bdf148d06a6bf", "efed82b2f6f84516989d07f9515a102d", "4f6b53cc236b448093d23240bd28a6d8", "b7821089bfe94594982736d3005b82e9", "d6f016012139482b9ffc48407a0e7cb1", "eee7b8b660f74f0e9e0634a17b29ecb1", "6296781e8cc94316a44c5a0a0a77f46c", "0c4007f367744ccf80f3c9af09af53de", "87a60648650a41fa9b3573be98e4adad", "0cbd3f72373544b8a946c70627e515f9", "d0f8fa0a331b4da98c0b55311ea073a0", "a312c5e9461d470ca8c3cd2184a7ee09", "8980f61b2d4b4164a21b596ce515f6e3", "d58f9dcf2c7b4fa887702b231ba0ddd1", 
"36cafb73c9fd4bb486853771b146fb44", "a31d4ba8182a41f7a164a87e0492bfe2", "48584e07c5074502acc46680e17b6dd8", "7731d185d2d0401c9ce8dd045ffc187a", "b5f78d5cabff40ca8749376141ea8d23", "8e4700723e894b7380862d08a34afe9d", "3b1d6cbf8b2b4332a3fdbe2a5a755116", "5f52ddd679124bbfb141e10103b5ac42", "289ac602411348498cd757b2a6b53a49", "fdfb46b541434685b1b0cf11c2fb8597", "c45db1178d344416b5a107048f482a3e", "b8ffc780321f405592a9112487316d31", "b24b1532289f4ed99e3b645248aff802", "6d924d2d6820449daf637270470fa50b", "ca9129ae04054d419c5a2c5b8f5b8364", "8bf00751b22e4aae8bb5e715940dea8f", "b996c1623b584cdeaa79a68fcb3f6535", "53b6219000d541a2ab09b807084e33da", "2714c5ad04fb4621baf8affc9795027a", "d6200cca0bab43e9b24a86be582833d9", "a8085708e61f4f08a9f90b881d957e88", "cc839ad647d94e2fac9bffb8e3e1c421", "0eab7e2aafd14b96bb601b11fbf6af59", "bff03d7ae4f04a55b0ba015fe9b70c54", "c95ee59f813e4ccaad4c845fe2c67512", "823526b63b3d4598a456955884ba7293" ] }, "id": "PUU3t5QeXhTM", "outputId": "0829a1e4-9ea6-4dbd-89f2-59504837dc95" }, "execution_count": 2, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/234 [00:00\", list_vars[name])\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "IP0eQEnnYPiX", "outputId": "54e56c69-0741-4669-d97c-6b5a5a518940" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "transformer.wte.weight => tensor([[ 0.0200, 0.0442, 0.0562, ..., 0.0173, -0.0238, -0.0889],\n", " [-0.0259, 0.0170, -0.0221, ..., -0.0752, -0.0635, 0.0947],\n", " [-0.0276, 0.1846, 0.1533, ..., -0.0195, 0.0299, 0.0796],\n", " ...,\n", " [ 0.1182, 0.1523, 0.0742, ..., -0.1162, 0.0177, 0.0991],\n", " [ 0.0220, -0.0579, 0.0125, ..., -0.0576, 0.0327, 0.0211],\n", " [ 0.0508, -0.0217, 0.0278, ..., -0.0308, -0.0378, 0.0013]],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.0.ln_1.weight => tensor([0.6445, 0.7344, 0.6133, ..., 0.6484, 0.7070, 0.7148], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.0.ln_1.bias => tensor([ 0.0288, 0.0082, -0.0771, ..., 0.0284, -0.0391, 0.0233],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.0.attn.c_attn.weight => tensor([[ 15, 3, -15, ..., 67, 40, 29],\n", " [ -4, 43, 26, ..., 9, 15, 19],\n", " [ 41, 33, 6, ..., -20, -27, -35],\n", " ...,\n", " [ 27, -51, 6, ..., -1, 76, -31],\n", " [-17, 3, 20, ..., -55, 15, -58],\n", " [ 40, 29, 2, ..., 39, 39, -19]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.0.attn.c_attn.bias => tensor([-0.0408, 0.0349, 0.0292, ..., -0.0286, -0.7773, -0.0108],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.0.attn.c_attn.SCB => tensor([0.0693, 0.0830, 0.0669, ..., 0.0708, 0.1543, 0.0742], device='cuda:0')\n", "transformer.h.0.attn.c_attn.weight_format => col_turing\n", "transformer.h.0.attn.c_proj.weight => tensor([[ 12, -39, -38, ..., 68, -45, -30],\n", " [ 52, -20, -48, ..., 59, -32, 70],\n", " [ 35, 71, -81, ..., -29, 13, -66],\n", " ...,\n", " [-11, 3, 44, ..., -38, -83, 30],\n", " [ -6, -8, -15, ..., 30, 3, -32],\n", " [ 5, 52, 31, ..., 43, -30, -8]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.0.attn.c_proj.bias => tensor([ 0.5938, -0.7500, -0.0056, ..., -0.4355, 0.4648, 0.4414],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.0.attn.c_proj.SCB => tensor([0.1226, 0.1001, 0.0830, ..., 0.1030, 0.0938, 0.0688], device='cuda:0')\n", "transformer.h.0.attn.c_proj.weight_format => col_turing\n", "transformer.h.0.ln_2.weight => 
tensor([0.5117, 0.5352, 0.5156, ..., 0.5078, 0.5195, 0.5391], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.0.ln_2.bias => tensor([-0.1426, 0.1338, 0.0157, ..., 0.0571, -0.0947, -0.1455],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.0.mlp.c_fc.weight => tensor([[ 17, 71, -105, ..., 33, 15, 9],\n", " [ -15, 13, 6, ..., -23, 66, -86],\n", " [ -21, -37, -62, ..., -17, 24, -50],\n", " ...,\n", " [ 42, 15, 1, ..., -20, 18, 24],\n", " [ 10, -15, 14, ..., -20, -1, -1],\n", " [ 2, -1, -2, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.0.mlp.c_fc.bias => tensor([ 0.1357, -0.0933, 0.0977, ..., -0.3086, -0.0693, 0.0530],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.0.mlp.c_fc.SCB => tensor([0.1245, 0.1108, 0.1602, ..., 0.1060, 0.1113, 0.1113], device='cuda:0')\n", "transformer.h.0.mlp.c_fc.weight_format => col_turing\n", "transformer.h.0.mlp.c_fc2.weight => tensor([[ 17, -43, -8, ..., -8, 67, 64],\n", " [ 25, -40, -8, ..., 8, -30, 61],\n", " [-59, 44, -16, ..., -37, -50, 25],\n", " ...,\n", " [-24, -42, -3, ..., -40, 20, 33],\n", " [ 21, 25, -10, ..., 37, 54, -11],\n", " [ 32, 16, 17, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.0.mlp.c_fc2.bias => tensor([-0.6328, -0.6250, 0.1611, ..., -0.2500, -0.8984, -0.8008],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.0.mlp.c_fc2.SCB => tensor([0.1172, 0.1069, 0.1060, ..., 0.1367, 0.1299, 0.1289], device='cuda:0')\n", "transformer.h.0.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.0.mlp.c_proj.weight => tensor([[ 32, 43, 34, ..., -48, -28, 103],\n", " [-20, -35, -21, ..., 66, 21, 23],\n", " [-32, 19, -11, ..., -2, -24, -1],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ -6, -1, -15, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.0.mlp.c_proj.bias => tensor([-0.7422, 0.3691, -0.2695, ..., 0.2910, 0.3066, -0.3066],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.0.mlp.c_proj.SCB => tensor([0.1328, 0.1484, 0.1279, ..., 0.1270, 0.1299, 0.1216], device='cuda:0')\n", "transformer.h.0.mlp.c_proj.weight_format => col_turing\n", "transformer.h.1.ln_1.weight => tensor([0.8438, 0.9102, 0.8672, ..., 0.8750, 0.8672, 0.9648], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.1.ln_1.bias => tensor([ 0.0544, 0.0525, 0.0654, ..., -0.0067, -0.1738, -0.0200],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.1.attn.c_attn.weight => tensor([[ 50, -3, -9, ..., 24, -10, 2],\n", " [ -27, -29, -4, ..., -17, -17, 31],\n", " [ -31, 19, 40, ..., -20, -12, -37],\n", " ...,\n", " [ 98, -108, -8, ..., 2, 9, 3],\n", " [ -11, -60, -9, ..., -19, -48, 23],\n", " [ 24, -21, 14, ..., -7, -14, 55]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.1.attn.c_attn.bias => tensor([-1.4844, 0.0466, -0.4785, ..., -0.0747, -0.0608, -0.4160],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.1.attn.c_attn.SCB => tensor([0.1465, 0.1328, 0.1279, ..., 0.1050, 0.0933, 0.0674], device='cuda:0')\n", "transformer.h.1.attn.c_attn.weight_format => col_turing\n", "transformer.h.1.attn.c_proj.weight => tensor([[ 39, -61, -16, ..., 37, -11, 3],\n", " [ 51, -58, 14, ..., 2, 45, 25],\n", " [ 51, 42, -3, ..., -51, 17, -27],\n", " ...,\n", " [ 14, 14, -23, ..., 14, 16, 9],\n", " [ 12, 34, 16, ..., 4, 24, -13],\n", " [-17, 18, 13, ..., 62, -13, -39]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.1.attn.c_proj.bias => tensor([ 
0.1816, 0.0825, -0.3301, ..., 0.1484, -0.1641, 0.5312],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.1.attn.c_proj.SCB => tensor([0.1260, 0.0879, 0.0796, ..., 0.0869, 0.0908, 0.0791], device='cuda:0')\n", "transformer.h.1.attn.c_proj.weight_format => col_turing\n", "transformer.h.1.ln_2.weight => tensor([0.5352, 0.6094, 0.5977, ..., 0.5625, 0.6016, 0.6211], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.1.ln_2.bias => tensor([ 0.0344, 0.0225, 0.0618, ..., -0.0493, 0.0116, -0.1426],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.1.mlp.c_fc.weight => tensor([[ 11, 56, 8, ..., 20, 38, -61],\n", " [-41, -37, 26, ..., 46, -10, 4],\n", " [ 29, 20, -29, ..., -8, 15, 75],\n", " ...,\n", " [ 53, 66, -38, ..., 16, 16, 8],\n", " [-44, 5, 6, ..., -12, -37, 10],\n", " [-29, -33, -32, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.1.mlp.c_fc.bias => tensor([ 0.2041, -0.1729, 0.1299, ..., 0.2246, -0.1216, 0.1445],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.1.mlp.c_fc.SCB => tensor([0.1680, 0.1328, 0.2500, ..., 0.1206, 0.1167, 0.1128], device='cuda:0')\n", "transformer.h.1.mlp.c_fc.weight_format => col_turing\n", "transformer.h.1.mlp.c_fc2.weight => tensor([[ -13, -66, -31, ..., 47, 11, 24],\n", " [ -15, -7, 15, ..., -13, 10, 6],\n", " [ 7, -54, 14, ..., -32, -6, -57],\n", " ...,\n", " [ -16, -40, -52, ..., 51, -24, 12],\n", " [ 16, 17, -14, ..., -14, 6, -10],\n", " [ -76, -2, -109, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.1.mlp.c_fc2.bias => tensor([-0.9844, -0.8047, -0.4961, ..., -0.6406, -0.7461, -0.4609],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.1.mlp.c_fc2.SCB => tensor([0.1235, 0.1172, 0.1631, ..., 0.1099, 0.1177, 0.1055], device='cuda:0')\n", "transformer.h.1.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.1.mlp.c_proj.weight => tensor([[ 7, -15, 3, ..., 25, -45, 40],\n", " [ 5, 45, 28, ..., -14, 12, -15],\n", " [ 16, 4, -31, ..., -17, -33, -15],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 7, -21, 40, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.1.mlp.c_proj.bias => tensor([ 0.3750, 0.1338, 0.1177, ..., -0.1943, 0.3184, -0.3828],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.1.mlp.c_proj.SCB => tensor([0.1475, 0.1406, 0.1328, ..., 0.1318, 0.1279, 0.1504], device='cuda:0')\n", "transformer.h.1.mlp.c_proj.weight_format => col_turing\n", "transformer.h.2.ln_1.weight => tensor([0.9844, 1.0859, 1.0703, ..., 0.9453, 0.9258, 1.0156], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.2.ln_1.bias => tensor([-0.0258, 0.1226, 0.0175, ..., -0.0105, -0.0170, -0.0311],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.2.attn.c_attn.weight => tensor([[ 41, -32, -41, ..., -43, -37, -50],\n", " [ -3, 37, -51, ..., 14, -35, -33],\n", " [ 33, -19, 6, ..., -52, 49, 6],\n", " ...,\n", " [-14, -67, -14, ..., -25, 60, 16],\n", " [-44, -14, 50, ..., 33, -5, -64],\n", " [-22, -4, 0, ..., 50, -33, -9]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.2.attn.c_attn.bias => tensor([-0.9570, 0.2344, 0.7422, ..., 0.0215, -0.4238, -1.2500],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.2.attn.c_attn.SCB => tensor([0.1318, 0.1396, 0.1777, ..., 0.0535, 0.0771, 0.0693], device='cuda:0')\n", "transformer.h.2.attn.c_attn.weight_format => col_turing\n", "transformer.h.2.attn.c_proj.weight => tensor([[ 37, -32, -41, ..., -33, 
14, 26],\n", " [ 21, -14, -4, ..., -38, 4, 47],\n", " [ 58, 72, 24, ..., -7, -47, -67],\n", " ...,\n", " [ 7, 11, 44, ..., 21, 37, -47],\n", " [ 4, 6, 4, ..., -11, -35, -59],\n", " [ 12, 37, 12, ..., 8, 7, -31]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.2.attn.c_proj.bias => tensor([ 0.1602, 0.1592, -0.1699, ..., -0.0430, 0.1406, -0.5312],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.2.attn.c_proj.SCB => tensor([0.0811, 0.0776, 0.0967, ..., 0.0889, 0.0825, 0.0898], device='cuda:0')\n", "transformer.h.2.attn.c_proj.weight_format => col_turing\n", "transformer.h.2.ln_2.weight => tensor([0.4805, 0.6445, 0.6719, ..., 0.5781, 0.6445, 0.6992], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.2.ln_2.bias => tensor([-0.0011, 0.0074, 0.0491, ..., 0.0035, -0.0540, 0.1182],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.2.mlp.c_fc.weight => tensor([[ 7, -15, 36, ..., 51, -84, -22],\n", " [ 5, -9, -56, ..., 23, 6, 2],\n", " [ 23, 46, 77, ..., 7, 11, 31],\n", " ...,\n", " [ -9, 4, 1, ..., -25, 9, 17],\n", " [ 44, -10, -4, ..., 1, 37, 9],\n", " [-49, 35, 11, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.2.mlp.c_fc.bias => tensor([-0.0601, -0.0957, 0.2500, ..., 0.0879, -0.0811, 0.0053],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.2.mlp.c_fc.SCB => tensor([0.1162, 0.1187, 0.1104, ..., 0.1216, 0.1235, 0.1133], device='cuda:0')\n", "transformer.h.2.mlp.c_fc.weight_format => col_turing\n", "transformer.h.2.mlp.c_fc2.weight => tensor([[ 8, 28, 23, ..., -5, 59, -2],\n", " [ 2, 22, 33, ..., -1, -3, 3],\n", " [-27, -70, -45, ..., 28, 39, -10],\n", " ...,\n", " [ -2, -59, -3, ..., -59, -45, -6],\n", " [-14, -29, 35, ..., 4, 57, 5],\n", " [ 5, 32, -1, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.2.mlp.c_fc2.bias => tensor([-1.0625, -0.9023, -0.6836, ..., -1.0234, -1.0156, -1.1094],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.2.mlp.c_fc2.SCB => tensor([0.1226, 0.1206, 0.1021, ..., 0.1094, 0.1138, 0.1279], device='cuda:0')\n", "transformer.h.2.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.2.mlp.c_proj.weight => tensor([[ -1, 18, 22, ..., 0, 20, 61],\n", " [-11, -18, 24, ..., 10, 11, -70],\n", " [ 21, 51, -18, ..., 46, 36, -29],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [-64, -21, 17, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.2.mlp.c_proj.bias => tensor([-0.2051, 0.8125, -0.2949, ..., 0.0204, -0.0291, 0.2002],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.2.mlp.c_proj.SCB => tensor([0.1602, 0.1523, 0.1318, ..., 0.1475, 0.1357, 0.1367], device='cuda:0')\n", "transformer.h.2.mlp.c_proj.weight_format => col_turing\n", "transformer.h.3.ln_1.weight => tensor([1.1484, 1.2812, 1.3047, ..., 1.1016, 1.0781, 1.1406], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.3.ln_1.bias => tensor([ 0.0884, -0.2637, 0.1914, ..., -0.0078, -0.0703, 0.0542],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.3.attn.c_attn.weight => tensor([[ 7, 9, 25, ..., 62, 20, 42],\n", " [ -6, -1, 41, ..., 29, 32, 22],\n", " [ -6, -75, 33, ..., -11, 127, 33],\n", " ...,\n", " [ 20, 3, -22, ..., 34, 5, -45],\n", " [ -7, 20, -15, ..., -20, -57, -7],\n", " [ 43, 6, -12, ..., -46, -5, -38]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.3.attn.c_attn.bias => tensor([-0.2754, 0.3555, -0.2090, ..., 0.0835, 0.0564, -0.0864],\n", " device='cuda:0', 
dtype=torch.bfloat16)\n", "transformer.h.3.attn.c_attn.SCB => tensor([0.1660, 0.1318, 0.1289, ..., 0.1030, 0.0967, 0.1069], device='cuda:0')\n", "transformer.h.3.attn.c_attn.weight_format => col_turing\n", "transformer.h.3.attn.c_proj.weight => tensor([[ 8, 16, 33, ..., -18, -5, -30],\n", " [ 34, 66, -33, ..., -20, 44, 21],\n", " [ 25, -29, 53, ..., -6, 65, 17],\n", " ...,\n", " [ -7, -2, -10, ..., -36, 10, -5],\n", " [-29, 39, 5, ..., 27, 37, 45],\n", " [-35, 31, -15, ..., 26, 14, -25]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.3.attn.c_proj.bias => tensor([ 0.1338, 0.2637, 0.4180, ..., -0.0247, 0.0996, -0.3008],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.3.attn.c_proj.SCB => tensor([0.1191, 0.0879, 0.0952, ..., 0.0786, 0.0952, 0.1001], device='cuda:0')\n", "transformer.h.3.attn.c_proj.weight_format => col_turing\n", "transformer.h.3.ln_2.weight => tensor([0.4980, 0.7227, 0.7617, ..., 0.6445, 0.6953, 0.8242], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.3.ln_2.bias => tensor([-0.0168, -0.1777, 0.0124, ..., -0.0107, -0.0466, 0.1289],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.3.mlp.c_fc.weight => tensor([[-22, 9, -1, ..., 38, -35, -7],\n", " [ -1, 6, -7, ..., -39, 16, -79],\n", " [-29, -4, 11, ..., -68, 25, -1],\n", " ...,\n", " [-35, -47, 8, ..., -13, -39, -14],\n", " [-28, -4, -20, ..., 53, -19, -26],\n", " [ 10, -43, -23, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.3.mlp.c_fc.bias => tensor([-0.0199, 0.0125, 0.0427, ..., -0.0620, 0.0303, -0.0175],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.3.mlp.c_fc.SCB => tensor([0.1230, 0.1118, 0.1069, ..., 0.1396, 0.1396, 0.1191], device='cuda:0')\n", "transformer.h.3.mlp.c_fc.weight_format => col_turing\n", "transformer.h.3.mlp.c_fc2.weight => tensor([[ 45, 49, -12, ..., 41, 37, -33],\n", " [ 3, 6, -93, ..., -37, 20, -11],\n", " [ 24, -30, -35, ..., -12, -6, 12],\n", " ...,\n", " [ 89, -20, -40, ..., -3, -12, -18],\n", " [-37, -15, 3, ..., 45, 76, -48],\n", " [-18, 74, -7, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.3.mlp.c_fc2.bias => tensor([-1.5625, -0.8984, -1.1484, ..., -1.5000, -1.2188, -1.3203],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.3.mlp.c_fc2.SCB => tensor([0.1230, 0.1089, 0.1133, ..., 0.1138, 0.1074, 0.1201], device='cuda:0')\n", "transformer.h.3.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.3.mlp.c_proj.weight => tensor([[ 51, -11, 24, ..., 4, 10, 26],\n", " [ 10, -25, -3, ..., 23, -21, -5],\n", " [-35, 0, 3, ..., -9, 31, 17],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ -2, 40, -21, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.3.mlp.c_proj.bias => tensor([-0.4141, 0.7773, 0.6016, ..., 0.0864, -0.4141, -0.0967],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.3.mlp.c_proj.SCB => tensor([0.1338, 0.1133, 0.1187, ..., 0.1406, 0.1377, 0.1426], device='cuda:0')\n", "transformer.h.3.mlp.c_proj.weight_format => col_turing\n", "transformer.h.4.ln_1.weight => tensor([1.3516, 1.4766, 1.5000, ..., 1.3438, 1.2812, 1.3828], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.4.ln_1.bias => tensor([ 0.0056, -0.6016, -0.2295, ..., -0.0483, 0.0371, 0.1641],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.4.attn.c_attn.weight => tensor([[ -1, -13, -19, ..., 11, 13, -8],\n", " [-42, 23, 48, ..., -6, -60, -11],\n", " [-15, -29, 15, ..., -27, 4, 
12],\n", " ...,\n", " [-38, -62, 3, ..., 35, 19, 32],\n", " [-14, 58, 56, ..., -63, 20, 28],\n", " [ 34, 31, 4, ..., 73, -7, 15]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.4.attn.c_attn.bias => tensor([-0.6445, 1.0391, 1.3516, ..., -0.1641, -0.0425, 0.1270],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.4.attn.c_attn.SCB => tensor([0.1504, 0.1396, 0.1494, ..., 0.1089, 0.0967, 0.1245], device='cuda:0')\n", "transformer.h.4.attn.c_attn.weight_format => col_turing\n", "transformer.h.4.attn.c_proj.weight => tensor([[ 16, -17, -3, ..., 77, -15, -28],\n", " [ 0, -8, -24, ..., -9, 8, 4],\n", " [ 27, -5, -7, ..., 26, -20, 6],\n", " ...,\n", " [ 43, -3, 53, ..., 28, 13, -59],\n", " [ 22, -37, -6, ..., 91, -30, -62],\n", " [ 75, 48, -33, ..., -13, 16, -62]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.4.attn.c_proj.bias => tensor([ 0.1543, -0.2314, 0.0996, ..., -0.0688, 0.0140, 0.3613],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.4.attn.c_proj.SCB => tensor([0.1177, 0.0918, 0.0933, ..., 0.0835, 0.0986, 0.1074], device='cuda:0')\n", "transformer.h.4.attn.c_proj.weight_format => col_turing\n", "transformer.h.4.ln_2.weight => tensor([0.4961, 0.7656, 0.7812, ..., 0.6680, 0.7266, 0.8125], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.4.ln_2.bias => tensor([-0.0168, -0.2344, -0.1177, ..., 0.0061, 0.0148, 0.0334],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.4.mlp.c_fc.weight => tensor([[-33, -36, 18, ..., 60, 33, 41],\n", " [ 11, 44, -26, ..., 27, -63, 55],\n", " [ 60, 19, 28, ..., -7, -25, 55],\n", " ...,\n", " [-22, -60, -9, ..., -53, 5, -21],\n", " [ 0, -33, 32, ..., -44, 37, 16],\n", " [ 21, -16, 9, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.4.mlp.c_fc.bias => tensor([-0.1846, -0.1758, 0.1309, ..., 0.1758, 0.0801, -0.0393],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.4.mlp.c_fc.SCB => tensor([0.1211, 0.1104, 0.1216, ..., 0.1738, 0.1133, 0.1167], device='cuda:0')\n", "transformer.h.4.mlp.c_fc.weight_format => col_turing\n", "transformer.h.4.mlp.c_fc2.weight => tensor([[ -3, -2, -37, ..., 46, -22, -17],\n", " [ 24, -10, -69, ..., -44, -21, -9],\n", " [-20, 52, 6, ..., -41, -76, 29],\n", " ...,\n", " [ 37, -9, -5, ..., -5, 17, -23],\n", " [ 34, -28, -2, ..., 34, 13, -15],\n", " [-31, -7, -30, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.4.mlp.c_fc2.bias => tensor([-0.8750, -0.9336, -0.9102, ..., 0.0211, -1.1250, -1.1250],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.4.mlp.c_fc2.SCB => tensor([0.1279, 0.1138, 0.1045, ..., 0.1357, 0.1279, 0.1094], device='cuda:0')\n", "transformer.h.4.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.4.mlp.c_proj.weight => tensor([[ -4, -20, 5, ..., -29, -13, 31],\n", " [-10, -2, -39, ..., 31, -37, 28],\n", " [ 10, -67, -3, ..., 37, -27, 3],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ -8, 55, 23, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.4.mlp.c_proj.bias => tensor([-0.0972, 0.4922, -0.0957, ..., 0.0039, 0.0359, -0.2139],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.4.mlp.c_proj.SCB => tensor([0.1533, 0.1299, 0.1260, ..., 0.1348, 0.1279, 0.1187], device='cuda:0')\n", "transformer.h.4.mlp.c_proj.weight_format => col_turing\n", "transformer.h.5.ln_1.weight => tensor([1.3906, 1.6406, 1.6562, ..., 1.4297, 1.4219, 1.5156], device='cuda:0',\n", " dtype=torch.bfloat16)\n", 
"transformer.h.5.ln_1.bias => tensor([-0.0089, -0.6406, -0.2188, ..., -0.0320, 0.0225, 0.1406],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.5.attn.c_attn.weight => tensor([[-58, 102, -14, ..., 48, 21, -3],\n", " [-14, 9, 1, ..., 4, 24, -21],\n", " [-43, -19, 27, ..., -7, 24, -12],\n", " ...,\n", " [ -6, -17, -14, ..., -14, 15, -12],\n", " [ 14, 8, 6, ..., 44, 21, 9],\n", " [ -5, -18, 59, ..., -73, -25, -35]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.5.attn.c_attn.bias => tensor([-0.9922, -0.4434, -0.2812, ..., -0.1113, 0.0322, -0.0303],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.5.attn.c_attn.SCB => tensor([0.1709, 0.1855, 0.1670, ..., 0.1182, 0.1250, 0.1250], device='cuda:0')\n", "transformer.h.5.attn.c_attn.weight_format => col_turing\n", "transformer.h.5.attn.c_proj.weight => tensor([[-20, 4, 7, ..., -11, -50, 27],\n", " [ 22, -24, -15, ..., -23, 20, 1],\n", " [ -5, -1, 32, ..., 30, -11, -32],\n", " ...,\n", " [-18, -16, 0, ..., 51, -38, 15],\n", " [ 35, -23, -50, ..., 44, -30, 46],\n", " [ 2, -17, -34, ..., 44, 58, -26]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.5.attn.c_proj.bias => tensor([-0.0786, 0.2441, 0.1445, ..., -0.0796, 0.0288, -0.1514],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.5.attn.c_proj.SCB => tensor([0.1089, 0.1021, 0.1108, ..., 0.0845, 0.1147, 0.1006], device='cuda:0')\n", "transformer.h.5.attn.c_proj.weight_format => col_turing\n", "transformer.h.5.ln_2.weight => tensor([0.5195, 0.8477, 0.8867, ..., 0.7070, 0.7852, 0.9453], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.5.ln_2.bias => tensor([ 0.0144, -0.3477, -0.1094, ..., -0.0109, 0.0287, 0.1045],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.5.mlp.c_fc.weight => tensor([[ 0, -73, 85, ..., 6, -25, 12],\n", " [ 16, 28, -40, ..., -16, -48, -47],\n", " [-20, -2, 11, ..., 23, -13, -4],\n", " ...,\n", " [ 61, 22, 53, ..., 3, -48, 0],\n", " [ 69, 11, 45, ..., 61, -29, -2],\n", " [ 24, -55, -39, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.5.mlp.c_fc.bias => tensor([-0.1680, -0.1055, 0.2520, ..., 0.4844, 0.0181, 0.0219],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.5.mlp.c_fc.SCB => tensor([0.1260, 0.1367, 0.1143, ..., 0.1455, 0.1240, 0.1260], device='cuda:0')\n", "transformer.h.5.mlp.c_fc.weight_format => col_turing\n", "transformer.h.5.mlp.c_fc2.weight => tensor([[ 6, 21, -16, ..., 5, -53, 23],\n", " [ 8, -28, 9, ..., 36, 10, 22],\n", " [ 4, -33, -13, ..., 4, 30, -19],\n", " ...,\n", " [ 0, -26, 32, ..., -19, -29, -22],\n", " [ 47, -48, -27, ..., -13, 37, -18],\n", " [-15, -7, 4, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.5.mlp.c_fc2.bias => tensor([-1.0469, -1.1875, -1.8359, ..., 0.3242, -1.4141, -1.2656],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.5.mlp.c_fc2.SCB => tensor([0.1260, 0.1191, 0.1216, ..., 0.0654, 0.1426, 0.1289], device='cuda:0')\n", "transformer.h.5.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.5.mlp.c_proj.weight => tensor([[ 17, -48, 12, ..., 36, 40, -67],\n", " [ 52, 50, -21, ..., 23, -59, 59],\n", " [-34, 16, 17, ..., -42, -29, 48],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ -6, -47, 12, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.5.mlp.c_proj.bias => tensor([-0.6055, 0.5000, 0.2656, ..., -0.0659, 0.0188, -0.1055],\n", " device='cuda:0', dtype=torch.bfloat16)\n", 
"transformer.h.5.mlp.c_proj.SCB => tensor([0.1309, 0.1240, 0.1484, ..., 0.1514, 0.1670, 0.1211], device='cuda:0')\n", "transformer.h.5.mlp.c_proj.weight_format => col_turing\n", "transformer.h.6.ln_1.weight => tensor([1.5391, 1.7812, 1.8906, ..., 1.5000, 1.4688, 1.6484], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.6.ln_1.bias => tensor([ 0.0815, -0.7734, -0.3301, ..., -0.0099, 0.0498, 0.1680],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.6.attn.c_attn.weight => tensor([[ 19, 34, 6, ..., 72, -26, -84],\n", " [-29, 37, -37, ..., -14, -58, 16],\n", " [-21, 38, -1, ..., -79, -19, -28],\n", " ...,\n", " [-39, -4, 7, ..., 29, 18, -64],\n", " [ 20, -7, -10, ..., -3, -55, 42],\n", " [ 47, -9, -26, ..., 78, 6, -27]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.6.attn.c_attn.bias => tensor([ 0.2734, 0.0593, -0.1963, ..., 0.0498, 0.1777, 0.0608],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.6.attn.c_attn.SCB => tensor([0.1289, 0.1309, 0.1152, ..., 0.0664, 0.1074, 0.0708], device='cuda:0')\n", "transformer.h.6.attn.c_attn.weight_format => col_turing\n", "transformer.h.6.attn.c_proj.weight => tensor([[ 4, 20, -23, ..., -9, -10, 13],\n", " [ 13, -21, -14, ..., 22, 29, 19],\n", " [ 28, 23, 19, ..., -24, 17, 8],\n", " ...,\n", " [-13, -23, 6, ..., -24, 12, -12],\n", " [-57, -2, -29, ..., -25, 50, -8],\n", " [ -5, -9, -9, ..., 7, 33, 19]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.6.attn.c_proj.bias => tensor([ 0.2598, 0.1226, -0.1816, ..., 0.0850, 0.0391, 0.1982],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.6.attn.c_proj.SCB => tensor([0.1250, 0.0962, 0.0962, ..., 0.0898, 0.0918, 0.1050], device='cuda:0')\n", "transformer.h.6.attn.c_proj.weight_format => col_turing\n", "transformer.h.6.ln_2.weight => tensor([0.5156, 0.8828, 0.8945, ..., 0.7578, 0.8320, 1.0078], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.6.ln_2.bias => tensor([ 0.0312, -0.3965, -0.0923, ..., -0.0024, 0.0253, 0.0718],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.6.mlp.c_fc.weight => tensor([[-41, 10, 20, ..., 16, 34, 19],\n", " [ 14, 46, -3, ..., -29, 1, -3],\n", " [ 6, 18, 28, ..., 10, 62, -5],\n", " ...,\n", " [ 11, 1, -17, ..., 18, -53, -16],\n", " [ -2, -11, -13, ..., -21, -4, 24],\n", " [-41, 0, -36, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.6.mlp.c_fc.bias => tensor([-0.0194, -0.2656, 0.1445, ..., -0.0496, -0.0115, 0.0579],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.6.mlp.c_fc.SCB => tensor([0.1289, 0.1387, 0.1611, ..., 0.1270, 0.1309, 0.1318], device='cuda:0')\n", "transformer.h.6.mlp.c_fc.weight_format => col_turing\n", "transformer.h.6.mlp.c_fc2.weight => tensor([[ 20, 44, 13, ..., 25, -6, 55],\n", " [-17, 7, 21, ..., 30, -28, -26],\n", " [-39, 67, 3, ..., 55, 5, -25],\n", " ...,\n", " [ -1, -3, -15, ..., -27, -1, 21],\n", " [ 13, 23, 42, ..., -48, -14, 11],\n", " [ 18, -9, -20, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.6.mlp.c_fc2.bias => tensor([-1.1719, -0.9688, -0.9688, ..., -1.0859, -0.9141, -1.4141],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.6.mlp.c_fc2.SCB => tensor([0.1133, 0.1206, 0.1177, ..., 0.0972, 0.1030, 0.1230], device='cuda:0')\n", "transformer.h.6.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.6.mlp.c_proj.weight => tensor([[ 43, -58, 10, ..., 9, -35, 3],\n", " [ 36, -22, -33, ..., -54, 5, 45],\n", " [ 58, -4, -7, ..., -44, 31, -43],\n", " 
...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 15, 27, -15, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.6.mlp.c_proj.bias => tensor([-0.4629, 0.4902, 0.2930, ..., -0.0162, 0.0447, 0.0059],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.6.mlp.c_proj.SCB => tensor([0.1514, 0.1543, 0.1406, ..., 0.1348, 0.1338, 0.1387], device='cuda:0')\n", "transformer.h.6.mlp.c_proj.weight_format => col_turing\n", "transformer.h.7.ln_1.weight => tensor([1.6094, 1.8906, 2.0625, ..., 1.6719, 1.6719, 1.7656], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.7.ln_1.bias => tensor([ 0.0342, -0.7891, -0.3242, ..., -0.0060, 0.0459, 0.1631],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.7.attn.c_attn.weight => tensor([[ 18, 18, -57, ..., -33, -5, -22],\n", " [ 8, -57, -4, ..., 33, 14, 40],\n", " [ 12, 43, -27, ..., -25, -8, 13],\n", " ...,\n", " [ 65, 42, -19, ..., 38, -12, -55],\n", " [ 0, -38, -47, ..., 24, 11, 5],\n", " [ 24, 63, 9, ..., 21, 4, 40]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.7.attn.c_attn.bias => tensor([ 0.2070, 0.5586, -1.0469, ..., 0.3203, 0.1191, 0.0442],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.7.attn.c_attn.SCB => tensor([0.1367, 0.1201, 0.1396, ..., 0.1201, 0.1416, 0.1289], device='cuda:0')\n", "transformer.h.7.attn.c_attn.weight_format => col_turing\n", "transformer.h.7.attn.c_proj.weight => tensor([[ 28, 1, -22, ..., 18, 0, -13],\n", " [ -2, -1, -12, ..., -14, -20, -37],\n", " [ 0, -1, -29, ..., 6, -10, -9],\n", " ...,\n", " [ 14, -22, 28, ..., 4, -44, 34],\n", " [ -14, -109, 1, ..., 74, -17, 23],\n", " [ 22, -51, 5, ..., -7, 11, -17]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.7.attn.c_proj.bias => tensor([-0.1699, 0.4590, 0.2256, ..., 0.1187, -0.0059, -0.0649],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.7.attn.c_proj.SCB => tensor([0.1221, 0.1021, 0.0928, ..., 0.0908, 0.0908, 0.1025], device='cuda:0')\n", "transformer.h.7.attn.c_proj.weight_format => col_turing\n", "transformer.h.7.ln_2.weight => tensor([0.5117, 0.8906, 0.9062, ..., 0.7422, 0.8203, 0.9961], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.7.ln_2.bias => tensor([ 0.0258, -0.3945, -0.1436, ..., -0.0069, 0.0115, 0.1074],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.7.mlp.c_fc.weight => tensor([[ 12, -30, 16, ..., -26, 13, -9],\n", " [ 43, 60, -30, ..., -26, 40, -32],\n", " [ 15, -9, 19, ..., -43, 16, 0],\n", " ...,\n", " [ 1, -75, -19, ..., -14, 18, -16],\n", " [ 12, -47, -20, ..., -58, -5, 44],\n", " [-17, -69, 15, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.7.mlp.c_fc.bias => tensor([-0.0525, -0.1953, 0.0981, ..., -0.0510, -0.0967, 0.2969],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.7.mlp.c_fc.SCB => tensor([0.1216, 0.1279, 0.1631, ..., 0.1108, 0.1338, 0.1680], device='cuda:0')\n", "transformer.h.7.mlp.c_fc.weight_format => col_turing\n", "transformer.h.7.mlp.c_fc2.weight => tensor([[-23, 38, -7, ..., -35, 52, -52],\n", " [ 28, -53, -6, ..., 29, 82, -4],\n", " [-12, -47, 69, ..., -34, 11, 13],\n", " ...,\n", " [ -4, -81, 11, ..., -22, 12, -19],\n", " [ -8, -49, -47, ..., 34, 47, 22],\n", " [-49, 55, -16, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.7.mlp.c_fc2.bias => tensor([-1.1328, -1.1875, -1.0312, ..., 0.7148, -1.0312, -1.4609],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.7.mlp.c_fc2.SCB => 
tensor([0.1250, 0.1147, 0.1089, ..., 0.1025, 0.1177, 0.1206], device='cuda:0')\n", "transformer.h.7.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.7.mlp.c_proj.weight => tensor([[-22, 16, 4, ..., 29, 48, 18],\n", " [-44, 32, -8, ..., 35, 30, -37],\n", " [-16, -14, 29, ..., 26, 30, -32],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 54, 51, -34, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.7.mlp.c_proj.bias => tensor([-1.2422, 0.5234, 0.3496, ..., 0.0991, 0.1416, -0.0052],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.7.mlp.c_proj.SCB => tensor([0.1553, 0.1338, 0.1309, ..., 0.1318, 0.1738, 0.1406], device='cuda:0')\n", "transformer.h.7.mlp.c_proj.weight_format => col_turing\n", "transformer.h.8.ln_1.weight => tensor([1.4297, 1.7188, 1.8281, ..., 1.5000, 1.5078, 1.5938], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.8.ln_1.bias => tensor([ 0.1162, -0.7422, -0.2832, ..., -0.0579, 0.0067, 0.1494],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.8.attn.c_attn.weight => tensor([[-64, 21, 30, ..., 27, 10, 32],\n", " [ 3, 32, -18, ..., -14, -14, 4],\n", " [ 27, -15, -7, ..., 14, 30, 6],\n", " ...,\n", " [-38, 14, 44, ..., 21, 26, -9],\n", " [-19, -10, -17, ..., 32, 50, 47],\n", " [ 10, -52, 31, ..., 14, -13, -13]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.8.attn.c_attn.bias => tensor([-0.1206, -0.3984, 0.0898, ..., -0.0593, 0.0515, 0.0952],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.8.attn.c_attn.SCB => tensor([0.1338, 0.1494, 0.1660, ..., 0.1050, 0.1079, 0.1279], device='cuda:0')\n", "transformer.h.8.attn.c_attn.weight_format => col_turing\n", "transformer.h.8.attn.c_proj.weight => tensor([[ 9, -1, 28, ..., 13, 19, 10],\n", " [ 39, 4, -15, ..., -29, -5, -38],\n", " [-18, 19, 7, ..., -8, 2, 4],\n", " ...,\n", " [-30, -44, 0, ..., -64, 30, -3],\n", " [ 7, -1, -5, ..., -29, -38, -47],\n", " [-22, -5, 0, ..., 34, 10, 8]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.8.attn.c_proj.bias => tensor([ 0.2734, 0.7383, 0.1123, ..., -0.1709, 0.0674, -0.1973],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.8.attn.c_proj.SCB => tensor([0.1235, 0.0972, 0.0972, ..., 0.0864, 0.0928, 0.1147], device='cuda:0')\n", "transformer.h.8.attn.c_proj.weight_format => col_turing\n", "transformer.h.8.ln_2.weight => tensor([0.5195, 0.8867, 0.9102, ..., 0.7695, 0.8125, 1.0312], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.8.ln_2.bias => tensor([ 0.0099, -0.4277, -0.1226, ..., -0.0096, -0.0047, 0.1299],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.8.mlp.c_fc.weight => tensor([[-37, 3, 27, ..., 13, 21, 17],\n", " [ 16, -2, 35, ..., -33, -14, -47],\n", " [-14, 19, 15, ..., -30, 2, 7],\n", " ...,\n", " [-21, 45, 8, ..., -24, 11, -28],\n", " [ 15, 24, -11, ..., -38, -37, 4],\n", " [ 18, -9, -5, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.8.mlp.c_fc.bias => tensor([-0.0427, 0.2891, 0.0620, ..., 2.3281, -0.7383, 0.1328],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.8.mlp.c_fc.SCB => tensor([0.1680, 0.1562, 0.1260, ..., 0.0879, 0.1309, 0.1348], device='cuda:0')\n", "transformer.h.8.mlp.c_fc.weight_format => col_turing\n", "transformer.h.8.mlp.c_fc2.weight => tensor([[-17, -18, 9, ..., -24, 18, 53],\n", " [ -2, 19, -9, ..., -41, 17, 68],\n", " [-40, -9, -4, ..., 74, 44, 35],\n", " ...,\n", " [ 8, -40, 51, ..., -28, -28, -12],\n", " [ 27, 22, -56, 
..., 41, 20, 8],\n", " [ 70, -25, -9, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.8.mlp.c_fc2.bias => tensor([-1.1172, -1.3438, -1.2891, ..., -0.4941, -0.3652, -1.2656],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.8.mlp.c_fc2.SCB => tensor([0.1064, 0.1128, 0.1177, ..., 0.1143, 0.1099, 0.1245], device='cuda:0')\n", "transformer.h.8.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.8.mlp.c_proj.weight => tensor([[ 24, -17, 5, ..., -8, 16, 21],\n", " [ 13, -17, -48, ..., -27, -9, -16],\n", " [ -3, 7, 20, ..., 13, -23, -10],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 1, 31, 13, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.8.mlp.c_proj.bias => tensor([-0.9258, 0.5781, 0.2061, ..., -0.0283, -0.0243, 0.3496],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.8.mlp.c_proj.SCB => tensor([0.1152, 0.1553, 0.1426, ..., 0.1533, 0.1318, 0.1328], device='cuda:0')\n", "transformer.h.8.mlp.c_proj.weight_format => col_turing\n", "transformer.h.9.ln_1.weight => tensor([1.6641, 2.0781, 2.2344, ..., 1.7656, 1.7891, 1.8516], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.9.ln_1.bias => tensor([ 0.0820, -0.8203, -0.3457, ..., -0.0569, 0.0393, 0.1699],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.9.attn.c_attn.weight => tensor([[-37, 8, 50, ..., -25, 24, 15],\n", " [-11, 24, -50, ..., 11, -61, -14],\n", " [-27, -37, 16, ..., 13, -18, 52],\n", " ...,\n", " [ 1, -26, -34, ..., -8, 26, 22],\n", " [-23, 13, -41, ..., -64, -33, -14],\n", " [ 34, 27, 21, ..., -41, -60, -6]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.9.attn.c_attn.bias => tensor([-0.4395, -0.4844, -0.8438, ..., -0.0304, 0.0082, -0.0062],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.9.attn.c_attn.SCB => tensor([0.1191, 0.1221, 0.1128, ..., 0.1074, 0.1099, 0.0957], device='cuda:0')\n", "transformer.h.9.attn.c_attn.weight_format => col_turing\n", "transformer.h.9.attn.c_proj.weight => tensor([[ 18, -6, 21, ..., -13, -45, -3],\n", " [ 21, -46, -26, ..., -22, -10, 3],\n", " [ 2, 22, 50, ..., -24, 29, 10],\n", " ...,\n", " [-37, 35, 33, ..., 20, 13, -23],\n", " [ 3, -8, -2, ..., 17, -54, -11],\n", " [ -5, -26, 49, ..., -31, -9, -9]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.9.attn.c_proj.bias => tensor([ 0.0042, 0.6445, 0.0703, ..., -0.0354, -0.1455, -0.2402],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.9.attn.c_proj.SCB => tensor([0.1455, 0.1035, 0.1055, ..., 0.1099, 0.0918, 0.1201], device='cuda:0')\n", "transformer.h.9.attn.c_proj.weight_format => col_turing\n", "transformer.h.9.ln_2.weight => tensor([0.5547, 0.9375, 0.9375, ..., 0.8008, 0.8359, 1.0625], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.9.ln_2.bias => tensor([ 0.0258, -0.3887, -0.1455, ..., -0.0293, 0.0444, 0.1406],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.9.mlp.c_fc.weight => tensor([[-15, 21, -93, ..., -22, -15, 28],\n", " [ 41, 2, 58, ..., -24, 35, -19],\n", " [ 45, 27, -24, ..., -87, 25, 61],\n", " ...,\n", " [ -9, -35, 19, ..., -18, -1, -56],\n", " [-27, -21, -22, ..., -54, -40, -11],\n", " [ 39, 31, -2, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.9.mlp.c_fc.bias => tensor([ 0.0273, 0.2559, 0.0071, ..., 0.0188, -0.0615, 0.1011],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.9.mlp.c_fc.SCB => tensor([0.1426, 0.1611, 0.1582, ..., 0.1157, 0.1187, 
0.1328], device='cuda:0')\n", "transformer.h.9.mlp.c_fc.weight_format => col_turing\n", "transformer.h.9.mlp.c_fc2.weight => tensor([[-12, -39, 3, ..., -6, 26, 27],\n", " [-15, 9, -19, ..., 17, 8, 31],\n", " [-31, 25, 18, ..., -18, -30, -3],\n", " ...,\n", " [-12, -21, 115, ..., 14, 3, -21],\n", " [-42, -41, -41, ..., 55, -18, -30],\n", " [ 40, 47, -20, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.9.mlp.c_fc2.bias => tensor([-1.1719, -1.3203, -1.2109, ..., -0.2539, -0.8398, -1.6875],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.9.mlp.c_fc2.SCB => tensor([0.1235, 0.1279, 0.1436, ..., 0.1128, 0.1377, 0.1260], device='cuda:0')\n", "transformer.h.9.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.9.mlp.c_proj.weight => tensor([[-39, -36, -11, ..., -21, 37, 42],\n", " [ 15, 30, 15, ..., 2, -3, -46],\n", " [ 3, 43, -17, ..., -35, 20, 6],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 13, -19, -10, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.9.mlp.c_proj.bias => tensor([-0.7422, 0.7305, 0.8750, ..., -0.0864, 0.0791, 0.0178],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.9.mlp.c_proj.SCB => tensor([0.1338, 0.1475, 0.1533, ..., 0.1445, 0.1348, 0.1445], device='cuda:0')\n", "transformer.h.9.mlp.c_proj.weight_format => col_turing\n", "transformer.h.10.ln_1.weight => tensor([1.6250, 2.0938, 2.3594, ..., 1.7812, 1.7422, 1.9453], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.10.ln_1.bias => tensor([ 0.1182, -0.8086, -0.3477, ..., -0.0044, 0.0356, 0.1787],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.10.attn.c_attn.weight => tensor([[-29, -55, -21, ..., 44, -28, 70],\n", " [ -4, 21, 0, ..., 8, 64, -8],\n", " [ -1, -6, -43, ..., 18, 5, 25],\n", " ...,\n", " [ 70, 43, 26, ..., 33, 0, 9],\n", " [-14, -45, 0, ..., -42, -60, 68],\n", " [-43, 0, -1, ..., -13, 25, 38]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.10.attn.c_attn.bias => tensor([-0.0283, 3.3125, -1.5938, ..., -0.0084, 0.0566, -0.3145],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.10.attn.c_attn.SCB => tensor([0.1328, 0.1240, 0.1436, ..., 0.1475, 0.1367, 0.1187], device='cuda:0')\n", "transformer.h.10.attn.c_attn.weight_format => col_turing\n", "transformer.h.10.attn.c_proj.weight => tensor([[ 17, -10, 19, ..., -16, -24, -31],\n", " [-47, -32, -24, ..., 17, -12, 1],\n", " [ -6, -31, -17, ..., 1, -8, -23],\n", " ...,\n", " [ 4, -41, 15, ..., 36, 17, 28],\n", " [ 11, 25, 17, ..., -9, 37, -5],\n", " [ 23, -15, 14, ..., -15, 8, 35]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.10.attn.c_proj.bias => tensor([ 0.0557, 0.4082, 0.2402, ..., 0.0325, 0.0483, -0.0874],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.10.attn.c_proj.SCB => tensor([0.1177, 0.1177, 0.1079, ..., 0.1040, 0.1001, 0.1172], device='cuda:0')\n", "transformer.h.10.attn.c_proj.weight_format => col_turing\n", "transformer.h.10.ln_2.weight => tensor([0.5664, 0.9688, 0.9297, ..., 0.8008, 0.8555, 1.0547], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.10.ln_2.bias => tensor([ 0.0474, -0.3730, -0.1216, ..., 0.0016, 0.0232, 0.1069],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.10.mlp.c_fc.weight => tensor([[ 6, -48, -23, ..., 10, -22, 26],\n", " [ 38, 41, 38, ..., 28, -9, 28],\n", " [ 11, -49, -32, ..., 21, -36, -38],\n", " ...,\n", " [ 2, 51, 48, ..., -8, 27, 16],\n", " [-26, -64, 8, ..., -37, -6, 3],\n", " [ 25, 
13, 88, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.10.mlp.c_fc.bias => tensor([ 0.1069, 0.2080, -0.1348, ..., 0.2852, 0.1328, 0.0214],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.10.mlp.c_fc.SCB => tensor([0.1660, 0.1309, 0.1885, ..., 0.1289, 0.1631, 0.1230], device='cuda:0')\n", "transformer.h.10.mlp.c_fc.weight_format => col_turing\n", "transformer.h.10.mlp.c_fc2.weight => tensor([[ 27, -22, 12, ..., 31, 6, -15],\n", " [ -5, -68, -32, ..., -4, -35, 5],\n", " [-27, 31, -24, ..., -51, 11, -44],\n", " ...,\n", " [ 3, -16, 0, ..., -33, 43, 17],\n", " [ 42, 2, -17, ..., -36, 41, -19],\n", " [ 9, 40, -31, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.10.mlp.c_fc2.bias => tensor([-1.0703, -0.5938, -1.5625, ..., -0.8750, -1.0234, -0.8750],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.10.mlp.c_fc2.SCB => tensor([0.1021, 0.1040, 0.1094, ..., 0.0947, 0.1069, 0.1030], device='cuda:0')\n", "transformer.h.10.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.10.mlp.c_proj.weight => tensor([[-33, -2, 20, ..., 25, -7, -3],\n", " [-14, 63, 19, ..., 23, 1, -24],\n", " [-23, -19, 51, ..., -13, 31, 17],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [-14, -41, -37, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.10.mlp.c_proj.bias => tensor([-0.5078, 0.6953, 0.8398, ..., 0.1631, 0.1816, -0.2148],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.10.mlp.c_proj.SCB => tensor([0.1455, 0.1338, 0.1357, ..., 0.1787, 0.1475, 0.1543], device='cuda:0')\n", "transformer.h.10.mlp.c_proj.weight_format => col_turing\n", "transformer.h.11.ln_1.weight => tensor([1.7266, 2.0625, 2.3594, ..., 1.7969, 1.7266, 1.9844], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.11.ln_1.bias => tensor([ 0.1221, -0.7891, -0.3320, ..., 0.0194, 0.0603, 0.2100],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.11.attn.c_attn.weight => tensor([[-31, -30, -17, ..., 12, -20, -41],\n", " [ 3, 42, 41, ..., 5, -25, 34],\n", " [ 11, 44, -64, ..., -4, 28, 11],\n", " ...,\n", " [-13, 34, 2, ..., -16, 16, -9],\n", " [ 20, 5, 32, ..., 8, -43, -19],\n", " [-12, 36, -1, ..., 4, -19, 20]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.11.attn.c_attn.bias => tensor([ 0.4043, 0.5820, 0.2734, ..., -0.1211, -0.2383, 0.1147],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.11.attn.c_attn.SCB => tensor([0.1357, 0.1387, 0.1206, ..., 0.1416, 0.1738, 0.1523], device='cuda:0')\n", "transformer.h.11.attn.c_attn.weight_format => col_turing\n", "transformer.h.11.attn.c_proj.weight => tensor([[ -39, 2, -4, ..., -27, -11, 4],\n", " [ -18, 46, -41, ..., -17, -35, -32],\n", " [ -26, -14, -11, ..., 18, -34, -12],\n", " ...,\n", " [ -10, -35, 5, ..., -51, 31, -55],\n", " [ 20, -17, -68, ..., -40, -127, -10],\n", " [ 41, 42, 87, ..., 14, -11, -5]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.11.attn.c_proj.bias => tensor([ 0.1670, 0.6211, 0.1660, ..., 0.3848, 0.0238, -0.4375],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.11.attn.c_proj.SCB => tensor([0.1021, 0.1270, 0.1396, ..., 0.1094, 0.0991, 0.1484], device='cuda:0')\n", "transformer.h.11.attn.c_proj.weight_format => col_turing\n", "transformer.h.11.ln_2.weight => tensor([0.6133, 0.9844, 0.9727, ..., 0.8398, 0.8711, 1.0938], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.11.ln_2.bias => tensor([ 0.0474, -0.3945, -0.1650, ..., 
-0.0269, 0.0243, 0.1504],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.11.mlp.c_fc.weight => tensor([[ 1, -27, 3, ..., -11, -6, 36],\n", " [ 9, 22, 25, ..., -12, 17, 13],\n", " [ 62, 72, 1, ..., -45, 13, 33],\n", " ...,\n", " [ 55, -7, -44, ..., -24, -4, 14],\n", " [-19, 19, -39, ..., 32, -20, -17],\n", " [-39, 11, 46, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.11.mlp.c_fc.bias => tensor([-1.3672, -0.8438, 0.9453, ..., 0.4062, 1.1641, -0.2383],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.11.mlp.c_fc.SCB => tensor([0.1035, 0.1177, 0.1396, ..., 0.1309, 0.1270, 0.1406], device='cuda:0')\n", "transformer.h.11.mlp.c_fc.weight_format => col_turing\n", "transformer.h.11.mlp.c_fc2.weight => tensor([[-24, -55, -24, ..., -20, -47, -9],\n", " [-34, 10, 31, ..., 5, -1, 13],\n", " [-20, -20, -87, ..., -47, 88, 28],\n", " ...,\n", " [ -7, 11, 20, ..., 24, -58, -29],\n", " [-18, 40, 10, ..., 7, 1, 22],\n", " [-65, 47, 10, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.11.mlp.c_fc2.bias => tensor([-0.0432, -0.2812, -0.5586, ..., -0.4160, -1.5156, 0.5898],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.11.mlp.c_fc2.SCB => tensor([0.1055, 0.1064, 0.1289, ..., 0.1128, 0.1729, 0.1030], device='cuda:0')\n", "transformer.h.11.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.11.mlp.c_proj.weight => tensor([[-29, -1, -12, ..., 15, 36, 49],\n", " [ 9, 25, -10, ..., -6, -39, -23],\n", " [ 15, 24, 0, ..., 17, 4, 9],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 18, 10, -6, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.11.mlp.c_proj.bias => tensor([-0.1553, 1.0391, 0.6445, ..., 0.5078, 0.2324, -0.4258],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.11.mlp.c_proj.SCB => tensor([0.1436, 0.1426, 0.1230, ..., 0.1553, 0.1338, 0.1377], device='cuda:0')\n", "transformer.h.11.mlp.c_proj.weight_format => col_turing\n", "transformer.h.12.ln_1.weight => tensor([1.7734, 2.3906, 2.5781, ..., 2.0625, 1.9453, 2.2031], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.12.ln_1.bias => tensor([ 0.1523, -0.8750, -0.4160, ..., 0.0112, 0.0408, 0.2285],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.12.attn.c_attn.weight => tensor([[-30, -38, -10, ..., 4, 13, 12],\n", " [ -8, 55, 35, ..., -30, -22, -11],\n", " [-27, 40, -30, ..., 3, -27, -6],\n", " ...,\n", " [ -2, -17, 48, ..., -26, -23, -9],\n", " [-38, 8, -24, ..., 4, 47, 28],\n", " [ 36, -74, 9, ..., 29, 56, 49]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.12.attn.c_attn.bias => tensor([-0.5234, 0.2832, 0.0142, ..., 0.2852, 0.1572, 0.0349],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.12.attn.c_attn.SCB => tensor([0.1230, 0.1426, 0.1069, ..., 0.1533, 0.1631, 0.1328], device='cuda:0')\n", "transformer.h.12.attn.c_attn.weight_format => col_turing\n", "transformer.h.12.attn.c_proj.weight => tensor([[ -4, 2, -8, ..., 4, -27, 30],\n", " [-45, -5, 17, ..., -27, -1, -56],\n", " [ 11, -11, -21, ..., 17, -10, 26],\n", " ...,\n", " [-48, -30, -79, ..., 26, -23, -51],\n", " [-34, -9, -33, ..., -75, -29, -66],\n", " [-67, 42, 71, ..., -41, -46, -4]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.12.attn.c_proj.bias => tensor([ 0.3125, 0.8789, 0.2578, ..., 0.6914, -0.1079, -0.6328],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.12.attn.c_proj.SCB => tensor([0.1270, 0.1455, 0.1240, ..., 0.1147, 
0.1250, 0.1221], device='cuda:0')\n", "transformer.h.12.attn.c_proj.weight_format => col_turing\n", "transformer.h.12.ln_2.weight => tensor([0.6680, 1.0391, 1.0156, ..., 0.8906, 0.9258, 1.1250], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.12.ln_2.bias => tensor([ 0.0530, -0.4414, -0.1670, ..., -0.0309, 0.0403, 0.1709],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.12.mlp.c_fc.weight => tensor([[-54, -2, 3, ..., -2, -57, -66],\n", " [ -4, -8, 10, ..., 106, 16, -27],\n", " [ 26, 24, 27, ..., 36, -15, 46],\n", " ...,\n", " [ 48, 25, -20, ..., -7, 10, 14],\n", " [ 40, 14, -6, ..., -46, 25, -36],\n", " [ 58, 71, -38, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.12.mlp.c_fc.bias => tensor([-0.4238, -0.8984, -0.3281, ..., 0.2061, -0.0933, 0.2539],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.12.mlp.c_fc.SCB => tensor([0.1416, 0.1094, 0.1118, ..., 0.1187, 0.1602, 0.1328], device='cuda:0')\n", "transformer.h.12.mlp.c_fc.weight_format => col_turing\n", "transformer.h.12.mlp.c_fc2.weight => tensor([[-46, -54, 12, ..., 12, -61, -36],\n", " [ 54, 39, 21, ..., 1, -5, 47],\n", " [ 37, 21, -79, ..., -42, -38, 16],\n", " ...,\n", " [-15, -25, 43, ..., 43, 74, 7],\n", " [-29, 18, 28, ..., 17, -24, -23],\n", " [ 31, 0, 26, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.12.mlp.c_fc2.bias => tensor([-1.0391, -0.1348, 0.6992, ..., 0.2676, -0.7617, 0.5547],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.12.mlp.c_fc2.SCB => tensor([0.1108, 0.1069, 0.0938, ..., 0.1011, 0.1030, 0.1377], device='cuda:0')\n", "transformer.h.12.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.12.mlp.c_proj.weight => tensor([[-20, -67, -42, ..., 36, 5, 25],\n", " [-10, -48, -4, ..., 1, 57, -11],\n", " [ 2, 21, -16, ..., -25, -2, 36],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 18, -27, -26, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.12.mlp.c_proj.bias => tensor([-0.1260, 0.9062, 0.7383, ..., 0.4844, 0.1245, -0.4336],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.12.mlp.c_proj.SCB => tensor([0.1367, 0.1328, 0.1484, ..., 0.1523, 0.1260, 0.1230], device='cuda:0')\n", "transformer.h.12.mlp.c_proj.weight_format => col_turing\n", "transformer.h.13.ln_1.weight => tensor([1.8594, 2.4219, 2.6094, ..., 2.2031, 2.0938, 2.2344], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.13.ln_1.bias => tensor([ 0.1147, -0.9219, -0.4277, ..., -0.0181, 0.0197, 0.3242],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.13.attn.c_attn.weight => tensor([[ 43, 56, -23, ..., 43, -38, 3],\n", " [ 22, -54, 8, ..., 22, 60, -13],\n", " [ 23, 39, -40, ..., -5, 10, 6],\n", " ...,\n", " [ 7, 22, 18, ..., 27, -14, 34],\n", " [-16, -32, -16, ..., -4, 27, 6],\n", " [-32, 80, 27, ..., -42, 50, 11]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.13.attn.c_attn.bias => tensor([ 0.0271, -0.2363, 0.0398, ..., 0.2207, 0.3164, 0.0149],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.13.attn.c_attn.SCB => tensor([0.1289, 0.1226, 0.1172, ..., 0.1553, 0.1572, 0.1357], device='cuda:0')\n", "transformer.h.13.attn.c_attn.weight_format => col_turing\n", "transformer.h.13.attn.c_proj.weight => tensor([[-25, 62, 47, ..., -56, 1, -35],\n", " [-32, 23, 14, ..., -39, 43, 9],\n", " [ 17, -2, 8, ..., 34, -17, -44],\n", " ...,\n", " [ 56, -48, 27, ..., -35, -24, -33],\n", " [ 26, -20, -13, ..., -44, -30, 
55],\n", " [ 57, 51, -16, ..., -11, 11, -1]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.13.attn.c_proj.bias => tensor([ 0.3711, 1.1797, 0.2354, ..., 0.7148, -0.2832, -0.7383],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.13.attn.c_proj.SCB => tensor([0.1138, 0.1187, 0.1089, ..., 0.1133, 0.1045, 0.1108], device='cuda:0')\n", "transformer.h.13.attn.c_proj.weight_format => col_turing\n", "transformer.h.13.ln_2.weight => tensor([0.7227, 1.0625, 1.0391, ..., 0.9453, 0.9531, 1.1484], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.13.ln_2.bias => tensor([ 0.0315, -0.4707, -0.1436, ..., -0.0593, 0.0459, 0.2266],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.13.mlp.c_fc.weight => tensor([[ -16, 18, -77, ..., 4, 24, 21],\n", " [ 13, 18, -41, ..., 11, -23, -53],\n", " [ 55, 2, 3, ..., 9, -106, 22],\n", " ...,\n", " [ 34, 1, -13, ..., -10, -5, 20],\n", " [ -69, 45, -2, ..., 30, 9, 19],\n", " [ -8, 9, -4, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.13.mlp.c_fc.bias => tensor([-1.5000, -0.5039, -0.7617, ..., -0.3613, -0.1162, -0.1172],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.13.mlp.c_fc.SCB => tensor([0.1162, 0.1050, 0.1309, ..., 0.1108, 0.1328, 0.1631], device='cuda:0')\n", "transformer.h.13.mlp.c_fc.weight_format => col_turing\n", "transformer.h.13.mlp.c_fc2.weight => tensor([[-49, 2, 39, ..., 59, -29, -4],\n", " [ 37, -50, -7, ..., -14, -8, -36],\n", " [-10, 0, 43, ..., 0, -12, -19],\n", " ...,\n", " [ 13, -13, -11, ..., -66, 18, 48],\n", " [-35, 50, -46, ..., -67, 6, -17],\n", " [-15, -25, -43, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.13.mlp.c_fc2.bias => tensor([ 0.4668, 0.0500, -0.4453, ..., -0.3965, -0.9258, -0.7969],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.13.mlp.c_fc2.SCB => tensor([0.1060, 0.1079, 0.1040, ..., 0.0986, 0.1035, 0.1177], device='cuda:0')\n", "transformer.h.13.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.13.mlp.c_proj.weight => tensor([[-58, -24, 42, ..., 27, -21, 35],\n", " [-40, -29, -31, ..., -38, -11, -27],\n", " [ 4, 1, -9, ..., -14, 28, 85],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [-21, -38, 18, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.13.mlp.c_proj.bias => tensor([ 0.1069, 1.0234, 0.5703, ..., 0.4395, 0.1279, -0.1514],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.13.mlp.c_proj.SCB => tensor([0.1455, 0.1572, 0.1377, ..., 0.1357, 0.1484, 0.1494], device='cuda:0')\n", "transformer.h.13.mlp.c_proj.weight_format => col_turing\n", "transformer.h.14.ln_1.weight => tensor([1.9453, 2.3750, 2.7188, ..., 2.1719, 2.0938, 2.3281], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.14.ln_1.bias => tensor([ 0.0566, -0.9219, -0.3457, ..., -0.0605, 0.0718, 0.3555],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.14.attn.c_attn.weight => tensor([[ 23, -2, -19, ..., 63, 30, 1],\n", " [-42, 26, 5, ..., 6, -36, 23],\n", " [ 3, -32, 7, ..., 9, -97, -34],\n", " ...,\n", " [-59, 9, -52, ..., 11, -32, 2],\n", " [ 12, 10, -11, ..., 15, -17, 19],\n", " [ 80, -43, -13, ..., 31, 52, 9]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.14.attn.c_attn.bias => tensor([ 0.1523, -0.2891, -0.1138, ..., 0.1064, -0.1279, 0.0669],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.14.attn.c_attn.SCB => tensor([0.1089, 0.1060, 0.1270, ..., 0.1260, 0.1348, 0.1309], 
device='cuda:0')\n", "transformer.h.14.attn.c_attn.weight_format => col_turing\n", "transformer.h.14.attn.c_proj.weight => tensor([[ 13, -28, -9, ..., -34, -50, -1],\n", " [-19, 12, 35, ..., -57, -6, 21],\n", " [ 33, 8, 22, ..., -38, 41, -27],\n", " ...,\n", " [-60, 76, -9, ..., -25, -9, 19],\n", " [-51, -13, -4, ..., -5, -6, -12],\n", " [ 12, 22, 17, ..., -8, 4, 73]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.14.attn.c_proj.bias => tensor([ 0.2451, 1.2031, 0.9375, ..., 0.2383, -0.2451, -1.4297],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.14.attn.c_proj.SCB => tensor([0.1104, 0.1377, 0.1113, ..., 0.1001, 0.1260, 0.1318], device='cuda:0')\n", "transformer.h.14.attn.c_proj.weight_format => col_turing\n", "transformer.h.14.ln_2.weight => tensor([0.8516, 1.1406, 1.0781, ..., 1.0391, 1.0391, 1.2188], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.14.ln_2.bias => tensor([ 0.0356, -0.4980, -0.2051, ..., -0.0347, 0.0540, 0.2969],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.14.mlp.c_fc.weight => tensor([[ 34, -32, 58, ..., -5, 5, -8],\n", " [-11, 43, -2, ..., 14, 39, -23],\n", " [ 30, 39, 34, ..., -23, -13, 48],\n", " ...,\n", " [ 15, -4, 22, ..., 6, 10, -12],\n", " [ 25, -45, 1, ..., 36, -31, 20],\n", " [-25, -10, 3, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.14.mlp.c_fc.bias => tensor([ 0.2734, 0.3379, 0.7461, ..., 0.0908, -0.1504, -0.2969],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.14.mlp.c_fc.SCB => tensor([0.1406, 0.1104, 0.1641, ..., 0.1367, 0.1260, 0.1553], device='cuda:0')\n", "transformer.h.14.mlp.c_fc.weight_format => col_turing\n", "transformer.h.14.mlp.c_fc2.weight => tensor([[ 20, 27, -6, ..., 23, -26, 14],\n", " [-16, 4, -8, ..., 44, -17, 34],\n", " [ 26, 5, 61, ..., 6, -1, 46],\n", " ...,\n", " [ 23, 37, -43, ..., 15, -47, -16],\n", " [ -2, 24, 8, ..., 4, -13, 2],\n", " [-23, 48, 7, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.14.mlp.c_fc2.bias => tensor([-1.2344, -0.9609, -1.0625, ..., -0.4629, -0.9648, -1.3672],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.14.mlp.c_fc2.SCB => tensor([0.1211, 0.1328, 0.1172, ..., 0.0928, 0.1211, 0.1177], device='cuda:0')\n", "transformer.h.14.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.14.mlp.c_proj.weight => tensor([[100, 58, 31, ..., -3, 23, -22],\n", " [ 42, 33, 9, ..., -36, 14, 42],\n", " [ 33, -18, 26, ..., -25, 45, 31],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 15, 13, -23, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.14.mlp.c_proj.bias => tensor([ 0.2305, 1.1953, 0.9570, ..., 0.1777, -0.2041, -0.6953],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.14.mlp.c_proj.SCB => tensor([0.1260, 0.1367, 0.1885, ..., 0.1387, 0.1309, 0.1338], device='cuda:0')\n", "transformer.h.14.mlp.c_proj.weight_format => col_turing\n", "transformer.h.15.ln_1.weight => tensor([2.3438, 2.7031, 2.9844, ..., 2.5781, 2.5625, 2.6406], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.15.ln_1.bias => tensor([ 0.0486, -1.0156, -0.5508, ..., -0.0403, 0.0684, 0.4453],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.15.attn.c_attn.weight => tensor([[ 32, 31, 35, ..., -14, -1, -18],\n", " [ 73, 20, 23, ..., 14, -13, -37],\n", " [ -5, 45, -29, ..., -22, -18, -32],\n", " ...,\n", " [-38, -22, 23, ..., -18, 1, 12],\n", " [-40, 21, -24, ..., 23, -2, 56],\n", " [ 4, 27, -19, 
..., 32, 9, -20]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.15.attn.c_attn.bias => tensor([ 0.2363, 0.5430, -0.3340, ..., 0.0962, 0.0615, 0.1885],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.15.attn.c_attn.SCB => tensor([0.1348, 0.1367, 0.1079, ..., 0.1387, 0.1885, 0.1592], device='cuda:0')\n", "transformer.h.15.attn.c_attn.weight_format => col_turing\n", "transformer.h.15.attn.c_proj.weight => tensor([[ -5, -32, -58, ..., 2, -4, -36],\n", " [ -1, 45, 6, ..., -30, -34, -5],\n", " [ 18, 5, 23, ..., 14, 32, -10],\n", " ...,\n", " [ 21, -31, -27, ..., 17, 31, 7],\n", " [ 8, 16, -15, ..., 16, -43, 12],\n", " [ 2, -42, 22, ..., 12, -1, -65]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.15.attn.c_proj.bias => tensor([-0.0752, 1.3203, 0.8906, ..., 0.0128, -0.3750, -1.0703],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.15.attn.c_proj.SCB => tensor([0.1196, 0.1279, 0.1348, ..., 0.1475, 0.1445, 0.1748], device='cuda:0')\n", "transformer.h.15.attn.c_proj.weight_format => col_turing\n", "transformer.h.15.ln_2.weight => tensor([0.9570, 1.2578, 1.1562, ..., 1.1406, 1.1719, 1.3438], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.15.ln_2.bias => tensor([ 0.0679, -0.5352, -0.2930, ..., -0.0306, 0.0537, 0.3418],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.15.mlp.c_fc.weight => tensor([[ 8, -10, 31, ..., 3, -3, 3],\n", " [ -8, -3, 53, ..., -8, 11, 3],\n", " [ -5, -15, -8, ..., -8, 25, 35],\n", " ...,\n", " [ 37, 40, 6, ..., 54, -40, 45],\n", " [-34, 15, -3, ..., -24, -15, 9],\n", " [ 9, -37, 38, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.15.mlp.c_fc.bias => tensor([-1.0078, -0.2012, -0.3906, ..., -0.5312, -0.4688, 0.1924],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.15.mlp.c_fc.SCB => tensor([0.1338, 0.1143, 0.1367, ..., 0.1260, 0.1416, 0.1357], device='cuda:0')\n", "transformer.h.15.mlp.c_fc.weight_format => col_turing\n", "transformer.h.15.mlp.c_fc2.weight => tensor([[ 10, 4, 16, ..., 44, -31, -33],\n", " [ 18, 38, 16, ..., 32, -42, -8],\n", " [ 79, 16, 45, ..., 16, -13, 43],\n", " ...,\n", " [ -6, -20, 9, ..., -24, 45, 13],\n", " [ 5, 20, -17, ..., 54, -4, -3],\n", " [ 62, 46, -31, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.15.mlp.c_fc2.bias => tensor([-0.2852, 0.7539, -1.9688, ..., -0.6133, -0.2695, -0.1523],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.15.mlp.c_fc2.SCB => tensor([0.1143, 0.1001, 0.1235, ..., 0.1138, 0.0986, 0.1079], device='cuda:0')\n", "transformer.h.15.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.15.mlp.c_proj.weight => tensor([[ 15, -7, 93, ..., 78, 35, -34],\n", " [ 37, 3, 30, ..., 32, -21, -55],\n", " [ 17, -40, -16, ..., -31, 25, -100],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 7, -18, -1, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.15.mlp.c_proj.bias => tensor([-0.1074, 1.0156, 0.7539, ..., 0.0854, -0.3184, -0.4023],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.15.mlp.c_proj.SCB => tensor([0.1348, 0.1367, 0.1396, ..., 0.1289, 0.1562, 0.1650], device='cuda:0')\n", "transformer.h.15.mlp.c_proj.weight_format => col_turing\n", "transformer.h.16.ln_1.weight => tensor([2.5938, 2.7188, 2.9219, ..., 2.7344, 2.6094, 2.6406], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.16.ln_1.bias => tensor([ 0.0132, -0.9961, -0.5586, ..., -0.1104, 0.1143, 0.4746],\n", " 
device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.16.attn.c_attn.weight => tensor([[ 22, 89, -33, ..., -90, 5, -28],\n", " [ 12, -87, 44, ..., -36, -82, 47],\n", " [ 48, -4, -16, ..., 28, 41, 57],\n", " ...,\n", " [ -3, 60, 14, ..., -15, 7, 27],\n", " [ 23, -1, -30, ..., 19, -3, 5],\n", " [ -7, -51, 36, ..., 29, 32, 9]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.16.attn.c_attn.bias => tensor([-0.2871, 0.3301, -0.0664, ..., 0.1924, -0.0479, -0.0903],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.16.attn.c_attn.SCB => tensor([0.1133, 0.1128, 0.0991, ..., 0.1680, 0.1621, 0.1611], device='cuda:0')\n", "transformer.h.16.attn.c_attn.weight_format => col_turing\n", "transformer.h.16.attn.c_proj.weight => tensor([[ -7, 5, -22, ..., 49, 18, -4],\n", " [ -9, 38, 4, ..., -25, -32, 23],\n", " [ 4, 15, 28, ..., -16, -25, -68],\n", " ...,\n", " [-43, -58, 45, ..., 9, 46, 42],\n", " [ 38, -24, 8, ..., -5, -3, -7],\n", " [ 58, -14, 2, ..., -58, -27, -4]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.16.attn.c_proj.bias => tensor([ 0.1074, 1.2500, 1.2500, ..., -0.0427, -0.6172, -1.1719],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.16.attn.c_proj.SCB => tensor([0.1484, 0.1270, 0.1357, ..., 0.1357, 0.1396, 0.1309], device='cuda:0')\n", "transformer.h.16.attn.c_proj.weight_format => col_turing\n", "transformer.h.16.ln_2.weight => tensor([1.0078, 1.2969, 1.1953, ..., 1.2266, 1.2109, 1.4062], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.16.ln_2.bias => tensor([ 0.0150, -0.5312, -0.3398, ..., -0.0337, 0.1182, 0.3711],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.16.mlp.c_fc.weight => tensor([[ -2, -4, 7, ..., 30, -47, 40],\n", " [ 3, -12, -37, ..., -34, 4, 40],\n", " [ 5, -2, 28, ..., 20, 46, 28],\n", " ...,\n", " [ 52, -13, 23, ..., -25, 36, 33],\n", " [-23, 7, -82, ..., 60, 43, 16],\n", " [ 10, -9, 39, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.16.mlp.c_fc.bias => tensor([ 0.5820, -0.6094, 0.4609, ..., 0.0036, -0.6016, -0.0092],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.16.mlp.c_fc.SCB => tensor([0.1299, 0.1108, 0.1299, ..., 0.1328, 0.1260, 0.1416], device='cuda:0')\n", "transformer.h.16.mlp.c_fc.weight_format => col_turing\n", "transformer.h.16.mlp.c_fc2.weight => tensor([[-45, -20, 22, ..., -22, 18, -6],\n", " [ 25, -12, 21, ..., -1, 7, -11],\n", " [ -5, -59, -17, ..., -2, -18, -18],\n", " ...,\n", " [-46, 13, -5, ..., -24, 31, -26],\n", " [ 58, -14, 16, ..., 8, 62, 6],\n", " [ 38, 57, 13, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.16.mlp.c_fc2.bias => tensor([ 0.0135, 0.1040, -0.9453, ..., 0.3945, -0.4961, -0.7539],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.16.mlp.c_fc2.SCB => tensor([0.1270, 0.1240, 0.1396, ..., 0.1279, 0.1128, 0.1089], device='cuda:0')\n", "transformer.h.16.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.16.mlp.c_proj.weight => tensor([[ 7, -16, -59, ..., 12, -13, 10],\n", " [-33, -35, 31, ..., -83, -11, -4],\n", " [-12, 17, -6, ..., 7, -36, -10],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 23, -14, 8, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.16.mlp.c_proj.bias => tensor([-0.1069, 1.2422, 1.3281, ..., -0.0088, -0.4102, -0.7930],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.16.mlp.c_proj.SCB => tensor([0.1318, 0.1533, 0.1543, ..., 0.1357, 0.1377, 0.1406], 
device='cuda:0')\n", "transformer.h.16.mlp.c_proj.weight_format => col_turing\n", "transformer.h.17.ln_1.weight => tensor([2.5938, 2.6250, 2.8594, ..., 2.6875, 2.5312, 2.4375], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.17.ln_1.bias => tensor([ 0.0537, -0.9922, -0.5391, ..., -0.1138, 0.0583, 0.4805],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.17.attn.c_attn.weight => tensor([[-29, -30, 9, ..., -19, -23, 4],\n", " [ 8, 43, 34, ..., -34, 63, -25],\n", " [-16, -23, 3, ..., -37, 31, -17],\n", " ...,\n", " [-44, -31, 22, ..., 11, 2, -17],\n", " [ -1, -17, -45, ..., -5, 64, -17],\n", " [ 41, 1, 60, ..., 27, 9, -33]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.17.attn.c_attn.bias => tensor([-0.6992, 1.0156, -0.7656, ..., -0.0496, 0.1777, 0.4277],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.17.attn.c_attn.SCB => tensor([0.1270, 0.1270, 0.1187, ..., 0.1689, 0.1680, 0.1924], device='cuda:0')\n", "transformer.h.17.attn.c_attn.weight_format => col_turing\n", "transformer.h.17.attn.c_proj.weight => tensor([[ 8, 10, -14, ..., -21, -30, 5],\n", " [-14, 38, -25, ..., 39, -57, -11],\n", " [-23, -6, -30, ..., 13, 15, -13],\n", " ...,\n", " [-22, -1, 64, ..., -45, 23, -6],\n", " [ -2, -22, 45, ..., -10, 52, 108],\n", " [ 6, -10, 61, ..., 28, -55, -85]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.17.attn.c_proj.bias => tensor([ 0.0967, 1.3516, 1.3750, ..., 0.0231, -0.7461, -1.0781],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.17.attn.c_proj.SCB => tensor([0.1104, 0.1455, 0.1357, ..., 0.1504, 0.1221, 0.1357], device='cuda:0')\n", "transformer.h.17.attn.c_proj.weight_format => col_turing\n", "transformer.h.17.ln_2.weight => tensor([1.0781, 1.3359, 1.2891, ..., 1.2969, 1.2891, 1.4766], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.17.ln_2.bias => tensor([ 0.0496, -0.5742, -0.3223, ..., -0.0396, 0.0845, 0.3984],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.17.mlp.c_fc.weight => tensor([[ 61, -3, -35, ..., 31, -14, -53],\n", " [ 9, -39, 25, ..., -9, -13, -17],\n", " [ 31, 28, 63, ..., -36, 64, -10],\n", " ...,\n", " [-43, -31, 56, ..., 23, 11, -4],\n", " [ 20, 5, -29, ..., -3, -19, 12],\n", " [ -4, 0, -96, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.17.mlp.c_fc.bias => tensor([-1.1406, 0.2617, 0.3066, ..., -1.5000, -0.3281, 0.8359],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.17.mlp.c_fc.SCB => tensor([0.1230, 0.1270, 0.1289, ..., 0.1836, 0.1582, 0.1426], device='cuda:0')\n", "transformer.h.17.mlp.c_fc.weight_format => col_turing\n", "transformer.h.17.mlp.c_fc2.weight => tensor([[ -3, 62, 34, ..., 3, -17, 5],\n", " [-16, -18, 2, ..., 9, 6, -9],\n", " [ 21, -33, -44, ..., -28, 13, 16],\n", " ...,\n", " [ 35, 49, 29, ..., -2, 2, 76],\n", " [ 13, -14, -2, ..., -12, -59, -82],\n", " [ 24, -59, 22, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.17.mlp.c_fc2.bias => tensor([-0.7188, -0.3926, -1.3984, ..., -0.8516, -0.9062, -0.9844],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.17.mlp.c_fc2.SCB => tensor([0.1138, 0.1064, 0.1050, ..., 0.1069, 0.1035, 0.1196], device='cuda:0')\n", "transformer.h.17.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.17.mlp.c_proj.weight => tensor([[-16, -1, 10, ..., -13, 2, 44],\n", " [-22, -24, -10, ..., 17, -10, 29],\n", " [ -4, -50, -11, ..., 15, 30, -19],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [-14, -8, 24, ..., 0, 0, 
0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.17.mlp.c_proj.bias => tensor([-0.1201, 1.2422, 1.1719, ..., 0.1719, -0.5078, -0.9961],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.17.mlp.c_proj.SCB => tensor([0.1338, 0.1641, 0.1631, ..., 0.1416, 0.1299, 0.1465], device='cuda:0')\n", "transformer.h.17.mlp.c_proj.weight_format => col_turing\n", "transformer.h.18.ln_1.weight => tensor([2.6250, 2.5781, 2.7812, ..., 2.7188, 2.6406, 2.5469], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.18.ln_1.bias => tensor([ 0.0190, -1.0000, -0.5352, ..., -0.1084, 0.0908, 0.5625],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.18.attn.c_attn.weight => tensor([[ 10, 43, 16, ..., 2, -4, -27],\n", " [-13, 26, 3, ..., -71, -24, 16],\n", " [ -9, 0, 7, ..., -64, 4, -12],\n", " ...,\n", " [ 6, 0, -18, ..., 6, 9, -22],\n", " [ 19, -4, -43, ..., 20, 81, -59],\n", " [ 32, -18, -36, ..., -4, -24, -7]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.18.attn.c_attn.bias => tensor([-1.5781, -0.3613, 0.0120, ..., 0.1123, 0.1455, 0.3730],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.18.attn.c_attn.SCB => tensor([0.1064, 0.1064, 0.1133, ..., 0.1924, 0.1562, 0.1436], device='cuda:0')\n", "transformer.h.18.attn.c_attn.weight_format => col_turing\n", "transformer.h.18.attn.c_proj.weight => tensor([[ -30, -37, 54, ..., -16, -13, 38],\n", " [ -33, 39, 13, ..., -36, -32, -35],\n", " [ -9, 4, 5, ..., -5, 19, -47],\n", " ...,\n", " [ -42, -30, -19, ..., -29, 56, 12],\n", " [-103, 64, 36, ..., -7, -21, 28],\n", " [ -8, 4, 49, ..., -26, -14, 36]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.18.attn.c_proj.bias => tensor([ 0.0679, 1.4922, 1.3047, ..., -0.1357, -0.8359, -1.7031],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.18.attn.c_proj.SCB => tensor([0.1445, 0.1128, 0.1426, ..., 0.1572, 0.1318, 0.1289], device='cuda:0')\n", "transformer.h.18.attn.c_proj.weight_format => col_turing\n", "transformer.h.18.ln_2.weight => tensor([1.1875, 1.4141, 1.3516, ..., 1.3672, 1.3906, 1.5312], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.18.ln_2.bias => tensor([ 0.0184, -0.6445, -0.3418, ..., -0.0515, 0.0762, 0.4609],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.18.mlp.c_fc.weight => tensor([[ 8, 2, 58, ..., 35, 45, 4],\n", " [ 45, 13, -13, ..., -8, -8, -8],\n", " [-55, 23, 10, ..., 79, 107, 10],\n", " ...,\n", " [ 22, -23, -21, ..., -6, 14, 4],\n", " [-10, 33, 3, ..., -31, -6, -19],\n", " [-17, 16, 5, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.18.mlp.c_fc.bias => tensor([-0.3809, -0.8828, -0.0894, ..., -0.6406, 0.8711, 0.4199],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.18.mlp.c_fc.SCB => tensor([0.1289, 0.1084, 0.1162, ..., 0.1357, 0.1406, 0.1299], device='cuda:0')\n", "transformer.h.18.mlp.c_fc.weight_format => col_turing\n", "transformer.h.18.mlp.c_fc2.weight => tensor([[-21, 75, -19, ..., 42, 36, 13],\n", " [ 11, 3, 97, ..., -8, 15, 0],\n", " [ 50, 27, 8, ..., 39, -5, -25],\n", " ...,\n", " [ 51, -13, -39, ..., -16, -24, -7],\n", " [ 1, 41, 37, ..., -35, -17, -1],\n", " [ 28, 49, -6, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.18.mlp.c_fc2.bias => tensor([-1.2344, 0.0386, -0.1924, ..., -0.3301, -1.6719, -1.1484],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.18.mlp.c_fc2.SCB => tensor([0.1187, 0.1055, 0.1079, ..., 0.1030, 0.1289, 0.1211], 
device='cuda:0')\n", "transformer.h.18.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.18.mlp.c_proj.weight => tensor([[ 32, 10, -18, ..., 6, 10, -16],\n", " [ 4, -13, -66, ..., 8, -16, 27],\n", " [-41, -5, 5, ..., -13, 17, 20],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [-41, -15, -34, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.18.mlp.c_proj.bias => tensor([ 0.1172, 1.5859, 1.1016, ..., -0.2217, -0.6289, -1.4688],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.18.mlp.c_proj.SCB => tensor([0.1396, 0.1611, 0.1523, ..., 0.1436, 0.1406, 0.1348], device='cuda:0')\n", "transformer.h.18.mlp.c_proj.weight_format => col_turing\n", "transformer.h.19.ln_1.weight => tensor([2.7656, 2.6250, 2.8750, ..., 2.6719, 2.7344, 2.5156], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.19.ln_1.bias => tensor([-9.2697e-04, -1.0547e+00, -6.1328e-01, ..., -5.2490e-02,\n", " 1.3867e-01, 6.7578e-01], device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.19.attn.c_attn.weight => tensor([[ 5, 70, 48, ..., 4, -24, -22],\n", " [-12, -33, -18, ..., -26, -16, -26],\n", " [ -6, -15, 19, ..., -1, 32, -7],\n", " ...,\n", " [-39, -22, 63, ..., -14, 45, -36],\n", " [-42, 20, -9, ..., 39, 28, -12],\n", " [-35, 29, 41, ..., 7, -13, 7]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.19.attn.c_attn.bias => tensor([-0.9141, 0.9297, -0.3223, ..., 0.5898, 0.6914, -0.0337],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.19.attn.c_attn.SCB => tensor([0.1074, 0.1216, 0.1167, ..., 0.1553, 0.1514, 0.1484], device='cuda:0')\n", "transformer.h.19.attn.c_attn.weight_format => col_turing\n", "transformer.h.19.attn.c_proj.weight => tensor([[-24, -18, -23, ..., 6, 13, -7],\n", " [ 14, 0, -24, ..., 25, 20, 20],\n", " [ -8, 20, 18, ..., -7, -30, -13],\n", " ...,\n", " [ 81, 36, -44, ..., -38, -2, -11],\n", " [ -4, -27, 23, ..., 10, -50, -3],\n", " [-12, 7, 9, ..., -39, -51, 35]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.19.attn.c_proj.bias => tensor([ 0.2109, 1.8047, 1.0859, ..., -0.4863, -1.0000, -2.1875],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.19.attn.c_proj.SCB => tensor([0.1504, 0.1523, 0.1367, ..., 0.1279, 0.1338, 0.1318], device='cuda:0')\n", "transformer.h.19.attn.c_proj.weight_format => col_turing\n", "transformer.h.19.ln_2.weight => tensor([1.2969, 1.5156, 1.4844, ..., 1.4922, 1.4688, 1.6250], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.19.ln_2.bias => tensor([-0.0167, -0.7109, -0.3691, ..., -0.0099, 0.1475, 0.6406],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.19.mlp.c_fc.weight => tensor([[-41, -18, 14, ..., 12, 3, 14],\n", " [-28, -81, 0, ..., -48, 24, 34],\n", " [ -1, -18, -11, ..., -22, -48, 33],\n", " ...,\n", " [ 3, 26, -14, ..., -25, 12, 60],\n", " [ 28, 5, 1, ..., 32, 0, -14],\n", " [ 14, 30, -5, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.19.mlp.c_fc.bias => tensor([-0.4375, -1.0156, 0.2617, ..., 0.2363, 0.8203, -1.1719],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.19.mlp.c_fc.SCB => tensor([0.1177, 0.1367, 0.1318, ..., 0.1128, 0.1211, 0.1787], device='cuda:0')\n", "transformer.h.19.mlp.c_fc.weight_format => col_turing\n", "transformer.h.19.mlp.c_fc2.weight => tensor([[ 58, -36, 40, ..., 44, -34, -27],\n", " [-113, -15, -39, ..., -25, 28, 38],\n", " [ 24, 14, 14, ..., 7, -32, -10],\n", " ...,\n", " [ 15, 44, -7, ..., -14, -19, -21],\n", " [ 8, 7, -25, 
..., -37, 28, 10],\n", " [ 63, 12, -9, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.19.mlp.c_fc2.bias => tensor([-1.7578, -0.3535, -1.7891, ..., -0.6250, -1.0625, -2.3125],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.19.mlp.c_fc2.SCB => tensor([0.1060, 0.0962, 0.1074, ..., 0.0967, 0.0996, 0.1128], device='cuda:0')\n", "transformer.h.19.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.19.mlp.c_proj.weight => tensor([[ 46, 10, 20, ..., -9, 4, -24],\n", " [-40, 4, -22, ..., 53, -16, -56],\n", " [-15, -32, 15, ..., 29, -10, -13],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 35, 25, 60, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.19.mlp.c_proj.bias => tensor([ 0.0208, 1.9453, 1.2344, ..., -0.2773, -1.3438, -1.6406],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.19.mlp.c_proj.SCB => tensor([0.1318, 0.1416, 0.1475, ..., 0.1631, 0.1270, 0.1504], device='cuda:0')\n", "transformer.h.19.mlp.c_proj.weight_format => col_turing\n", "transformer.h.20.ln_1.weight => tensor([2.8125, 2.6719, 2.8125, ..., 2.7812, 2.7031, 2.5312], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.20.ln_1.bias => tensor([-0.0156, -1.1406, -0.5664, ..., -0.0273, 0.1177, 0.7461],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.20.attn.c_attn.weight => tensor([[ -6, -3, -8, ..., -49, 11, -12],\n", " [ -3, -18, -2, ..., 22, 13, 4],\n", " [ 50, 5, -32, ..., 3, 46, -27],\n", " ...,\n", " [-53, -20, -63, ..., -15, 36, -44],\n", " [ -4, -29, -22, ..., -67, 1, 41],\n", " [ 35, 25, -1, ..., -1, -35, 22]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.20.attn.c_attn.bias => tensor([-0.1338, 0.4707, -0.4609, ..., -0.0835, 0.0225, 0.1533],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.20.attn.c_attn.SCB => tensor([0.0952, 0.1182, 0.1064, ..., 0.1523, 0.1689, 0.1602], device='cuda:0')\n", "transformer.h.20.attn.c_attn.weight_format => col_turing\n", "transformer.h.20.attn.c_proj.weight => tensor([[ 2, 16, 7, ..., 12, -57, -39],\n", " [ -5, 39, 27, ..., -3, 2, -19],\n", " [ -3, 43, 29, ..., -6, 5, 7],\n", " ...,\n", " [ 54, -40, -16, ..., -50, 42, -43],\n", " [ -9, 2, 16, ..., 85, 31, -9],\n", " [-29, 5, -41, ..., -27, -33, -24]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.20.attn.c_proj.bias => tensor([ 0.5742, 1.7812, 0.4805, ..., 0.1157, -1.4766, -1.8984],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.20.attn.c_proj.SCB => tensor([0.1309, 0.1377, 0.1426, ..., 0.1562, 0.1230, 0.1328], device='cuda:0')\n", "transformer.h.20.attn.c_proj.weight_format => col_turing\n", "transformer.h.20.ln_2.weight => tensor([1.4062, 1.6094, 1.5547, ..., 1.5625, 1.5781, 1.6953], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.20.ln_2.bias => tensor([-0.0703, -0.8203, -0.3223, ..., -0.0197, 0.1689, 0.6367],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.20.mlp.c_fc.weight => tensor([[ 42, 12, -19, ..., -54, 5, 53],\n", " [ 59, -1, -29, ..., -37, 26, -14],\n", " [ 6, 30, -6, ..., -62, -4, -21],\n", " ...,\n", " [-59, 84, 0, ..., -14, 22, 39],\n", " [-10, -23, 18, ..., -32, -51, 93],\n", " [ -8, 32, 42, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.20.mlp.c_fc.bias => tensor([ 1.2734, -0.7812, 0.7188, ..., 0.5234, -0.3730, -0.3359],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.20.mlp.c_fc.SCB => tensor([0.1816, 0.1162, 0.1504, ..., 0.1553, 
0.1309, 0.1738], device='cuda:0')\n", "transformer.h.20.mlp.c_fc.weight_format => col_turing\n", "transformer.h.20.mlp.c_fc2.weight => tensor([[ -5, 23, 49, ..., -12, 16, 26],\n", " [-18, 16, -5, ..., 23, 10, 15],\n", " [ -7, 4, 30, ..., -37, 26, 23],\n", " ...,\n", " [-22, 0, -6, ..., 9, -27, -20],\n", " [-18, 1, 15, ..., -16, -15, -34],\n", " [-28, 11, 7, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.20.mlp.c_fc2.bias => tensor([-2.0156, -0.9492, -1.6719, ..., -2.3594, -1.9141, -1.8359],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.20.mlp.c_fc2.SCB => tensor([0.1465, 0.0986, 0.1445, ..., 0.1206, 0.1050, 0.1143], device='cuda:0')\n", "transformer.h.20.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.20.mlp.c_proj.weight => tensor([[-10, 12, -3, ..., -52, 15, 32],\n", " [-13, 6, -23, ..., -2, -26, -28],\n", " [ 2, 5, -21, ..., 0, 32, -11],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 24, -12, 42, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.20.mlp.c_proj.bias => tensor([ 0.4629, 1.6484, 0.0869, ..., 0.1660, -1.2188, -1.8672],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.20.mlp.c_proj.SCB => tensor([0.1436, 0.1338, 0.1553, ..., 0.1445, 0.1562, 0.1602], device='cuda:0')\n", "transformer.h.20.mlp.c_proj.weight_format => col_turing\n", "transformer.h.21.ln_1.weight => tensor([2.9375, 2.5781, 2.8125, ..., 2.8125, 2.8438, 2.4844], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.21.ln_1.bias => tensor([-0.0854, -1.1328, -0.5117, ..., 0.0267, 0.1089, 0.7695],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.21.attn.c_attn.weight => tensor([[ 27, 18, -50, ..., -35, -44, 16],\n", " [ -37, -23, -46, ..., -28, 19, 20],\n", " [ -20, 20, 4, ..., -42, 4, 47],\n", " ...,\n", " [ -37, -10, 61, ..., 8, -41, 17],\n", " [ -9, -54, 10, ..., -4, -10, -13],\n", " [-107, 35, -23, ..., -18, -27, -4]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.21.attn.c_attn.bias => tensor([-0.5977, -0.1094, -0.3672, ..., 0.0762, 0.4785, 0.3125],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.21.attn.c_attn.SCB => tensor([0.1133, 0.0938, 0.1289, ..., 0.1689, 0.1436, 0.1514], device='cuda:0')\n", "transformer.h.21.attn.c_attn.weight_format => col_turing\n", "transformer.h.21.attn.c_proj.weight => tensor([[-34, -9, -24, ..., 7, 8, -8],\n", " [-32, -14, 27, ..., -6, -18, 6],\n", " [ 45, 43, -17, ..., 23, 31, 21],\n", " ...,\n", " [ 12, 3, -12, ..., 3, -43, 45],\n", " [ 15, 2, 20, ..., 18, 25, -6],\n", " [ 7, -28, -20, ..., -27, -2, 10]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.21.attn.c_proj.bias => tensor([ 0.8984, 1.0234, 0.4277, ..., 0.4062, -0.8711, -1.7031],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.21.attn.c_proj.SCB => tensor([0.1699, 0.1768, 0.1611, ..., 0.1445, 0.1465, 0.1855], device='cuda:0')\n", "transformer.h.21.attn.c_proj.weight_format => col_turing\n", "transformer.h.21.ln_2.weight => tensor([1.6172, 1.7422, 1.7031, ..., 1.7188, 1.7188, 1.8516], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.21.ln_2.bias => tensor([-0.0840, -0.7812, -0.2715, ..., -0.0190, 0.1348, 0.7422],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.21.mlp.c_fc.weight => tensor([[ 9, 2, -42, ..., -25, 11, -14],\n", " [ 6, -36, -25, ..., 16, 9, -7],\n", " [-14, 14, -42, ..., 0, 28, 13],\n", " ...,\n", " [-37, -25, 62, ..., 16, -50, 12],\n", " [ 7, 39, -15, ..., -49, 13, 
-56],\n", " [-72, -16, 18, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.21.mlp.c_fc.bias => tensor([ 0.1357, -0.7109, -0.0046, ..., -0.6953, -0.0011, 0.1011],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.21.mlp.c_fc.SCB => tensor([0.1650, 0.1191, 0.1289, ..., 0.1167, 0.1416, 0.1187], device='cuda:0')\n", "transformer.h.21.mlp.c_fc.weight_format => col_turing\n", "transformer.h.21.mlp.c_fc2.weight => tensor([[-58, -13, 31, ..., 2, -28, 53],\n", " [-11, -19, -25, ..., -51, -9, 27],\n", " [ -2, 3, -3, ..., -12, 13, -71],\n", " ...,\n", " [ 21, -64, -27, ..., 7, -52, -25],\n", " [ 24, 43, 15, ..., 53, 9, 18],\n", " [-25, 8, 14, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.21.mlp.c_fc2.bias => tensor([-4.1875, -1.4141, -2.2188, ..., -1.5312, -2.3125, -1.8672],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.21.mlp.c_fc2.SCB => tensor([0.1357, 0.1099, 0.1143, ..., 0.0991, 0.1138, 0.0952], device='cuda:0')\n", "transformer.h.21.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.21.mlp.c_proj.weight => tensor([[ 7, -3, 19, ..., 23, 11, 1],\n", " [-27, 4, 19, ..., 25, -10, -34],\n", " [-18, 13, -3, ..., 34, 45, -22],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [-56, -22, 16, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.21.mlp.c_proj.bias => tensor([ 1.0547, 1.1562, 0.3262, ..., 0.4648, -0.8789, -1.8125],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.21.mlp.c_proj.SCB => tensor([0.1504, 0.1514, 0.1475, ..., 0.1445, 0.1270, 0.1533], device='cuda:0')\n", "transformer.h.21.mlp.c_proj.weight_format => col_turing\n", "transformer.h.22.ln_1.weight => tensor([3.2969, 2.7031, 2.9531, ..., 2.9219, 2.9219, 2.4531], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.22.ln_1.bias => tensor([-0.0908, -1.0938, -0.4590, ..., -0.0391, 0.1240, 0.7070],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.22.attn.c_attn.weight => tensor([[ 10, -77, -14, ..., -6, 12, -1],\n", " [ 14, 26, -9, ..., -16, 18, -48],\n", " [ 22, -12, 1, ..., -70, 27, 48],\n", " ...,\n", " [ 2, -8, -7, ..., 1, -34, -3],\n", " [-31, 15, -16, ..., 61, 27, -46],\n", " [ 26, -40, 18, ..., -47, -12, -22]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.22.attn.c_attn.bias => tensor([ 0.8633, 0.0698, -0.1689, ..., -0.4688, 0.0645, 0.0781],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.22.attn.c_attn.SCB => tensor([0.1191, 0.1270, 0.1152, ..., 0.2207, 0.2793, 0.2412], device='cuda:0')\n", "transformer.h.22.attn.c_attn.weight_format => col_turing\n", "transformer.h.22.attn.c_proj.weight => tensor([[ 35, 34, 5, ..., 0, -25, 15],\n", " [ 36, -12, -8, ..., -41, -28, -45],\n", " [ 5, -35, 11, ..., 26, 9, 46],\n", " ...,\n", " [ 22, -43, -27, ..., -76, 40, 44],\n", " [-70, 9, 15, ..., 40, 32, 32],\n", " [-28, -4, 60, ..., 11, -56, 27]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.22.attn.c_proj.bias => tensor([ 1.0469, 1.1875, 0.1621, ..., 0.6484, -0.8789, -1.3750],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.22.attn.c_proj.SCB => tensor([0.1475, 0.1289, 0.1445, ..., 0.1582, 0.1436, 0.1719], device='cuda:0')\n", "transformer.h.22.attn.c_proj.weight_format => col_turing\n", "transformer.h.22.ln_2.weight => tensor([1.7344, 1.8594, 1.8203, ..., 1.8516, 1.8438, 1.9453], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.22.ln_2.bias => tensor([-0.1377, -0.7539, -0.1963, 
..., -0.0430, 0.1543, 0.7383],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.22.mlp.c_fc.weight => tensor([[ 28, -24, -49, ..., 82, -9, -8],\n", " [ 37, 75, 1, ..., 16, -17, 85],\n", " [ 18, -4, 53, ..., 2, 17, -43],\n", " ...,\n", " [ -8, -8, -10, ..., 22, 23, -7],\n", " [ -3, -43, 19, ..., -49, 11, -25],\n", " [-40, 7, 11, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.22.mlp.c_fc.bias => tensor([-1.2360e-03, -1.0391e+00, 1.8945e-01, ..., -8.9453e-01,\n", " 2.6953e-01, -1.4922e+00], device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.22.mlp.c_fc.SCB => tensor([0.1396, 0.1108, 0.1338, ..., 0.1279, 0.1777, 0.1260], device='cuda:0')\n", "transformer.h.22.mlp.c_fc.weight_format => col_turing\n", "transformer.h.22.mlp.c_fc2.weight => tensor([[ 25, -29, 19, ..., 54, 41, -14],\n", " [-26, 18, -3, ..., 17, 12, -15],\n", " [-21, 49, 13, ..., 14, 5, 32],\n", " ...,\n", " [-24, -18, -15, ..., 9, 58, -74],\n", " [ 42, -42, 63, ..., 13, 4, 11],\n", " [ -6, -17, -22, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.22.mlp.c_fc2.bias => tensor([-3.2969, -1.2891, -2.5156, ..., -1.9375, -2.4688, -2.0156],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.22.mlp.c_fc2.SCB => tensor([0.1299, 0.1138, 0.1328, ..., 0.1338, 0.1040, 0.0991], device='cuda:0')\n", "transformer.h.22.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.22.mlp.c_proj.weight => tensor([[ -3, 3, -9, ..., 24, 34, 34],\n", " [ 24, 19, 3, ..., -13, -29, -7],\n", " [ -4, 7, 2, ..., -38, 10, -18],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 35, 22, -15, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.22.mlp.c_proj.bias => tensor([ 0.5234, 0.8555, -0.0288, ..., 0.6680, -0.6484, -1.5547],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.22.mlp.c_proj.SCB => tensor([0.1348, 0.1426, 0.1592, ..., 0.1514, 0.1406, 0.1426], device='cuda:0')\n", "transformer.h.22.mlp.c_proj.weight_format => col_turing\n", "transformer.h.23.ln_1.weight => tensor([3.4375, 2.7500, 2.9531, ..., 2.9531, 2.9688, 2.4688], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.23.ln_1.bias => tensor([-0.0986, -1.1172, -0.4062, ..., -0.0664, 0.1436, 0.7227],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.23.attn.c_attn.weight => tensor([[ 33, 25, -5, ..., 26, 26, 7],\n", " [ 1, 10, 1, ..., 12, 45, 50],\n", " [ -6, -52, 16, ..., 23, -9, 15],\n", " ...,\n", " [-14, 7, 8, ..., -59, -12, 4],\n", " [-10, -21, 19, ..., 32, 57, 42],\n", " [ 14, -26, -16, ..., 41, 18, 23]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.23.attn.c_attn.bias => tensor([ 0.5195, 0.1377, -0.5234, ..., -0.2949, -0.0532, -0.3809],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.23.attn.c_attn.SCB => tensor([0.1064, 0.1138, 0.1147, ..., 0.1924, 0.1533, 0.1729], device='cuda:0')\n", "transformer.h.23.attn.c_attn.weight_format => col_turing\n", "transformer.h.23.attn.c_proj.weight => tensor([[-46, 1, 3, ..., -19, -8, -40],\n", " [ 35, 2, 32, ..., 17, 5, -19],\n", " [-31, 5, 1, ..., 25, -12, 32],\n", " ...,\n", " [ 34, 1, -37, ..., -7, -9, -25],\n", " [ 40, -43, 102, ..., 6, 26, -8],\n", " [-18, -13, 76, ..., -13, 76, -44]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.23.attn.c_proj.bias => tensor([ 0.7109, 0.7461, 0.3867, ..., 0.7578, -0.8828, -1.3984],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.23.attn.c_proj.SCB => tensor([0.1270, 0.1924, 
0.1660, ..., 0.1416, 0.1777, 0.1445], device='cuda:0')\n", "transformer.h.23.attn.c_proj.weight_format => col_turing\n", "transformer.h.23.ln_2.weight => tensor([1.8438, 1.9453, 1.9531, ..., 1.9297, 1.9219, 1.9766], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.23.ln_2.bias => tensor([-0.1914, -0.7734, -0.2109, ..., -0.0874, 0.1289, 0.7617],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.23.mlp.c_fc.weight => tensor([[-67, -32, 4, ..., 3, 5, 4],\n", " [ 29, 62, -10, ..., 10, 23, 15],\n", " [ 11, 11, 27, ..., -61, 44, 16],\n", " ...,\n", " [ 3, 54, 9, ..., -32, 32, 12],\n", " [-40, 14, -17, ..., -61, 37, -16],\n", " [ 29, 4, -23, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.23.mlp.c_fc.bias => tensor([ 0.1147, -3.5156, 0.1562, ..., -0.8242, 0.2275, 0.5195],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.23.mlp.c_fc.SCB => tensor([0.1196, 0.2715, 0.1309, ..., 0.1270, 0.1318, 0.1191], device='cuda:0')\n", "transformer.h.23.mlp.c_fc.weight_format => col_turing\n", "transformer.h.23.mlp.c_fc2.weight => tensor([[ -39, -14, -47, ..., 10, -36, -101],\n", " [ -14, -20, 31, ..., -1, -10, -18],\n", " [ -12, -37, 39, ..., 69, -29, -2],\n", " ...,\n", " [ -8, -23, -12, ..., 28, -28, 0],\n", " [ 35, 6, -11, ..., -18, 22, -30],\n", " [ -59, 13, 23, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.23.mlp.c_fc2.bias => tensor([-2.5469, -2.5781, -1.7500, ..., -2.3594, -1.4531, -2.7344],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.23.mlp.c_fc2.SCB => tensor([0.1387, 0.1338, 0.1299, ..., 0.1123, 0.1089, 0.1069], device='cuda:0')\n", "transformer.h.23.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.23.mlp.c_proj.weight => tensor([[ 3, -26, 55, ..., -39, -1, -16],\n", " [ -22, 30, -54, ..., -7, -7, 61],\n", " [ 7, -9, 16, ..., -16, -4, 1],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [-101, 39, -58, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.23.mlp.c_proj.bias => tensor([ 0.3125, 0.9688, 0.4785, ..., 0.2812, -0.4746, -1.1328],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.23.mlp.c_proj.SCB => tensor([0.1260, 0.1328, 0.1543, ..., 0.1426, 0.1465, 0.1602], device='cuda:0')\n", "transformer.h.23.mlp.c_proj.weight_format => col_turing\n", "transformer.h.24.ln_1.weight => tensor([3.5625, 2.8281, 3.0625, ..., 3.0781, 3.2500, 2.5312], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.24.ln_1.bias => tensor([-0.1670, -1.1016, -0.3223, ..., -0.1001, 0.2070, 0.7305],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.24.attn.c_attn.weight => tensor([[ -6, -1, -35, ..., 39, 49, 6],\n", " [-31, -24, -27, ..., 23, -39, 18],\n", " [-34, 38, 24, ..., 17, 23, 43],\n", " ...,\n", " [-10, -4, -28, ..., 13, 13, 57],\n", " [ 73, -54, -29, ..., 22, -35, 36],\n", " [ 31, 34, 37, ..., -56, 9, -47]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.24.attn.c_attn.bias => tensor([-0.4004, -0.6094, -0.6172, ..., -0.1436, 0.1738, 0.2129],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.24.attn.c_attn.SCB => tensor([0.1147, 0.1089, 0.1182, ..., 0.1543, 0.1650, 0.1611], device='cuda:0')\n", "transformer.h.24.attn.c_attn.weight_format => col_turing\n", "transformer.h.24.attn.c_proj.weight => tensor([[ 13, -16, -20, ..., -11, -43, 20],\n", " [-36, 20, 4, ..., 6, 13, 4],\n", " [ 3, 22, 3, ..., 10, 18, 1],\n", " ...,\n", " [ -9, 33, 8, ..., -54, 54, -2],\n", " [-89, 
-33, -45, ..., 46, 8, 17],\n", " [-29, -2, -1, ..., 38, 82, -47]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.24.attn.c_proj.bias => tensor([ 0.7188, 0.8945, 0.2559, ..., 0.0364, -0.7969, -1.2812],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.24.attn.c_proj.SCB => tensor([0.1533, 0.1816, 0.1748, ..., 0.1309, 0.1768, 0.1475], device='cuda:0')\n", "transformer.h.24.attn.c_proj.weight_format => col_turing\n", "transformer.h.24.ln_2.weight => tensor([1.9297, 2.0156, 2.0156, ..., 2.0312, 2.0312, 2.0312], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.24.ln_2.bias => tensor([-0.1621, -0.8477, -0.2383, ..., -0.0408, 0.1562, 0.8203],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.24.mlp.c_fc.weight => tensor([[ 74, 59, 2, ..., -5, 11, -59],\n", " [ 51, 39, -14, ..., -9, -55, 9],\n", " [ 30, -5, -6, ..., 20, 40, -17],\n", " ...,\n", " [-33, -3, -41, ..., -14, -41, -21],\n", " [ 24, -9, -39, ..., 12, 5, -51],\n", " [ 17, 5, 43, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.24.mlp.c_fc.bias => tensor([-0.2100, -2.7969, 0.3594, ..., 0.3828, -0.4824, -0.3320],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.24.mlp.c_fc.SCB => tensor([0.1299, 0.2041, 0.1455, ..., 0.1152, 0.1221, 0.1631], device='cuda:0')\n", "transformer.h.24.mlp.c_fc.weight_format => col_turing\n", "transformer.h.24.mlp.c_fc2.weight => tensor([[-61, -42, -58, ..., -25, -51, 97],\n", " [-49, -36, 2, ..., -32, 1, -29],\n", " [ 21, -22, -1, ..., -47, -36, -10],\n", " ...,\n", " [ 59, 12, -3, ..., 48, 2, 40],\n", " [ 43, 16, 24, ..., 13, 19, -69],\n", " [-18, 31, -75, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.24.mlp.c_fc2.bias => tensor([-2.0938, -1.4141, -1.8281, ..., -1.4062, -1.5078, -2.1562],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.24.mlp.c_fc2.SCB => tensor([0.1147, 0.1270, 0.1016, ..., 0.1060, 0.1045, 0.1279], device='cuda:0')\n", "transformer.h.24.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.24.mlp.c_proj.weight => tensor([[-51, -17, -34, ..., 10, -31, 32],\n", " [ 27, -34, 10, ..., 28, -49, 44],\n", " [ 31, -49, -1, ..., -66, 59, 1],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 64, 13, -39, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.24.mlp.c_proj.bias => tensor([ 0.4219, 0.6602, -0.1289, ..., 0.0493, -0.2354, -0.8164],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.24.mlp.c_proj.SCB => tensor([0.1416, 0.1377, 0.1309, ..., 0.1465, 0.1523, 0.1445], device='cuda:0')\n", "transformer.h.24.mlp.c_proj.weight_format => col_turing\n", "transformer.h.25.ln_1.weight => tensor([3.5156, 2.8750, 3.0000, ..., 2.9844, 2.9844, 2.5469], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.25.ln_1.bias => tensor([-0.1377, -1.0625, -0.2695, ..., -0.1191, 0.1865, 0.7305],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.25.attn.c_attn.weight => tensor([[-29, 27, -26, ..., 3, 28, -22],\n", " [-40, 43, -45, ..., 43, -3, -16],\n", " [-32, 4, 40, ..., -32, -3, 31],\n", " ...,\n", " [ -7, -12, -44, ..., 16, -23, 2],\n", " [ -2, 7, -22, ..., -48, -23, -38],\n", " [ 24, -38, -49, ..., 42, -9, 73]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.25.attn.c_attn.bias => tensor([ 0.3105, -0.4238, -0.5938, ..., -0.4160, 0.0299, 0.1660],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.25.attn.c_attn.SCB => tensor([0.1147, 0.1030, 0.1084, 
..., 0.1289, 0.1650, 0.1777], device='cuda:0')\n", "transformer.h.25.attn.c_attn.weight_format => col_turing\n", "transformer.h.25.attn.c_proj.weight => tensor([[-14, 36, -67, ..., 5, -2, 10],\n", " [ 13, 18, 5, ..., -14, -29, -10],\n", " [-13, -9, 8, ..., -31, -19, 9],\n", " ...,\n", " [ 24, 16, -14, ..., 35, 34, -80],\n", " [ 32, -46, 4, ..., -35, -54, -6],\n", " [ 10, -16, 15, ..., 84, -25, -28]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.25.attn.c_proj.bias => tensor([ 0.4727, 0.3086, -0.1035, ..., 0.3164, -0.4004, -0.4258],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.25.attn.c_proj.SCB => tensor([0.1445, 0.1660, 0.1416, ..., 0.1484, 0.1660, 0.1729], device='cuda:0')\n", "transformer.h.25.attn.c_proj.weight_format => col_turing\n", "transformer.h.25.ln_2.weight => tensor([2.0625, 2.0781, 2.0938, ..., 2.0625, 2.1406, 2.1094], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.25.ln_2.bias => tensor([-0.1680, -0.8008, -0.2158, ..., 0.0024, 0.0859, 0.6992],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.25.mlp.c_fc.weight => tensor([[ -14, 25, -22, ..., 38, -56, -58],\n", " [ -3, -22, -2, ..., -13, -30, -26],\n", " [ 3, 75, 66, ..., -18, -33, 30],\n", " ...,\n", " [ 31, -63, 28, ..., -22, -5, 6],\n", " [ 35, -8, -80, ..., 30, -59, -4],\n", " [ -4, 39, -100, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.25.mlp.c_fc.bias => tensor([-0.1177, -3.1250, 0.3008, ..., 0.0947, -0.7539, -0.0981],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.25.mlp.c_fc.SCB => tensor([0.1377, 0.1230, 0.1133, ..., 0.1177, 0.1133, 0.1406], device='cuda:0')\n", "transformer.h.25.mlp.c_fc.weight_format => col_turing\n", "transformer.h.25.mlp.c_fc2.weight => tensor([[-100, 7, 18, ..., -45, 10, -18],\n", " [ -2, 52, 36, ..., 22, -42, 29],\n", " [ 51, 34, 22, ..., 5, 58, -3],\n", " ...,\n", " [ -7, 9, -36, ..., 55, 12, -44],\n", " [ 58, -5, -4, ..., 38, -24, 16],\n", " [ 33, -23, -5, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.25.mlp.c_fc2.bias => tensor([-2.2031, -0.4395, -1.5000, ..., -2.1562, -1.7500, -2.4688],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.25.mlp.c_fc2.SCB => tensor([0.1040, 0.1182, 0.1094, ..., 0.1201, 0.1201, 0.1206], device='cuda:0')\n", "transformer.h.25.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.25.mlp.c_proj.weight => tensor([[-17, -37, -1, ..., -20, 4, 33],\n", " [ 6, -31, 36, ..., -16, 24, 22],\n", " [-11, -2, -30, ..., -27, 11, 16],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 7, -56, -2, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.25.mlp.c_proj.bias => tensor([ 0.2578, 0.7461, -0.4531, ..., 0.5273, -0.3398, -0.5117],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.25.mlp.c_proj.SCB => tensor([0.1338, 0.1357, 0.1338, ..., 0.1377, 0.1289, 0.1348], device='cuda:0')\n", "transformer.h.25.mlp.c_proj.weight_format => col_turing\n", "transformer.h.26.ln_1.weight => tensor([3.9219, 3.0000, 3.2500, ..., 3.2344, 3.2969, 2.5781], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.26.ln_1.bias => tensor([-0.1270, -1.0703, -0.2637, ..., -0.1367, 0.2383, 0.6016],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.26.attn.c_attn.weight => tensor([[-34, -21, -8, ..., -73, 7, 6],\n", " [-46, -48, -38, ..., 5, -56, -30],\n", " [ 19, 9, -16, ..., 34, 15, -14],\n", " ...,\n", " [-55, -35, 14, ..., 53, 51, -12],\n", " [ 32, 5, 
-28, ..., -3, 41, 9],\n", " [-19, -34, 8, ..., -17, -46, -9]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.26.attn.c_attn.bias => tensor([ 0.0554, -0.6719, -0.2148, ..., 0.1240, -0.3496, 0.2715],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.26.attn.c_attn.SCB => tensor([0.1045, 0.1060, 0.1289, ..., 0.1562, 0.1572, 0.1846], device='cuda:0')\n", "transformer.h.26.attn.c_attn.weight_format => col_turing\n", "transformer.h.26.attn.c_proj.weight => tensor([[-32, 15, 6, ..., 59, 36, -6],\n", " [-15, -25, -14, ..., -19, 5, 53],\n", " [-14, 2, -21, ..., -27, 24, 23],\n", " ...,\n", " [ 40, 13, -11, ..., 22, -27, -5],\n", " [ 2, 13, -80, ..., 33, 2, 67],\n", " [-22, -17, 27, ..., 28, -9, -17]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.26.attn.c_proj.bias => tensor([ 0.9219, 0.0540, 0.0112, ..., 0.1953, -0.6094, -0.4023],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.26.attn.c_proj.SCB => tensor([0.1602, 0.1748, 0.1826, ..., 0.1621, 0.1465, 0.1660], device='cuda:0')\n", "transformer.h.26.attn.c_proj.weight_format => col_turing\n", "transformer.h.26.ln_2.weight => tensor([2.1562, 2.1406, 2.1406, ..., 2.1406, 2.1094, 2.2031], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.26.ln_2.bias => tensor([-0.1562, -0.7344, -0.1582, ..., -0.0260, 0.1348, 0.6094],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.26.mlp.c_fc.weight => tensor([[-37, 16, 33, ..., -62, -14, 32],\n", " [-13, -34, -8, ..., -6, 40, 1],\n", " [ 20, -13, 4, ..., 5, 51, -9],\n", " ...,\n", " [ 7, -28, -28, ..., 2, -43, 13],\n", " [-26, -76, -6, ..., -74, -12, -49],\n", " [ 20, -66, -30, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.26.mlp.c_fc.bias => tensor([ 0.6250, 0.6523, 0.1152, ..., 0.7539, -3.9844, 0.8164],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.26.mlp.c_fc.SCB => tensor([0.1553, 0.2295, 0.1289, ..., 0.1084, 0.1484, 0.1963], device='cuda:0')\n", "transformer.h.26.mlp.c_fc.weight_format => col_turing\n", "transformer.h.26.mlp.c_fc2.weight => tensor([[-18, 0, 26, ..., -10, 9, 21],\n", " [ 20, -42, 1, ..., -42, -20, 4],\n", " [ 3, 36, 17, ..., 36, -48, 34],\n", " ...,\n", " [ 6, 11, 7, ..., -8, -44, -45],\n", " [ 18, 16, 4, ..., 30, 16, -7],\n", " [-21, -12, 3, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.26.mlp.c_fc2.bias => tensor([-1.7812, 0.4082, -1.8516, ..., -1.0625, -1.2266, -0.3809],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.26.mlp.c_fc2.SCB => tensor([0.1147, 0.1475, 0.1484, ..., 0.1235, 0.1543, 0.1484], device='cuda:0')\n", "transformer.h.26.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.26.mlp.c_proj.weight => tensor([[ -3, 27, -22, ..., -64, 52, -6],\n", " [ 2, -23, -46, ..., -43, -12, -44],\n", " [ 20, -21, -3, ..., 54, 9, 19],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 16, -27, -19, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.26.mlp.c_proj.bias => tensor([ 0.8906, -0.0562, -0.4180, ..., 0.0854, -0.8594, -0.5625],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.26.mlp.c_proj.SCB => tensor([0.1387, 0.1318, 0.1338, ..., 0.1494, 0.1406, 0.1494], device='cuda:0')\n", "transformer.h.26.mlp.c_proj.weight_format => col_turing\n", "transformer.h.27.ln_1.weight => tensor([4.0312, 3.0625, 3.3125, ..., 3.3281, 3.3906, 2.7031], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.27.ln_1.bias => tensor([-0.2354, 
-1.0391, -0.1650, ..., -0.1895, 0.2852, 0.6016],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.27.attn.c_attn.weight => tensor([[ 107, 17, 24, ..., 54, 47, 41],\n", " [ -30, 51, -1, ..., -19, 0, -39],\n", " [ 30, 28, -117, ..., 22, -14, 84],\n", " ...,\n", " [ -9, -22, -9, ..., 58, -29, 12],\n", " [ 8, 10, 68, ..., 28, -57, -10],\n", " [ 35, 23, 62, ..., -24, 12, 57]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.27.attn.c_attn.bias => tensor([-0.6172, -0.0830, -0.4844, ..., -0.4141, 0.1396, -0.5312],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.27.attn.c_attn.SCB => tensor([0.1206, 0.1079, 0.1035, ..., 0.1494, 0.1660, 0.1719], device='cuda:0')\n", "transformer.h.27.attn.c_attn.weight_format => col_turing\n", "transformer.h.27.attn.c_proj.weight => tensor([[-14, 10, 11, ..., 27, 46, -52],\n", " [-37, -28, -34, ..., 16, -45, -18],\n", " [ 15, -38, -50, ..., 38, 20, -3],\n", " ...,\n", " [-41, 29, 2, ..., -14, 1, -27],\n", " [-54, 40, -29, ..., 35, -55, 19],\n", " [ 2, 30, 9, ..., -10, -68, -11]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.27.attn.c_proj.bias => tensor([ 1.5234, -0.2930, -0.4395, ..., 0.1553, -1.1641, -0.3438],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.27.attn.c_proj.SCB => tensor([0.1748, 0.1719, 0.1680, ..., 0.1826, 0.1611, 0.1367], device='cuda:0')\n", "transformer.h.27.attn.c_proj.weight_format => col_turing\n", "transformer.h.27.ln_2.weight => tensor([2.1875, 2.1719, 2.1875, ..., 2.2344, 2.2031, 2.2188], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.27.ln_2.bias => tensor([-0.2656, -0.7695, 0.0048, ..., -0.0415, 0.3184, 0.6016],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.27.mlp.c_fc.weight => tensor([[-17, 10, 3, ..., -20, -4, 7],\n", " [ 36, 5, -14, ..., -18, 6, 40],\n", " [-20, -9, -7, ..., 24, 8, -39],\n", " ...,\n", " [ -4, 30, -2, ..., -19, 55, 44],\n", " [ -2, 1, -7, ..., 14, 8, 71],\n", " [ 39, 54, 31, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.27.mlp.c_fc.bias => tensor([ 0.4883, -1.4141, 0.3301, ..., 0.2344, -1.3203, 0.2139],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.27.mlp.c_fc.SCB => tensor([0.1377, 0.1250, 0.1299, ..., 0.1196, 0.1729, 0.1377], device='cuda:0')\n", "transformer.h.27.mlp.c_fc.weight_format => col_turing\n", "transformer.h.27.mlp.c_fc2.weight => tensor([[ -54, -33, 19, ..., -30, -32, 50],\n", " [ -17, -14, 2, ..., -26, -62, -1],\n", " [ 25, -20, -19, ..., -71, 1, -14],\n", " ...,\n", " [ -21, -42, -10, ..., -17, 21, 39],\n", " [ 16, 23, -41, ..., -58, -16, -22],\n", " [ 53, 70, -103, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.27.mlp.c_fc2.bias => tensor([-1.5625, -0.7500, -1.0781, ..., -1.8438, -0.7734, -1.9219],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.27.mlp.c_fc2.SCB => tensor([0.1064, 0.1289, 0.1123, ..., 0.1250, 0.1001, 0.1250], device='cuda:0')\n", "transformer.h.27.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.27.mlp.c_proj.weight => tensor([[ 14, 31, 43, ..., -36, -13, -20],\n", " [-21, -5, 40, ..., 28, -68, -20],\n", " [ 50, 10, 11, ..., -49, -8, -23],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [-45, -40, 44, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.27.mlp.c_proj.bias => tensor([ 1.2500, -0.4141, -0.7422, ..., 0.1943, -0.7461, -0.3965],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.27.mlp.c_proj.SCB 
=> tensor([0.1426, 0.1621, 0.1377, ..., 0.1348, 0.1367, 0.1309], device='cuda:0')\n", "transformer.h.27.mlp.c_proj.weight_format => col_turing\n", "transformer.h.28.ln_1.weight => tensor([4.1875, 3.3281, 3.4219, ..., 3.6562, 3.6250, 2.9219], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.28.ln_1.bias => tensor([-0.3750, -0.8750, -0.1836, ..., -0.2773, 0.3984, 0.6914],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.28.attn.c_attn.weight => tensor([[-22, 1, -25, ..., -6, 23, 27],\n", " [ -2, 53, -1, ..., 15, 78, 3],\n", " [-48, -40, -16, ..., -38, 22, 18],\n", " ...,\n", " [-19, -5, 1, ..., -30, 61, 30],\n", " [ -4, -39, 34, ..., 27, -4, 8],\n", " [-11, 34, -29, ..., 8, 44, 2]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.28.attn.c_attn.bias => tensor([-0.4961, 0.6328, -0.5898, ..., 0.0113, 0.5234, -0.7031],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.28.attn.c_attn.SCB => tensor([0.0918, 0.0918, 0.0977, ..., 0.1650, 0.2256, 0.1514], device='cuda:0')\n", "transformer.h.28.attn.c_attn.weight_format => col_turing\n", "transformer.h.28.attn.c_proj.weight => tensor([[-39, 62, -45, ..., -58, 25, -23],\n", " [ -8, 26, -16, ..., 38, 7, 1],\n", " [-20, -4, 13, ..., 8, -4, -45],\n", " ...,\n", " [-12, 10, -6, ..., 46, -51, 7],\n", " [ 17, 31, 9, ..., 12, 0, -17],\n", " [-53, 29, 40, ..., 9, 50, 33]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.28.attn.c_proj.bias => tensor([ 1.4219, -0.7969, -0.4102, ..., 0.1934, -0.6328, -0.2480],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.28.attn.c_proj.SCB => tensor([0.1455, 0.1738, 0.1670, ..., 0.1631, 0.1709, 0.1504], device='cuda:0')\n", "transformer.h.28.attn.c_proj.weight_format => col_turing\n", "transformer.h.28.ln_2.weight => tensor([2.2344, 2.2969, 2.2656, ..., 2.3125, 2.3125, 2.2812], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.28.ln_2.bias => tensor([-0.3613, -0.6680, 0.0625, ..., -0.0459, 0.3340, 0.4766],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.28.mlp.c_fc.weight => tensor([[-22, -31, -45, ..., -27, 55, 35],\n", " [ 14, 15, -63, ..., -32, -27, -13],\n", " [-22, -41, 11, ..., -62, -18, 21],\n", " ...,\n", " [ 30, 30, -13, ..., 67, -31, 17],\n", " [ 16, 31, 19, ..., 64, 16, 24],\n", " [-46, -10, -16, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.28.mlp.c_fc.bias => tensor([ 0.2109, -0.2969, 0.1357, ..., 0.2285, -0.1729, -0.1396],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.28.mlp.c_fc.SCB => tensor([0.1475, 0.1338, 0.1230, ..., 0.1445, 0.1367, 0.1289], device='cuda:0')\n", "transformer.h.28.mlp.c_fc.weight_format => col_turing\n", "transformer.h.28.mlp.c_fc2.weight => tensor([[ 43, -12, 36, ..., -4, -33, -16],\n", " [-24, 21, 35, ..., 6, -52, -28],\n", " [ -5, 30, -4, ..., -18, 37, -30],\n", " ...,\n", " [-37, -58, -4, ..., -31, 16, 0],\n", " [ 8, 7, -34, ..., -9, 21, -35],\n", " [ 4, 33, 25, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.28.mlp.c_fc2.bias => tensor([-1.9062, -0.3301, -1.3984, ..., -1.8281, -1.4453, -1.7891],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.28.mlp.c_fc2.SCB => tensor([0.1245, 0.0957, 0.1152, ..., 0.1191, 0.1064, 0.1123], device='cuda:0')\n", "transformer.h.28.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.28.mlp.c_proj.weight => tensor([[-81, 47, -2, ..., 14, 1, -25],\n", " [ 10, 5, 14, ..., -18, -2, 34],\n", " [-20, 48, -14, ..., 9, 37, 5],\n", " ...,\n", " [ 0, 0, 0, 
..., 0, 0, 0],\n", " [-59, -84, 53, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.28.mlp.c_proj.bias => tensor([ 1.5234, -0.2832, -0.1338, ..., 0.3477, -0.5781, -0.0306],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.28.mlp.c_proj.SCB => tensor([0.1406, 0.1475, 0.1387, ..., 0.1650, 0.1641, 0.1562], device='cuda:0')\n", "transformer.h.28.mlp.c_proj.weight_format => col_turing\n", "transformer.h.29.ln_1.weight => tensor([3.9688, 3.3125, 3.4219, ..., 3.5781, 3.6406, 3.0312], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.29.ln_1.bias => tensor([-0.4258, -0.7930, -0.0091, ..., -0.1582, 0.6172, 0.5469],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.29.attn.c_attn.weight => tensor([[ 19, 24, -39, ..., -28, 10, 23],\n", " [-28, -2, 26, ..., 11, -47, 23],\n", " [-51, 17, 24, ..., -15, -14, 23],\n", " ...,\n", " [ -4, 54, -35, ..., -30, 10, 67],\n", " [ 22, -18, 20, ..., 18, -29, 0],\n", " [ 22, 4, 13, ..., -23, 35, 4]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.29.attn.c_attn.bias => tensor([ 0.8984, -0.8164, -2.5312, ..., 0.0271, -0.0232, 0.5742],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.29.attn.c_attn.SCB => tensor([0.1021, 0.1011, 0.1113, ..., 0.1338, 0.1543, 0.1572], device='cuda:0')\n", "transformer.h.29.attn.c_attn.weight_format => col_turing\n", "transformer.h.29.attn.c_proj.weight => tensor([[ -8, 3, -40, ..., -28, 18, -20],\n", " [ 8, 13, 20, ..., -63, -29, 19],\n", " [-13, 42, -70, ..., 6, -14, 38],\n", " ...,\n", " [ 40, -24, 49, ..., 0, 29, 46],\n", " [ 4, -5, 15, ..., -40, -27, -10],\n", " [-14, 12, 9, ..., -8, 10, 40]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.29.attn.c_proj.bias => tensor([ 1.2031, -0.5664, -0.4355, ..., 0.4062, -0.3965, -0.2090],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.29.attn.c_proj.SCB => tensor([0.1689, 0.1494, 0.1406, ..., 0.1309, 0.1406, 0.1494], device='cuda:0')\n", "transformer.h.29.attn.c_proj.weight_format => col_turing\n", "transformer.h.29.ln_2.weight => tensor([2.3438, 2.3438, 2.3594, ..., 2.3750, 2.3594, 2.3594], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.29.ln_2.bias => tensor([-0.4062, -0.5508, 0.0952, ..., -0.0476, 0.3281, 0.4297],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.29.mlp.c_fc.weight => tensor([[-28, 35, -77, ..., 17, 35, 78],\n", " [-69, -17, 9, ..., 47, -17, -21],\n", " [-12, -44, -36, ..., -52, 49, 71],\n", " ...,\n", " [ 50, -13, -21, ..., 12, 19, -17],\n", " [-58, 42, 11, ..., 26, -12, 16],\n", " [ 21, 13, 16, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.29.mlp.c_fc.bias => tensor([ 0.2158, -0.6719, 0.2520, ..., -0.1846, 0.2090, 0.2910],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.29.mlp.c_fc.SCB => tensor([0.1182, 0.1299, 0.1338, ..., 0.1226, 0.1167, 0.1152], device='cuda:0')\n", "transformer.h.29.mlp.c_fc.weight_format => col_turing\n", "transformer.h.29.mlp.c_fc2.weight => tensor([[ -38, 21, 30, ..., -62, -24, 58],\n", " [-100, 59, 19, ..., -31, -50, 12],\n", " [ -2, 53, -1, ..., -19, -11, 18],\n", " ...,\n", " [ 10, -26, 60, ..., -35, -60, -6],\n", " [ -76, 21, 14, ..., -3, 20, 7],\n", " [ 17, 22, -42, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.29.mlp.c_fc2.bias => tensor([-1.3828, -0.5703, -0.9961, ..., -1.9219, -0.9766, -1.3203],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.29.mlp.c_fc2.SCB 
=> tensor([0.1187, 0.0996, 0.1099, ..., 0.1128, 0.1099, 0.1084], device='cuda:0')\n", "transformer.h.29.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.29.mlp.c_proj.weight => tensor([[-18, 44, -36, ..., 4, -8, -64],\n", " [ 17, -11, 17, ..., -59, 7, 5],\n", " [ 80, 10, 26, ..., -57, 18, 24],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ -8, -18, 4, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.29.mlp.c_proj.bias => tensor([ 0.3398, -0.4414, -0.4668, ..., 0.2178, -0.3008, -0.1660],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.29.mlp.c_proj.SCB => tensor([0.1631, 0.1377, 0.1338, ..., 0.1338, 0.1387, 0.1484], device='cuda:0')\n", "transformer.h.29.mlp.c_proj.weight_format => col_turing\n", "transformer.h.30.ln_1.weight => tensor([4.0938, 3.6406, 3.8594, ..., 3.6875, 3.9062, 3.2656], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.30.ln_1.bias => tensor([-0.3477, -0.7930, -0.0114, ..., -0.3301, 0.4824, 0.6367],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.30.attn.c_attn.weight => tensor([[ 8, -8, 24, ..., -8, -15, 4],\n", " [-18, 51, 13, ..., 66, -8, -37],\n", " [-48, -14, 48, ..., 27, -32, 60],\n", " ...,\n", " [-40, 0, 4, ..., 0, -17, -48],\n", " [-33, 51, 13, ..., -43, -77, -2],\n", " [ 16, 35, -29, ..., -4, 72, -16]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.30.attn.c_attn.bias => tensor([ 1.1484, 0.7109, -0.6484, ..., -0.1021, -0.1689, -0.0566],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.30.attn.c_attn.SCB => tensor([0.1133, 0.0894, 0.1099, ..., 0.1982, 0.1533, 0.2070], device='cuda:0')\n", "transformer.h.30.attn.c_attn.weight_format => col_turing\n", "transformer.h.30.attn.c_proj.weight => tensor([[ -2, 30, -7, ..., 19, -5, 6],\n", " [ 18, 9, -38, ..., -6, -3, 10],\n", " [ 6, -10, 22, ..., 2, 16, 15],\n", " ...,\n", " [ 9, 16, -13, ..., 23, 2, -2],\n", " [-34, 20, -11, ..., 34, -12, -2],\n", " [ 33, 10, -33, ..., 67, -32, 6]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.30.attn.c_proj.bias => tensor([ 0.3906, -0.4414, -0.6406, ..., 0.4863, -0.3809, -0.3574],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.30.attn.c_proj.SCB => tensor([0.1641, 0.1729, 0.1475, ..., 0.1699, 0.1914, 0.1406], device='cuda:0')\n", "transformer.h.30.attn.c_proj.weight_format => col_turing\n", "transformer.h.30.ln_2.weight => tensor([2.4062, 2.3594, 2.3750, ..., 2.4062, 2.3438, 2.4375], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.30.ln_2.bias => tensor([-0.2910, -0.5039, -0.0723, ..., -0.1406, 0.5391, 0.6016],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.30.mlp.c_fc.weight => tensor([[-60, 49, -2, ..., -15, -8, 41],\n", " [ -2, 41, 19, ..., 31, 0, 116],\n", " [-19, 16, -15, ..., -7, -8, 19],\n", " ...,\n", " [ 31, -28, -63, ..., -21, 20, 18],\n", " [ 42, -49, -14, ..., 19, 57, -14],\n", " [-21, 18, 46, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.30.mlp.c_fc.bias => tensor([1.3438, 2.2500, 1.2734, ..., 1.8828, 0.1104, 3.2188], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.30.mlp.c_fc.SCB => tensor([0.1494, 0.1504, 0.1348, ..., 0.1406, 0.1201, 0.2256], device='cuda:0')\n", "transformer.h.30.mlp.c_fc.weight_format => col_turing\n", "transformer.h.30.mlp.c_fc2.weight => tensor([[ 24, -32, 39, ..., 35, -16, 26],\n", " [-24, 45, -15, ..., -40, 10, 16],\n", " [-43, -25, -25, ..., -76, -16, 13],\n", " ...,\n", " [ 1, -40, 72, ..., 
-33, 83, -31],\n", " [ -1, -28, 3, ..., 79, 2, 27],\n", " [ 46, 65, -15, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.30.mlp.c_fc2.bias => tensor([-4.0000, -1.8516, -1.1328, ..., -1.6719, -0.7422, -3.0156],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.30.mlp.c_fc2.SCB => tensor([0.1172, 0.1030, 0.0991, ..., 0.0962, 0.1104, 0.1455], device='cuda:0')\n", "transformer.h.30.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.30.mlp.c_proj.weight => tensor([[ 10, 31, 8, ..., 23, 36, 8],\n", " [-17, 9, -13, ..., -21, -31, 7],\n", " [ 0, -41, -16, ..., 32, -32, -8],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 5, -27, -23, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.30.mlp.c_proj.bias => tensor([-0.9805, -0.2852, -0.3398, ..., 0.3730, 0.3457, 0.0674],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.30.mlp.c_proj.SCB => tensor([0.1738, 0.1387, 0.1357, ..., 0.1416, 0.1514, 0.1406], device='cuda:0')\n", "transformer.h.30.mlp.c_proj.weight_format => col_turing\n", "transformer.h.31.ln_1.weight => tensor([3.5312, 3.1562, 3.3594, ..., 3.4219, 3.3125, 3.0156], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.31.ln_1.bias => tensor([-0.1592, -0.5352, 0.0977, ..., -0.1797, 0.4180, 0.7227],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.31.attn.c_attn.weight => tensor([[ 26, 24, -32, ..., -24, -15, -52],\n", " [ -3, 50, -30, ..., 23, 17, -41],\n", " [-62, -12, 22, ..., 63, 7, 64],\n", " ...,\n", " [ 1, -33, 7, ..., 3, -62, 13],\n", " [ 45, 29, 1, ..., 15, -11, 0],\n", " [-35, 24, 38, ..., 7, 15, -96]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.31.attn.c_attn.bias => tensor([-0.0337, 2.6250, 0.9219, ..., 0.1709, -0.8320, 0.0928],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.31.attn.c_attn.SCB => tensor([0.1172, 0.0913, 0.1367, ..., 0.1318, 0.1187, 0.1196], device='cuda:0')\n", "transformer.h.31.attn.c_attn.weight_format => col_turing\n", "transformer.h.31.attn.c_proj.weight => tensor([[-38, 8, -9, ..., 21, 0, -40],\n", " [-16, 22, 8, ..., 6, 42, 22],\n", " [ 21, 38, 26, ..., 52, 25, 20],\n", " ...,\n", " [ -6, -8, -22, ..., 6, -33, 51],\n", " [-26, 39, 16, ..., -38, -19, 11],\n", " [-14, 2, -14, ..., 12, 75, -32]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.31.attn.c_proj.bias => tensor([-0.5234, -0.1904, 0.1279, ..., 0.7969, 0.3262, -0.4473],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.31.attn.c_proj.SCB => tensor([0.1621, 0.1621, 0.1426, ..., 0.1572, 0.1367, 0.1309], device='cuda:0')\n", "transformer.h.31.attn.c_proj.weight_format => col_turing\n", "transformer.h.31.ln_2.weight => tensor([2.1562, 2.3125, 2.2500, ..., 2.3281, 2.2188, 2.3438], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.h.31.ln_2.bias => tensor([ 0.0317, -0.3359, 0.0957, ..., -0.2617, 0.2051, 0.3086],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.31.mlp.c_fc.weight => tensor([[ 79, 10, -29, ..., -42, -70, -7],\n", " [ 3, 46, 41, ..., -3, 10, -19],\n", " [ 0, -28, -34, ..., 3, -1, -28],\n", " ...,\n", " [ 22, -31, -20, ..., 11, -11, -26],\n", " [ 13, 60, -12, ..., 20, -7, -27],\n", " [ 16, 28, -20, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.31.mlp.c_fc.bias => tensor([-0.8828, 3.7969, 7.2188, ..., 2.3906, -6.4062, 3.0781],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.31.mlp.c_fc.SCB => tensor([0.1250, 0.1836, 
0.2041, ..., 0.1416, 0.1758, 0.2734], device='cuda:0')\n", "transformer.h.31.mlp.c_fc.weight_format => col_turing\n", "transformer.h.31.mlp.c_fc2.weight => tensor([[-26, 22, 33, ..., -6, 29, -30],\n", " [ 40, 33, -29, ..., -50, -7, -21],\n", " [ 31, 0, -12, ..., -28, 5, -4],\n", " ...,\n", " [-47, -4, 52, ..., 0, -1, -23],\n", " [-18, 55, -16, ..., 31, 23, -3],\n", " [-37, 39, -36, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.31.mlp.c_fc2.bias => tensor([-6.3750, -2.6875, -2.3906, ..., -1.5391, -2.8750, -2.5781],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.31.mlp.c_fc2.SCB => tensor([0.1191, 0.1245, 0.2314, ..., 0.1079, 0.1973, 0.0991], device='cuda:0')\n", "transformer.h.31.mlp.c_fc2.weight_format => col_turing\n", "transformer.h.31.mlp.c_proj.weight => tensor([[-14, -7, 4, ..., -21, 5, 3],\n", " [-20, 15, 38, ..., -7, 32, -42],\n", " [-20, -1, 7, ..., -7, 2, 17],\n", " ...,\n", " [ 0, 0, 0, ..., 0, 0, 0],\n", " [ 44, -28, 24, ..., 0, 0, 0],\n", " [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0',\n", " dtype=torch.int8)\n", "transformer.h.31.mlp.c_proj.bias => tensor([-0.2793, -0.0566, -0.2539, ..., 0.3887, 0.1572, 1.1953],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.h.31.mlp.c_proj.SCB => tensor([0.1680, 0.1562, 0.1885, ..., 0.1738, 0.1426, 0.1416], device='cuda:0')\n", "transformer.h.31.mlp.c_proj.weight_format => col_turing\n", "transformer.ln_f.weight => tensor([3.6094, 3.5781, 3.5781, ..., 3.5156, 3.6562, 3.5938], device='cuda:0',\n", " dtype=torch.bfloat16)\n", "transformer.ln_f.bias => tensor([ 0.0508, -0.2422, -0.0815, ..., 0.0427, 0.0703, 0.5703],\n", " device='cuda:0', dtype=torch.bfloat16)\n", "transformer.relative_pe.slopes => tensor([[0.8398],\n", " [0.7070],\n", " [0.5938],\n", " [0.5000],\n", " [0.4199],\n", " [0.3535],\n", " [0.2969],\n", " [0.2500],\n", " [0.2100],\n", " [0.1768],\n", " [0.1484],\n", " [0.1250],\n", " [0.1050],\n", " [0.0884],\n", " [0.0742],\n", " [0.0625],\n", " [0.0525],\n", " [0.0442],\n", " [0.0371],\n", " [0.0312],\n", " [0.0262],\n", " [0.0221],\n", " [0.0186],\n", " [0.0156],\n", " [0.0131],\n", " [0.0110],\n", " [0.0093],\n", " [0.0078],\n", " [0.0066],\n", " [0.0055],\n", " [0.0046],\n", " [0.0039]], device='cuda:0', dtype=torch.bfloat16)\n", "lm_head.weight => tensor([[ 0.0200, 0.0442, 0.0562, ..., 0.0173, -0.0238, -0.0889],\n", " [-0.0259, 0.0170, -0.0221, ..., -0.0752, -0.0635, 0.0947],\n", " [-0.0276, 0.1846, 0.1533, ..., -0.0195, 0.0299, 0.0796],\n", " ...,\n", " [ 0.1182, 0.1523, 0.0742, ..., -0.1162, 0.0177, 0.0991],\n", " [ 0.0220, -0.0579, 0.0125, ..., -0.0576, 0.0327, 0.0211],\n", " [ 0.0508, -0.0217, 0.0278, ..., -0.0308, -0.0378, 0.0013]],\n", " device='cuda:0', dtype=torch.bfloat16)\n" ] } ] }, { "cell_type": "code", "source": [ "import sys\n", "import os\n", "import struct\n", "import json\n", "\n", "import torch\n", "from transformers import AutoConfig\n", "\n", "config = AutoConfig.from_pretrained(\"cerebras/btlm-3b-8k-base\", trust_remote_code=True)\n", "hparams = config.to_dict()\n", "fname_out = \"btlm-3b.ggml.bin\"\n", "\n", "print(json.dumps(hparams, indent=4, sort_keys=True))\n", "\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5OGkZ-a8hyui", "outputId": "ef706529-5bd8-4183-f43d-4c8d7eb44f23" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"_name_or_path\": \"cerebras/btlm-3b-8k-base\",\n", " \"activation_function\": \"swiglu\",\n", " \"add_cross_attention\": false,\n", " 
\"architectures\": [\n", " \"BTLMLMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.0,\n", " \"auto_map\": {\n", " \"AutoConfig\": \"cerebras/btlm-3b-8k-base--configuration_btlm.BTLMConfig\",\n", " \"AutoModel\": \"cerebras/btlm-3b-8k-base--modeling_btlm.BTLMModel\",\n", " \"AutoModelForCausalLM\": \"cerebras/btlm-3b-8k-base--modeling_btlm.BTLMLMHeadModel\",\n", " \"AutoModelForQuestionAnswering\": \"cerebras/btlm-3b-8k-base--modeling_btlm.BTLMForQuestionAnswering\",\n", " \"AutoModelForSequenceClassification\": \"cerebras/btlm-3b-8k-base--modeling_btlm.BTLMForSequenceClassification\",\n", " \"AutoModelForTokenClassification\": \"cerebras/btlm-3b-8k-base--modeling_btlm.BTLMForTokenClassification\"\n", " },\n", " \"bad_words_ids\": null,\n", " \"begin_suppress_tokens\": null,\n", " \"bos_token_id\": 50256,\n", " \"chunk_size_feed_forward\": 0,\n", " \"cross_attention_hidden_size\": null,\n", " \"decoder_start_token_id\": null,\n", " \"diversity_penalty\": 0.0,\n", " \"do_sample\": false,\n", " \"early_stopping\": false,\n", " \"embd_pdrop\": 0.0,\n", " \"encoder_no_repeat_ngram_size\": 0,\n", " \"eos_token_id\": 50256,\n", " \"exponential_decay_length_penalty\": null,\n", " \"finetuning_task\": null,\n", " \"forced_bos_token_id\": null,\n", " \"forced_eos_token_id\": null,\n", " \"id2label\": {\n", " \"0\": \"LABEL_0\",\n", " \"1\": \"LABEL_1\"\n", " },\n", " \"initializer_range\": 0.073,\n", " \"is_decoder\": false,\n", " \"is_encoder_decoder\": false,\n", " \"label2id\": {\n", " \"LABEL_0\": 0,\n", " \"LABEL_1\": 1\n", " },\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"length_penalty\": 1.0,\n", " \"max_length\": 20,\n", " \"min_length\": 0,\n", " \"model_type\": \"btlm\",\n", " \"mup_embeddings_scale\": 14.6,\n", " \"mup_output_alpha\": 2.22,\n", " \"mup_scale_qk_dot_by_d\": true,\n", " \"mup_width_scale\": 0.1,\n", " \"n_embd\": 2560,\n", " \"n_head\": 32,\n", " \"n_inner\": 6826,\n", " \"n_layer\": 32,\n", " \"n_positions\": 8192,\n", " \"no_repeat_ngram_size\": 0,\n", " \"num_beam_groups\": 1,\n", " \"num_beams\": 1,\n", " \"num_return_sequences\": 1,\n", " \"output_attentions\": false,\n", " \"output_hidden_states\": false,\n", " \"output_scores\": false,\n", " \"pad_token_id\": null,\n", " \"position_embedding_type\": \"alibi\",\n", " \"prefix\": null,\n", " \"problem_type\": null,\n", " \"pruned_heads\": {},\n", " \"remove_invalid_values\": false,\n", " \"reorder_and_upcast_attn\": false,\n", " \"repetition_penalty\": 1.0,\n", " \"resid_pdrop\": 0.0,\n", " \"return_dict\": true,\n", " \"return_dict_in_generate\": false,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"sep_token_id\": null,\n", " \"suppress_tokens\": null,\n", " \"task_specific_params\": null,\n", " \"temperature\": 1.0,\n", " \"tf_legacy_loss\": false,\n", " \"tie_encoder_decoder\": false,\n", " \"tie_word_embeddings\": true,\n", " \"tokenizer_class\": null,\n", " \"top_k\": 50,\n", " \"top_p\": 1.0,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"torchscript\": false,\n", " \"transformers_version\": \"4.31.0\",\n", " \"typical_p\": 1.0,\n", " \"use_bfloat16\": false,\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257\n", "}\n" ] } ] }, { "cell_type": "code", "source": [ "import re\n", "import numpy as np\n", "\n", "fout = open(fname_out, \"wb\")\n", "\n", "fout.write(struct.pack(\"i\", 0x67676D6C))\n", "fout.write(struct.pack(\"i\", hparams[\"vocab_size\"]))\n", "fout.write(struct.pack(\"i\", hparams[\"n_positions\"]))\n", "fout.write(struct.pack(\"i\", 
hparams[\"n_embd\"]))\n", "fout.write(struct.pack(\"i\", hparams[\"n_head\"]))\n", "fout.write(struct.pack(\"i\", hparams[\"n_layer\"]))\n", "fout.write(struct.pack(\"i\", hparams[\"n_inner\"]))\n", "fout.write(struct.pack(\"i\", 1))\n", "\n", "for i in range(hparams[\"vocab_size\"]):\n", " text = tokenizer.decode([i]).encode('utf-8')\n", " fout.write(struct.pack(\"i\", len(text)))\n", " fout.write(text)\n", "\n", "\n", "# for name in list_vars.keys():\n", "# print(name, \"=>\", list_vars[name])\n", "\n", "\n", "for name in list_vars.keys():\n", " if name[-14:] == \".weight_format\":\n", " print(\"FOUND \" + name)\n", " continue\n", "\n", "\n", "\n", "\n", " print(\"Processing variable: \" + name)\n", " data = list_vars[name].squeeze().cpu().type(dtype=torch.float16).numpy()\n", " print(\" with shape: \", data.shape)\n", "\n", " # rename headers to keep compatibility\n", " if name == \"transformer.ln_f.weight\":\n", " name = \"model/ln_f/g\"\n", " elif name == \"transformer.ln_f.bias\":\n", " name = \"model/ln_f/b\"\n", " elif name == \"transformer.wte.weight\":\n", " name = \"model/wte\"\n", " elif name == \"transformer.wpe.weight\":\n", " name = \"model/wpe\"\n", " elif name == \"lm_head.weight\":\n", " name = \"model/lm_head\"\n", " elif name == \"transformer.relative_pe.slopes\":\n", " name = \"model/relative_pe/slopes\"\n", " elif re.match(r\"transformer.h\\.\\d+\\.ln_1\\.weight\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/ln_1/g\"\n", " elif re.match(r\"transformer.h\\.\\d+\\.ln_1\\.bias\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/ln_1/b\"\n", " elif re.match(r\"transformer.h\\.\\d+\\.attn\\.c_attn\\.weight\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/attn/c_attn/w\"\n", " elif re.match(r\"transformer.h\\.\\d+\\.attn\\.c_attn\\.bias\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/attn/c_attn/b\"\n", " elif re.match(r\"transformer.h\\.\\d+\\.attn\\.c_proj\\.weight\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/attn/c_proj/w\"\n", " elif re.match(r\"transformer.h.\\d+.attn.c_proj.bias\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/attn/c_proj/b\"\n", " elif re.match(r\"transformer.h.\\d+.ln_2.weight\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/ln_2/g\"\n", " elif re.match(r\"transformer.h.\\d+.ln_2.bias\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/ln_2/b\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_fc.weight\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_fc/w\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_fc.bias\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_fc/b\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_proj.weight\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_proj/w\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_proj.bias\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_proj/b\"\n", " # NEW\n", " elif re.match(r\"transformer.h.\\d+.attn.c_proj.SCB\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/attn/c_proj/scb\"\n", " elif re.match(r\"transformer.h.\\d+.attn.c_attn.SCB\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/attn/c_attn/scb\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_fc.SCB\", name):\n", " i = 
re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_fc/scb\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_fc2.weight\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_fc2/w\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_fc2.bias\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_fc2/b\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_fc2.SCB\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_fc2/scb\"\n", " elif re.match(r\"transformer.h.\\d+.mlp.c_proj.SCB\", name):\n", " i = re.findall(\"\\d+\", name)[0]\n", " name = f\"model/h{i}/mlp/c_proj/scb\"\n", "\n", " else:\n", " print(\"Unrecognized variable name. %s\", name)\n", "\n", "\n", " n_dims = len(data.shape);\n", "\n", " # ftype == 0 -> float32, ftype == 1 -> float16\n", " ftype = 1;\n", " print(\" Converting to float16\")\n", " data = data.astype(np.float16)\n", " ftype = 8\n", "\n", "\n", " # for efficiency - transpose the projection matrices\n", " # \"model/h.*/attn/c_attn/w\"\n", " # \"model/h.*/attn/c_proj/w\"\n", " # \"model/h.*/mlp/c_fc/w\"\n", " # \"model/h.*/mlp/c_proj/w\"\n", " if name[-14:] == \"/attn/c_attn/w\" or \\\n", " name[-14:] == \"/attn/c_proj/w\" or \\\n", " name[-11:] == \"/mlp/c_fc/w\" or \\\n", " name[-13:] == \"/mlp/c_proj/w\":\n", " print(\" Transposing\")\n", " data = data.transpose()\n", "\n", " # header\n", " str = name.encode('utf-8')\n", " fout.write(struct.pack(\"iii\", n_dims, len(str), ftype))\n", " for i in range(n_dims):\n", " fout.write(struct.pack(\"i\", data.shape[n_dims - 1 - i]))\n", " fout.write(str);\n", "\n", " # data\n", " data.tofile(fout)\n", "\n", "fout.close()\n", "\n", "print(\"Done. Output file: \" + fname_out)\n", "print(\"\")\n", "\n", "\n", "# write_binary()\n", "\n", "\n", "\n", "\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nwykMxZFonZd", "outputId": "b28f3092-6659-4de0-a06a-2e87c66435d5" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Processing variable: transformer.wte.weight\n", " with shape: (50257, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.0.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.0.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.0.attn.c_attn.weight_format\n", "Processing variable: transformer.h.0.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.0.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.0.attn.c_proj.weight_format\n", "Processing variable: transformer.h.0.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to 
float16\n", " Transposing\n", "Processing variable: transformer.h.0.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.0.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.0.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.0.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.0.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.0.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.0.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.0.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.0.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.1.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.1.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.1.attn.c_attn.weight_format\n", "Processing variable: transformer.h.1.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.1.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.1.attn.c_proj.weight_format\n", "Processing variable: transformer.h.1.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.1.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.1.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.1.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.1.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.1.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.1.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.1.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.1.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND 
transformer.h.1.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.2.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.2.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.2.attn.c_attn.weight_format\n", "Processing variable: transformer.h.2.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.2.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.2.attn.c_proj.weight_format\n", "Processing variable: transformer.h.2.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.2.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.2.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.2.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.2.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.2.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.2.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.2.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.2.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.2.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.3.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.3.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.3.attn.c_attn.weight_format\n", "Processing variable: transformer.h.3.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.3.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND 
transformer.h.3.attn.c_proj.weight_format\n", "Processing variable: transformer.h.3.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.3.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.3.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.3.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.3.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.3.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.3.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.3.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.3.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.3.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.4.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.4.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.4.attn.c_attn.weight_format\n", "Processing variable: transformer.h.4.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.4.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.4.attn.c_proj.weight_format\n", "Processing variable: transformer.h.4.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.4.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.4.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.4.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.4.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.4.mlp.c_fc2.weight_format\n", "Processing variable: 
transformer.h.4.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.4.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.4.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.4.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.5.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.5.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.5.attn.c_attn.weight_format\n", "Processing variable: transformer.h.5.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.5.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.5.attn.c_proj.weight_format\n", "Processing variable: transformer.h.5.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.5.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.5.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.5.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.5.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.5.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.5.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.5.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.5.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.5.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.6.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.6.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.6.attn.c_attn.weight_format\n", "Processing variable: transformer.h.6.attn.c_proj.weight\n", " with 
shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.6.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.6.attn.c_proj.weight_format\n", "Processing variable: transformer.h.6.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.6.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.6.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.6.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.6.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.6.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.6.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.6.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.6.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.6.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.7.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.7.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.7.attn.c_attn.weight_format\n", "Processing variable: transformer.h.7.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.7.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.7.attn.c_proj.weight_format\n", "Processing variable: transformer.h.7.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.7.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.7.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.7.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", 
"Processing variable: transformer.h.7.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.7.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.7.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.7.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.7.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.7.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.8.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.8.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.8.attn.c_attn.weight_format\n", "Processing variable: transformer.h.8.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.8.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.8.attn.c_proj.weight_format\n", "Processing variable: transformer.h.8.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.8.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.8.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.8.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.8.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.8.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.8.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.8.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.8.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.8.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.9.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: 
transformer.h.9.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.9.attn.c_attn.weight_format\n", "Processing variable: transformer.h.9.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.9.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.9.attn.c_proj.weight_format\n", "Processing variable: transformer.h.9.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.9.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.9.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.9.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.9.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.9.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.9.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.9.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.9.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.9.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.10.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.10.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.10.attn.c_attn.weight_format\n", "Processing variable: transformer.h.10.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.10.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.10.attn.c_proj.weight_format\n", "Processing variable: transformer.h.10.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: 
transformer.h.10.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.10.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.10.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.10.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.10.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.10.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.10.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.10.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.10.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.11.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.11.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.11.attn.c_attn.weight_format\n", "Processing variable: transformer.h.11.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.11.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.11.attn.c_proj.weight_format\n", "Processing variable: transformer.h.11.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.11.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.11.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.11.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.11.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.11.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.11.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.11.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.11.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.11.mlp.c_proj.weight_format\n", 
"Processing variable: transformer.h.12.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.12.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.12.attn.c_attn.weight_format\n", "Processing variable: transformer.h.12.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.12.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.12.attn.c_proj.weight_format\n", "Processing variable: transformer.h.12.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.12.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.12.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.12.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.12.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.12.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.12.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.12.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.12.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.12.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.13.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.13.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.13.attn.c_attn.weight_format\n", "Processing variable: transformer.h.13.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.13.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.13.attn.c_proj.weight_format\n", 
"Processing variable: transformer.h.13.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.13.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.13.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.13.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.13.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.13.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.13.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.13.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.13.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.13.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.14.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.14.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.14.attn.c_attn.weight_format\n", "Processing variable: transformer.h.14.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.14.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.14.attn.c_proj.weight_format\n", "Processing variable: transformer.h.14.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.14.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.14.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.14.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.14.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.14.mlp.c_fc2.weight_format\n", "Processing variable: 
transformer.h.14.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.14.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.14.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.14.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.15.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.15.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.15.attn.c_attn.weight_format\n", "Processing variable: transformer.h.15.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.15.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.15.attn.c_proj.weight_format\n", "Processing variable: transformer.h.15.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.15.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.15.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.15.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.15.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.15.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.15.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.15.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.15.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.15.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.16.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.16.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.16.attn.c_attn.weight_format\n", "Processing variable: 
transformer.h.16.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.16.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.16.attn.c_proj.weight_format\n", "Processing variable: transformer.h.16.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.16.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.16.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.16.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.16.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.16.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.16.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.16.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.16.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.16.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.17.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.17.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.17.attn.c_attn.weight_format\n", "Processing variable: transformer.h.17.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.17.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.17.attn.c_proj.weight_format\n", "Processing variable: transformer.h.17.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.17.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.17.mlp.c_fc.weight_format\n", "Processing variable: 
transformer.h.17.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.17.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.17.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.17.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.17.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.17.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.17.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.18.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.18.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.18.attn.c_attn.weight_format\n", "Processing variable: transformer.h.18.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.18.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.18.attn.c_proj.weight_format\n", "Processing variable: transformer.h.18.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.18.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.18.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.18.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.18.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.18.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.18.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.18.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.18.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.18.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.19.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.19.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: 
transformer.h.19.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.19.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.19.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.19.attn.c_attn.weight_format\n", "Processing variable: transformer.h.19.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.19.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.19.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.19.attn.c_proj.weight_format\n", "Processing variable: transformer.h.19.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.19.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.19.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.19.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.19.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.19.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.19.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.19.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.19.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.19.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.19.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.19.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.19.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.19.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.20.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.20.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.20.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.20.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.20.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.20.attn.c_attn.weight_format\n", "Processing variable: transformer.h.20.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.20.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.20.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.20.attn.c_proj.weight_format\n", "Processing variable: transformer.h.20.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.20.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: 
transformer.h.20.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.20.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.20.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.20.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.20.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.20.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.20.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.20.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.20.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.20.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.20.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.20.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.21.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.21.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.21.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.21.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.21.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.21.attn.c_attn.weight_format\n", "Processing variable: transformer.h.21.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.21.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.21.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.21.attn.c_proj.weight_format\n", "Processing variable: transformer.h.21.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.21.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.21.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.21.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.21.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.21.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.21.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.21.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.21.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.21.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.21.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.21.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: 
transformer.h.21.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.21.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.22.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.22.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.22.attn.c_attn.weight_format\n", "Processing variable: transformer.h.22.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.22.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.22.attn.c_proj.weight_format\n", "Processing variable: transformer.h.22.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.22.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.22.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.22.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.22.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.22.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.22.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.22.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.22.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.22.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.23.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.23.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.23.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.23.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.23.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.23.attn.c_attn.weight_format\n", "Processing variable: transformer.h.23.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.23.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: 
transformer.h.23.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.23.attn.c_proj.weight_format\n", "Processing variable: transformer.h.23.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.23.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.23.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.23.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.23.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.23.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.23.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.23.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.23.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.23.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.23.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.23.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.23.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.23.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.24.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.24.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.24.attn.c_attn.weight_format\n", "Processing variable: transformer.h.24.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.24.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.24.attn.c_proj.weight_format\n", "Processing variable: transformer.h.24.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.24.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.24.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.24.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.24.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.mlp.c_fc2.SCB\n", " with 
shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.24.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.24.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.24.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.24.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.24.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.25.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.25.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.25.attn.c_attn.weight_format\n", "Processing variable: transformer.h.25.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.25.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.25.attn.c_proj.weight_format\n", "Processing variable: transformer.h.25.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.25.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.25.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.25.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.25.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.25.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.25.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.25.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.25.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.25.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.26.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.26.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.attn.c_attn.SCB\n", " with shape: 
(7680,)\n", " Converting to float16\n", "FOUND transformer.h.26.attn.c_attn.weight_format\n", "Processing variable: transformer.h.26.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.26.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.26.attn.c_proj.weight_format\n", "Processing variable: transformer.h.26.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.26.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.26.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.26.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.26.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.26.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.26.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.26.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.26.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.26.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.27.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.27.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.27.attn.c_attn.weight_format\n", "Processing variable: transformer.h.27.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.27.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.27.attn.c_proj.weight_format\n", "Processing variable: transformer.h.27.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.27.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " 
Converting to float16\n", "FOUND transformer.h.27.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.27.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.27.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.27.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.27.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.27.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.27.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.27.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.28.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.28.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.28.attn.c_attn.weight_format\n", "Processing variable: transformer.h.28.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.28.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.28.attn.c_proj.weight_format\n", "Processing variable: transformer.h.28.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.28.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.28.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.28.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.28.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.28.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.28.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.28.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.28.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.28.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.29.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.ln_1.bias\n", " with shape: 
(2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.29.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.29.attn.c_attn.weight_format\n", "Processing variable: transformer.h.29.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.29.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.29.attn.c_proj.weight_format\n", "Processing variable: transformer.h.29.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.29.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.29.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.29.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.29.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.29.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.29.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.29.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.29.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.29.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.30.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.30.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.30.attn.c_attn.weight_format\n", "Processing variable: transformer.h.30.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.30.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.30.attn.c_proj.weight_format\n", "Processing variable: transformer.h.30.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.ln_2.bias\n", " with shape: 
(2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.30.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.30.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.30.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.30.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.30.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.30.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.30.mlp.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.30.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.30.mlp.c_proj.weight_format\n", "Processing variable: transformer.h.31.ln_1.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.31.ln_1.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.31.attn.c_attn.weight\n", " with shape: (7680, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.31.attn.c_attn.bias\n", " with shape: (7680,)\n", " Converting to float16\n", "Processing variable: transformer.h.31.attn.c_attn.SCB\n", " with shape: (7680,)\n", " Converting to float16\n", "FOUND transformer.h.31.attn.c_attn.weight_format\n", "Processing variable: transformer.h.31.attn.c_proj.weight\n", " with shape: (2560, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.31.attn.c_proj.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.31.attn.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.31.attn.c_proj.weight_format\n", "Processing variable: transformer.h.31.ln_2.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.31.ln_2.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.h.31.mlp.c_fc.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.31.mlp.c_fc.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.31.mlp.c_fc.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.31.mlp.c_fc.weight_format\n", "Processing variable: transformer.h.31.mlp.c_fc2.weight\n", " with shape: (6832, 2560)\n", " Converting to float16\n", "Processing variable: transformer.h.31.mlp.c_fc2.bias\n", " with shape: (6826,)\n", " Converting to float16\n", "Processing variable: transformer.h.31.mlp.c_fc2.SCB\n", " with shape: (6826,)\n", " Converting to float16\n", "FOUND transformer.h.31.mlp.c_fc2.weight_format\n", "Processing variable: transformer.h.31.mlp.c_proj.weight\n", " with shape: (2560, 6848)\n", " Converting to float16\n", " Transposing\n", "Processing variable: transformer.h.31.mlp.c_proj.bias\n", " with shape: (2560,)\n", 
" Converting to float16\n", "Processing variable: transformer.h.31.mlp.c_proj.SCB\n", " with shape: (2560,)\n", " Converting to float16\n", "FOUND transformer.h.31.mlp.c_proj.weight_format\n", "Processing variable: transformer.ln_f.weight\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.ln_f.bias\n", " with shape: (2560,)\n", " Converting to float16\n", "Processing variable: transformer.relative_pe.slopes\n", " with shape: (32,)\n", " Converting to float16\n", "Processing variable: lm_head.weight\n", " with shape: (50257, 2560)\n", " Converting to float16\n", "Done. Output file: btlm-3b.ggml.bin\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "from huggingface_hub import login, HfApi\n", "\n", "login()\n", "\n", "api = HfApi()\n", "\n", "api.upload_file(\n", " path_or_fileobj=\"/content/btlm-3b.ggml.bin\",\n", " path_in_repo=\"btlm-3b.ggml.bin\",\n", " repo_id=\"bornjre/btlm-3b-ggml\",\n", " repo_type=\"model\",\n", ")\n", "\n", "# api.upload_folder(\n", "# folder_path=\"/content/btlm-3b-ggml\",\n", "# repo_id=\"bornjre/btlm-3b-ggml\",\n", "# repo_type=\"model\",\n", "# )" ], "metadata": { "id": "bVYr-O99ONqd", "colab": { "base_uri": "https://localhost:8080/", "height": 196, "referenced_widgets": [ "427689dd96c94e9e8706872de004600c", "f3d6c5dcb2674419a9cc9d0a78a417ea", "1f0e6e0047dc471a91a581505031dcf4", "63e88741fa824103a38134ca7f92efc4", "760f19938d4a420fba6b604413bd75c5", "1dc3baf2eb7a4d3981a7a38d574545ea", "59aec4ac781a42709f49465aaa8beda8", "96c57c6e5c5a446d96664912b2a1f923", "acf63c0070b346f8ba0833bf9d6dbf76", "72da3e460812424db18da699f1adc68f", "6b9996976aeb46a781c2827c7eb80400", "0f5aa3f5a3fd4869ad22a7d7eb3f4a01", "1a4f383e5b6e4edea22cf8269ac3e136", "5e880b14816c40e1ade44bb4e8e3e7a0", "c214e6421b8c444eb91f01729650dfc3", "5ce845d6ca8e40c5ad7c9d07f7ad271c", "4b7fc4873cc44b9eaf915d4e8bd5e134", "c8fcc29ea48f44b49ec3547067348bc2", "665705e3373b4a2093bd921a2341bf99", "e890ec5fafb344609f506f16be57d799", "92302f6d76014a778c6cb7370d2f114c", "a1652fbe06094b51994fc22f16f9615a", "a122a0fc52604974a77fe18f7dd43c2f", "dfa55c882b3d44c395d89c53af5f68d7", "fac01390a0634c0caa885e4626fda033", "6c8e3fe6bca1438a879a0ee18abc5294", "d9ffd07e99cb4a2da0966f653351e6cf", "0005a3238bb84c619e657100845f9c84", "b7ace70b606943059acf751b89b53291", "b6e56ddf9da04a52b99de60c9e5a3f57", "55b11341f930432282c18b908e8c7c4a", "a4174533851147d0b5452428cfbcb4d0", "e4cc83f17f3941a69be2c5a328c1df7c", "9f696bef605146c088cd39c3e2d41fa7", "4b995377bec241c6b61b7e68518403bb", "b7e2a45a24724bc79db6b055ff3c6e90", "1b2fb7402b6d4bdcbe38e3e1e9179b74", "51921dbb33f445f7ab72666ecac3f85c", "e804d48983884eb7bfd85123482317f7", "a15221a1538c43c7ac162324644f2c2d", "baac4e68e10c4e7bbffeff26b066f308", "04ed18f2ff804865af93d4d73b347d77", "603ecce9c961468eb90510fe0587bc32" ] }, "outputId": "1a0aef32-812f-4265-91d6-fe66f4f2ce17" }, "execution_count": 9, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "VBox(children=(HTML(value='