{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Fine-tune a language model with dataset streaming and 8-bit optimizers", "provenance": [], "collapsed_sections": [], "include_colab_link": true }, "language_info": { "name": "python" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "372609dca95b4ddcb51491283df860f5": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_e0b881dd26d54c7c92ba9ab5923fab10", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_90eb62f7ec634e098db511a5995d807f", "IPY_MODEL_d60a799761f649378d17a044362e55b9", "IPY_MODEL_a76cf6149c6748a5aa74ada58921d31e" ] } }, "e0b881dd26d54c7c92ba9ab5923fab10": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, 
"_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "90eb62f7ec634e098db511a5995d807f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_5256dfd69e364597a29b2ad61c01ea93", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: ", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_1a231c5cffbb4225941d02cb5e3bb273" } }, "d60a799761f649378d17a044362e55b9": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_cffbeabee69446c48dfa89ac38d9f45e", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1376, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1376, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_0c02af4a252e40fba08c097f59926dc8" } }, "a76cf6149c6748a5aa74ada58921d31e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_2917b5df9cc14ec3a2fe356c12c8511e", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 3.29k/? 
[00:00<00:00, 76.9kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5c3abd7a7f354ac0b1cd3d89506f417a" } }, "5256dfd69e364597a29b2ad61c01ea93": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "1a231c5cffbb4225941d02cb5e3bb273": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "cffbeabee69446c48dfa89ac38d9f45e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": 
"ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "0c02af4a252e40fba08c097f59926dc8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "2917b5df9cc14ec3a2fe356c12c8511e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "5c3abd7a7f354ac0b1cd3d89506f417a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": 
"LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "8714bb6e944345b98b691f40adf0bf76": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_a2983efe78b94919891d6db909100934", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_b61fb9c3745f4c468954713b07f8f16f", "IPY_MODEL_5f05edadc8c943aa82a790e815253a48", "IPY_MODEL_867c7dd23eb64a1b8d02a7cf8a4ad64a" ] } }, "a2983efe78b94919891d6db909100934": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, 
"width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b61fb9c3745f4c468954713b07f8f16f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_01445fea254a436fa464b6006f3abd92", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: ", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_74c4d6d598bf4173999a56fa849506d3" } }, "5f05edadc8c943aa82a790e815253a48": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_e299d23c03444805b0a359eb049cddb3", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 492167, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 492167, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": 
"@jupyter-widgets/controls", "layout": "IPY_MODEL_c2f4080d692a46debfb5a8000d2637d6" } }, "867c7dd23eb64a1b8d02a7cf8a4ad64a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_b0cbc589d149494cbb74139bedb0aafc", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2.40M/? [00:00<00:00, 18.6MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_b3fec901d0cc48a2862b900928b681f3" } }, "01445fea254a436fa464b6006f3abd92": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "74c4d6d598bf4173999a56fa849506d3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": 
null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e299d23c03444805b0a359eb049cddb3": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "c2f4080d692a46debfb5a8000d2637d6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b0cbc589d149494cbb74139bedb0aafc": { "model_module": "@jupyter-widgets/controls", "model_name": 
"DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "b3fec901d0cc48a2862b900928b681f3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "619629ebf5fc4e3ba9ad49e7e767a37d": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_87a5f7bca18e4a818d6d48e15bc68845", "_model_module": "@jupyter-widgets/controls", "children": [ 
"IPY_MODEL_75d276bd972d429483af64d1eb27624c", "IPY_MODEL_6f778a9c54f042199869eb16563bb933", "IPY_MODEL_275f3cf6de1f49c1ae1f09991b4ea99c" ] } }, "87a5f7bca18e4a818d6d48e15bc68845": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "75d276bd972d429483af64d1eb27624c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_670d440077b34a8e86bf2d620ab6fb6d", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_ac7455b1e5e5475cabf4dab505603a43" } }, "6f778a9c54f042199869eb16563bb933": { "model_module": "@jupyter-widgets/controls", 
"model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_4e82c37304d446d88e852553cbe9acb9", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 666, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 666, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_213fc01fa39b4b77ba211a9304c5ea86" } }, "275f3cf6de1f49c1ae1f09991b4ea99c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_a9dae405fbce41e3a46b05292313bab0", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 666/666 [00:00<00:00, 15.4kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_272807a127d54f5ea3f20c0bc7262a25" } }, "670d440077b34a8e86bf2d620ab6fb6d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "ac7455b1e5e5475cabf4dab505603a43": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, 
"_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "4e82c37304d446d88e852553cbe9acb9": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "213fc01fa39b4b77ba211a9304c5ea86": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", 
"justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a9dae405fbce41e3a46b05292313bab0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "272807a127d54f5ea3f20c0bc7262a25": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, 
"left": null } }, "1ce665de582c4bd392d6bc4fff9a1499": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_36cdf4ebc5684e9e88d80d5c98d86154", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_288d1670c87e469ba3fe7c0030887d19", "IPY_MODEL_eeb0bf36fb93410f82ab96b5414b6c50", "IPY_MODEL_3c8d0050e0904acf933d18316000ac8a" ] } }, "36cdf4ebc5684e9e88d80d5c98d86154": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "288d1670c87e469ba3fe7c0030887d19": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": 
"IPY_MODEL_3be14001505b4dd19293365cb4a52cbc", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_4195ef4de4ba491995e28b72e4d82a5b" } }, "eeb0bf36fb93410f82ab96b5414b6c50": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_cafc6a76a9ac4de79b46969ebab90265", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1042301, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1042301, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_9e6e0d9075454e17932e147678c1cef6" } }, "3c8d0050e0904acf933d18316000ac8a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_7c83e50b67374fa5a2d713c036ba8e84", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 0.99M/0.99M [00:00<00:00, 1.42MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_0e0d370b48c84349a7f7cc45ed5f9d09" } }, "3be14001505b4dd19293365cb4a52cbc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", 
"description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "4195ef4de4ba491995e28b72e4d82a5b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "cafc6a76a9ac4de79b46969ebab90265": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "9e6e0d9075454e17932e147678c1cef6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "7c83e50b67374fa5a2d713c036ba8e84": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "0e0d370b48c84349a7f7cc45ed5f9d09": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": 
null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "2222296ad8d14cd39e8af17c108ad5ed": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_e39daeca11584766a42f9bc6df76089c", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_8fad1d6738b84704a3b4d248dbcfac2b", "IPY_MODEL_405c42d0df19419e8563b00a3f402ae9", "IPY_MODEL_7e2ceabdd41446cc860e35f94e02de31" ] } }, "e39daeca11584766a42f9bc6df76089c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, 
"min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "8fad1d6738b84704a3b4d248dbcfac2b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_5b2f0da9535f46d78999e11162e16666", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_d47fc8e1a9a84efcb6d9a02ba973e940" } }, "405c42d0df19419e8563b00a3f402ae9": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_a127db6cdeae4700834c1dc582ecb609", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 456318, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 456318, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_b9a0b5fe13134774a7560a583513c7cd" } }, "7e2ceabdd41446cc860e35f94e02de31": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_66d58d9dab054294a81601572b143c00", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "value": " 446k/446k [00:00<00:00, 1.46MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_75f479bbe938484891214f27986364eb" } }, "5b2f0da9535f46d78999e11162e16666": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "d47fc8e1a9a84efcb6d9a02ba973e940": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a127db6cdeae4700834c1dc582ecb609": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": 
"StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "b9a0b5fe13134774a7560a583513c7cd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "66d58d9dab054294a81601572b143c00": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "75f479bbe938484891214f27986364eb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": 
{ "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "05c7977a9322499bbc00e80f0d767bee": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_9a76a0ee943343d781caf5b30ce0c6a5", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_f454951d36c74e13bc3cf04b41388345", "IPY_MODEL_1974ea03fa42428fa162eac65f8d71e4", "IPY_MODEL_96cdea5bf8ba44f3a37e5e1f70cbef62" ] } }, "9a76a0ee943343d781caf5b30ce0c6a5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, 
"flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f454951d36c74e13bc3cf04b41388345": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_221d95935ae249bb8fbf60007ed39e82", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_0a5ca2e268c24ae1a9e42d5c9dcadb29" } }, "1974ea03fa42428fa162eac65f8d71e4": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_997827c35e02494d82209fa5f1232e5f", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1355256, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1355256, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": 
"@jupyter-widgets/controls", "layout": "IPY_MODEL_aaf3281319a94b42ba21941f41d262bc" } }, "96cdea5bf8ba44f3a37e5e1f70cbef62": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_ffa4cf74c0c84e0797de60e50e1f579c", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 1.29M/1.29M [00:00<00:00, 5.20MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_d60228cfbd9f44ddbcefb20a74d99779" } }, "221d95935ae249bb8fbf60007ed39e82": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "0a5ca2e268c24ae1a9e42d5c9dcadb29": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, 
"height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "997827c35e02494d82209fa5f1232e5f": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "aaf3281319a94b42ba21941f41d262bc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ffa4cf74c0c84e0797de60e50e1f579c": { "model_module": "@jupyter-widgets/controls", 
"model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "d60228cfbd9f44ddbcefb20a74d99779": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "8bff97fa5703438685342e1b140234f5": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_542ccbd2abf941fea5312b72c51ec5b6", "_model_module": "@jupyter-widgets/controls", "children": [ 
"IPY_MODEL_f71eb8c60e4b4ef6b44b9f6b90bf25c5", "IPY_MODEL_1355930a0edf40e5ac75198e9d901bfb", "IPY_MODEL_ce047a309f984458859ddfc15aad1125" ] } }, "542ccbd2abf941fea5312b72c51ec5b6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f71eb8c60e4b4ef6b44b9f6b90bf25c5": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_c82b029db5a547f09d1b9607a5d26ecc", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_01312ccfa1254c7a8acc6a7410af711a" } }, "1355930a0edf40e5ac75198e9d901bfb": { "model_module": "@jupyter-widgets/controls", 
"model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_7db4a94408b94d71ab9f6fccded0f011", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 3247202234, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 3247202234, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_356eaaf643894327a0ba30049c3a2b92" } }, "ce047a309f984458859ddfc15aad1125": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_874af3844342411096d6d8872354f531", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 3.02G/3.02G [02:35<00:00, 10.9MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_fd8cb9272f3d4ff9ae673c724a6b757b" } }, "c82b029db5a547f09d1b9607a5d26ecc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "01312ccfa1254c7a8acc6a7410af711a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, 
"_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "7db4a94408b94d71ab9f6fccded0f011": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "356eaaf643894327a0ba30049c3a2b92": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", 
"justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "874af3844342411096d6d8872354f531": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "fd8cb9272f3d4ff9ae673c724a6b757b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, 
"left": null } }, "6e2b38e0faf64f529e849f37ad4d4eab": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_ba6e9cadc3274a64ae0738fb99c55860", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_62edc38a7caf4feca6db6de2d767fbb3", "IPY_MODEL_155616f4494440b291ae4722aaeca750", "IPY_MODEL_2045670c97194bd68a398213552d8364" ] } }, "ba6e9cadc3274a64ae0738fb99c55860": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "62edc38a7caf4feca6db6de2d767fbb3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": 
"IPY_MODEL_1ca831c613424bea8f42c4a89f44b2b5", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 0%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_b352385cb0664a8b873f5028a728c4e3" } }, "155616f4494440b291ae4722aaeca750": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_085e99b8131f4f8bb65f2ed723a21b48", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "", "max": 1000000, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 32, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_d1ec06e6abca4b35b8e22856bcc9c5ce" } }, "2045670c97194bd68a398213552d8364": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_565f3cec67374748bdc8baf19f18963f", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 32/1000000 [03:13<1383:55:47, 4.98s/it]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_358cb048c5a04a7cb2e9de91017baf01" } }, "1ca831c613424bea8f42c4a89f44b2b5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", 
"_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "b352385cb0664a8b873f5028a728c4e3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "085e99b8131f4f8bb65f2ed723a21b48": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "d1ec06e6abca4b35b8e22856bcc9c5ce": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, 
"justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "565f3cec67374748bdc8baf19f18963f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "358cb048c5a04a7cb2e9de91017baf01": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, 
"grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "Kw6MQx4xFjgy" }, "source": [ "__This notebook__ explains how to fine-tune GPT-2 Large on a large dataset in colab or on your home computer.\n", "\n", "To fit this task into a colab instance, we will use two tricks:\n", "* streaming the [C4 dataset](https://huggingface.co/datasets/c4) using [`datasets` Streaming API](https://huggingface.co/docs/datasets/dataset_streaming.html). Without that, C4 would take up over 300GB of disk space.\n", "* training with 8-Bit Adam from the [`bitsandbytes` library](https://github.com/facebookresearch/bitsandbytes). Without 8-bit compression, training GPT-2 Large would not fit in GPU memory.\n", "\n", "\n", "\n", "This notebook is based on the [\"fine-tune a language model\"](https://github.com/huggingface/notebooks/blob/master/examples/language_modeling.ipynb) tutorial by [Sylvain Gugger](https://sgugger.github.io/pages/about-me.html#about-me) as well as the [pytorch language-model example](https://github.com/huggingface/transformers/blob/master/examples/pytorch/language-modeling/run_clm_no_trainer.py)." 
] }, { "cell_type": "markdown", "metadata": { "id": "-E5Z6CLC6UfJ" }, "source": [ "# Installation" ] }, { "cell_type": "markdown", "metadata": { "id": "X4cRE8IbIrIV" }, "source": [ "If you're opening this Notebook on colab, you will probably need to install 🤗 Transformers and 🤗 Datasets. Run the following cell to install them." ] }, { "cell_type": "code", "metadata": { "id": "MOsHUjgdIrIW", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "4940dd79-5eed-4eaa-8ac4-55d6ee49a454" }, "source": [ " ! pip install datasets transformers" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting datasets\n", " Downloading datasets-1.16.1-py3-none-any.whl (298 kB)\n", "\u001b[K |████████████████████████████████| 298 kB 5.3 MB/s \n", "\u001b[?25hCollecting transformers\n", " Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)\n", "\u001b[K |████████████████████████████████| 3.1 MB 34.1 MB/s \n", "\u001b[?25hCollecting xxhash\n", " Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n", "\u001b[K |████████████████████████████████| 243 kB 52.1 MB/s \n", "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.1.5)\n", "Collecting fsspec[http]>=2021.05.0\n", " Downloading fsspec-2021.11.1-py3-none-any.whl (132 kB)\n", "\u001b[K |████████████████████████████████| 132 kB 38.4 MB/s \n", "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n", "Requirement already satisfied: pyarrow!=4.0.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n", "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.8.2)\n", "Collecting aiohttp\n", " Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", "\u001b[K 
|████████████████████████████████| 1.1 MB 36.3 MB/s \n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.19.5)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.62.3)\n", "Collecting huggingface-hub<1.0.0,>=0.1.0\n", " Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)\n", "\u001b[K |████████████████████████████████| 61 kB 395 kB/s \n", "\u001b[?25hRequirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.10.0.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.4.0)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.13)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.6)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2021.10.8)\n", 
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", "Collecting tokenizers<0.11,>=0.10.1\n", " Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n", "\u001b[K |████████████████████████████████| 3.3 MB 33.4 MB/s \n", "\u001b[?25hCollecting pyyaml\n", " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", "\u001b[K |████████████████████████████████| 596 kB 35.6 MB/s \n", "\u001b[?25hCollecting sacremoses\n", " Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n", "\u001b[K |████████████████████████████████| 895 kB 42.3 MB/s \n", "\u001b[?25hCollecting aiosignal>=1.1.2\n", " Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.2.0)\n", "Collecting multidict<7.0,>=4.5\n", " Downloading multidict-5.2.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (160 kB)\n", "\u001b[K |████████████████████████████████| 160 kB 49.5 MB/s \n", "\u001b[?25hCollecting frozenlist>=1.1.1\n", " Downloading frozenlist-1.2.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (192 kB)\n", "\u001b[K |████████████████████████████████| 192 kB 52.2 MB/s \n", "\u001b[?25hRequirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.0.8)\n", "Collecting yarl<2.0,>=1.0\n", " Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n", "\u001b[K |████████████████████████████████| 271 kB 55.7 MB/s \n", "\u001b[?25hCollecting asynctest==0.13.0\n", " Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n", 
"Collecting async-timeout<5.0,>=4.0.0a3\n", " Downloading async_timeout-4.0.1-py3-none-any.whl (5.7 kB)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.6.0)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2018.9)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n", "Installing collected packages: multidict, frozenlist, yarl, asynctest, async-timeout, aiosignal, pyyaml, fsspec, aiohttp, xxhash, tokenizers, sacremoses, huggingface-hub, transformers, datasets\n", " Attempting uninstall: pyyaml\n", " Found existing installation: PyYAML 3.13\n", " Uninstalling PyYAML-3.13:\n", " Successfully uninstalled PyYAML-3.13\n", "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.1 asynctest-0.13.0 datasets-1.16.1 frozenlist-1.2.0 fsspec-2021.11.1 huggingface-hub-0.2.1 multidict-5.2.0 pyyaml-6.0 sacremoses-0.0.46 tokenizers-0.10.3 transformers-4.12.5 xxhash-2.0.2 yarl-1.7.2\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "Aewv5KT-JnHz" }, "source": [ "\n", "We are also installing bitsandbytes which depends on the CUDA version run by your colab. The installed CUDA version is displayed in the top right when calling nvidia-smi. Use this version to install the right bitsandbytes version below. We need a GPU for this, so if you have not yet a GPU loaded use Runtime-> Change runtime type -> select GPU from the dropdown." 
] }, { "cell_type": "code", "metadata": { "id": "XprTmlZuJpXL", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "c7343f62-43ef-41f2-972d-6285793fecba" }, "source": [ "! nvidia-smi\n", "! pip install bitsandbytes-cuda112" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Sat Dec 4 19:08:21 2021 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 495.44 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |\n", "| N/A 36C P8 28W / 149W | 0MiB / 11441MiB | 0% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", " \n", "+-----------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=============================================================================|\n", "| No running processes found |\n", "+-----------------------------------------------------------------------------+\n", "Collecting bitsandbytes-cuda112\n", " Downloading bitsandbytes_cuda112-0.26.0-py3-none-any.whl (4.2 MB)\n", "\u001b[K |████████████████████████████████| 4.2 MB 5.4 MB/s \n", "\u001b[?25hInstalling collected packages: bitsandbytes-cuda112\n", "Successfully installed bitsandbytes-cuda112-0.26.0\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "ZyC8zpEqGddx" }, "source": [ "To test the bitsandbytes installation we can run a simple update with 8-bit Adam." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "A8HODqE2E_6N", "outputId": "e1888813-c491-4d02-baa9-7e0e312d2a36" }, "source": [ "import bitsandbytes as bnb\n", "import torch\n", "\n", "p = torch.nn.Parameter(torch.rand(10,10).cuda())\n", "a = torch.rand(10,10).cuda()\n", "\n", "p1 = p.data.sum().item()\n", "\n", "adam = bnb.optim.Adam8bit([p])\n", "\n", "out = a*p\n", "loss = out.sum()\n", "loss.backward()\n", "adam.step()\n", "\n", "p2 = p.data.sum().item()\n", "\n", "assert p1 != p2\n", "print('SUCCESS!')\n", "print('Installation was successful!')" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "SUCCESS!\n", "Installation was successful!\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "GDkUr_zQBoZR" }, "source": [ "# Dataset Streaming" ] }, { "cell_type": "markdown", "metadata": { "id": "i4UPzShfBrgr" }, "source": [ "Pre-training often requires a huge text dataset. Some famous datasets for pre-training are [C4](https://huggingface.co/datasets/c4), its multilingual version [mC4](https://huggingface.co/datasets/mc4), as well as [OSCAR](https://huggingface.co/datasets/oscar).\n", "\n", "These datasets can be terabytes of data and require a lot of resources:\n", "- good bandwidth to download all the data\n", "- terabytes of disk space to store the data\n", "- dozens of CPUs and good infrastructure to tokenize the text dataset\n", "- lots of time to wait for the tokenization to happen\n", "\n", "This makes it very impractical to get your hands on a dataset for pretraining when you have limited resources." ] }, { "cell_type": "markdown", "metadata": { "id": "6xabIliEFxyo" }, "source": [ "Dataset streaming is the solution in this case. Streaming gives you access to just the small subset of the dataset that you need at any given time during training. 
Text samples are progressively downloaded during training, and processed on-the-fly.\n", "\n", "Thanks to dataset streaming:\n", "- training can start directly without waiting for terabytes of data to be downloaded\n", "- you can use an arbitrarily large dataset, without being constrained by your disk space\n", "- you can process the batches of text as they arrive with a regular CPU\n", "- you don't waste time processing examples that are not immediately needed for training" ] }, { "cell_type": "markdown", "metadata": { "id": "9JIEFS8nISgn" }, "source": [ "Dataset streaming is available in [Hugging Face Datasets](https://github.com/huggingface/datasets) - you simply need to pass `streaming=True` when loading a dataset, and it can be used with a PyTorch data loader.\n", "\n", "![dataset streaming](https://huggingface.co/docs/datasets/_images/stream.gif \"Dataset Streaming\")\n", "\n", "Hugging Face Datasets also allows you to process examples on-the-fly via `.map()` and shuffle the dataset with `.shuffle()`.\n", "\n", "Here is an example of how to load the C4 dataset in streaming mode:" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 183, "referenced_widgets": [ "372609dca95b4ddcb51491283df860f5", "e0b881dd26d54c7c92ba9ab5923fab10", "90eb62f7ec634e098db511a5995d807f", "d60a799761f649378d17a044362e55b9", "a76cf6149c6748a5aa74ada58921d31e", "5256dfd69e364597a29b2ad61c01ea93", "1a231c5cffbb4225941d02cb5e3bb273", "cffbeabee69446c48dfa89ac38d9f45e", "0c02af4a252e40fba08c097f59926dc8", "2917b5df9cc14ec3a2fe356c12c8511e", "5c3abd7a7f354ac0b1cd3d89506f417a", "8714bb6e944345b98b691f40adf0bf76", "a2983efe78b94919891d6db909100934", "b61fb9c3745f4c468954713b07f8f16f", "5f05edadc8c943aa82a790e815253a48", "867c7dd23eb64a1b8d02a7cf8a4ad64a", "01445fea254a436fa464b6006f3abd92", "74c4d6d598bf4173999a56fa849506d3", "e299d23c03444805b0a359eb049cddb3", "c2f4080d692a46debfb5a8000d2637d6", "b0cbc589d149494cbb74139bedb0aafc", 
"b3fec901d0cc48a2862b900928b681f3" ] }, "id": "N4E5ZT6rJnm7", "outputId": "f7c35c58-83c5-4cd4-8032-059c6da674a3" }, "source": [ "from datasets import load_dataset\n", "\n", "c4 = load_dataset(\"c4\", \"en\", streaming=True)\n", "\n", "# Let's print a few examples\n", "for i, example in enumerate(c4[\"train\"]):\n", " print(f\"{i}: {str(example)[:200]}...\")\n", " if i == 5:\n", " break\n" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "372609dca95b4ddcb51491283df860f5", "version_minor": 0, "version_major": 2 }, "text/plain": [ "Downloading: 0%| | 0.00/1.38k [00:00=1.16.1\", \"To fix: pip install -r examples/pytorch/language-modeling/requirements.txt\")\n", "\n", "MODEL_CONFIG_CLASSES = list(MODEL_MAPPING.keys())\n", "MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)\n", "\n", "\n", "def parse_args():\n", " parser = argparse.ArgumentParser(description=\"Finetune a transformers model on a causal language modeling task\")\n", " parser.add_argument(\n", " \"--dataset_name\",\n", " type=str,\n", " default=None,\n", " help=\"The name of the dataset to use (via the datasets library).\",\n", " )\n", " parser.add_argument(\n", " \"--dataset_config_name\",\n", " type=str,\n", " default=None,\n", " help=\"The configuration name of the dataset to use (via the datasets library).\",\n", " )\n", " parser.add_argument(\n", " \"--text_column_name\",\n", " type=str,\n", " default=None,\n", " help=\"The name of the column containing the text data.\",\n", " )\n", " parser.add_argument(\n", " \"--dataset_streaming\",\n", " action=\"store_true\",\n", " help=\"If passed, will use dataset streaming (via the datasets library)\",\n", " )\n", " parser.add_argument(\n", " \"--model_name_or_path\",\n", " type=str,\n", " help=\"Path to pretrained model or model identifier from huggingface.co/models.\",\n", " required=False,\n", " )\n", " parser.add_argument(\n", " 
\"--config_name\",\n", " type=str,\n", " default=None,\n", " help=\"Pretrained config name or path if not the same as model_name\",\n", " )\n", " parser.add_argument(\n", " \"--tokenizer_name\",\n", " type=str,\n", " default=None,\n", " help=\"Pretrained tokenizer name or path if not the same as model_name\",\n", " )\n", " parser.add_argument(\n", " \"--use_slow_tokenizer\",\n", " action=\"store_true\",\n", " help=\"If passed, will use a slow tokenizer (not backed by the 🤗 Tokenizers library).\",\n", " )\n", " parser.add_argument(\n", " \"--per_device_train_batch_size\",\n", " type=int,\n", " default=1,\n", " help=\"Batch size (per device) for the training dataloader.\",\n", " )\n", " parser.add_argument(\n", " \"--learning_rate\",\n", " type=float,\n", " default=5e-5,\n", " help=\"Initial learning rate (after the potential warmup period) to use.\",\n", " )\n", " parser.add_argument(\"--weight_decay\", type=float, default=0.0, help=\"Weight decay to use.\")\n", " parser.add_argument(\"--num_train_epochs\", type=int, default=1, help=\"Total number of training epochs to perform.\")\n", " parser.add_argument(\n", " \"--max_train_steps\",\n", " type=int,\n", " default=None,\n", " help=\"Total number of training steps to perform. 
If provided, overrides num_train_epochs.\",\n", " )\n", " parser.add_argument(\n", " \"--gradient_accumulation_steps\",\n", " type=int,\n", " default=1,\n", " help=\"Number of updates steps to accumulate before performing a backward/update pass.\",\n", " )\n", " parser.add_argument(\n", " \"--lr_scheduler_type\",\n", " type=SchedulerType,\n", " default=\"linear\",\n", " help=\"The scheduler type to use.\",\n", " choices=[\"linear\", \"cosine\", \"cosine_with_restarts\", \"polynomial\", \"constant\", \"constant_with_warmup\"],\n", " )\n", " parser.add_argument(\n", " \"--num_warmup_steps\", type=int, default=3000, help=\"Number of steps for the warmup in the lr scheduler.\"\n", " )\n", " parser.add_argument(\"--output_dir\", type=str, default=None, help=\"Where to store the final model.\")\n", " parser.add_argument(\"--seed\", type=int, default=None, help=\"A seed for reproducible training.\")\n", " parser.add_argument(\n", " \"--model_type\",\n", " type=str,\n", " default=None,\n", " help=\"Model type to use if training from scratch.\",\n", " choices=MODEL_TYPES,\n", " )\n", " parser.add_argument(\n", " \"--block_size\",\n", " type=int,\n", " default=None,\n", " help=\"Optional input sequence length after tokenization. The training dataset will be truncated in block of this size for training. 
Default to the model max input length for single sentence inputs (take into account special tokens).\",\n", " )\n", " parser.add_argument(\n", " \"--preprocessing_num_workers\",\n", " type=int,\n", " default=None,\n", " help=\"The number of processes to use for the preprocessing.\",\n", " )\n", " parser.add_argument(\n", " \"--overwrite_cache\", type=bool, default=False, help=\"Overwrite the cached training and evaluation sets\"\n", " )\n", " parser.add_argument(\n", " \"--no_keep_linebreaks\", action=\"store_true\", help=\"Do not keep line breaks when using TXT files.\"\n", " )\n", " parser.add_argument(\"--push_to_hub\", action=\"store_true\", help=\"Whether or not to push the model to the Hub.\")\n", " parser.add_argument(\n", " \"--hub_model_id\", type=str, help=\"The name of the repository to keep in sync with the local `output_dir`.\"\n", " )\n", " parser.add_argument(\"--hub_token\", type=str, help=\"The token to use to push to the Model Hub.\")\n", " args = parser.parse_args(args=[])\n", "\n", " if args.push_to_hub:\n", " assert args.output_dir is not None, \"Need an `output_dir` to create a repo when `--push_to_hub` is passed.\"\n", "\n", " return args\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "4QqgoTBcLzqq" }, "source": [ "We set up the streaming dataset, the tokenizer, and the model (GPT-2 Large)." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "619629ebf5fc4e3ba9ad49e7e767a37d", "87a5f7bca18e4a818d6d48e15bc68845", "75d276bd972d429483af64d1eb27624c", "6f778a9c54f042199869eb16563bb933", "275f3cf6de1f49c1ae1f09991b4ea99c", "670d440077b34a8e86bf2d620ab6fb6d", "ac7455b1e5e5475cabf4dab505603a43", "4e82c37304d446d88e852553cbe9acb9", "213fc01fa39b4b77ba211a9304c5ea86", "a9dae405fbce41e3a46b05292313bab0", "272807a127d54f5ea3f20c0bc7262a25", "1ce665de582c4bd392d6bc4fff9a1499", "36cdf4ebc5684e9e88d80d5c98d86154", "288d1670c87e469ba3fe7c0030887d19", "eeb0bf36fb93410f82ab96b5414b6c50", "3c8d0050e0904acf933d18316000ac8a", "3be14001505b4dd19293365cb4a52cbc", "4195ef4de4ba491995e28b72e4d82a5b", "cafc6a76a9ac4de79b46969ebab90265", "9e6e0d9075454e17932e147678c1cef6", "7c83e50b67374fa5a2d713c036ba8e84", "0e0d370b48c84349a7f7cc45ed5f9d09", "2222296ad8d14cd39e8af17c108ad5ed", "e39daeca11584766a42f9bc6df76089c", "8fad1d6738b84704a3b4d248dbcfac2b", "405c42d0df19419e8563b00a3f402ae9", "7e2ceabdd41446cc860e35f94e02de31", "5b2f0da9535f46d78999e11162e16666", "d47fc8e1a9a84efcb6d9a02ba973e940", "a127db6cdeae4700834c1dc582ecb609", "b9a0b5fe13134774a7560a583513c7cd", "66d58d9dab054294a81601572b143c00", "75f479bbe938484891214f27986364eb", "05c7977a9322499bbc00e80f0d767bee", "9a76a0ee943343d781caf5b30ce0c6a5", "f454951d36c74e13bc3cf04b41388345", "1974ea03fa42428fa162eac65f8d71e4", "96cdea5bf8ba44f3a37e5e1f70cbef62", "221d95935ae249bb8fbf60007ed39e82", "0a5ca2e268c24ae1a9e42d5c9dcadb29", "997827c35e02494d82209fa5f1232e5f", "aaf3281319a94b42ba21941f41d262bc", "ffa4cf74c0c84e0797de60e50e1f579c", "d60228cfbd9f44ddbcefb20a74d99779", "8bff97fa5703438685342e1b140234f5", "542ccbd2abf941fea5312b72c51ec5b6", "f71eb8c60e4b4ef6b44b9f6b90bf25c5", "1355930a0edf40e5ac75198e9d901bfb", "ce047a309f984458859ddfc15aad1125", "c82b029db5a547f09d1b9607a5d26ecc", "01312ccfa1254c7a8acc6a7410af711a", 
"7db4a94408b94d71ab9f6fccded0f011", "356eaaf643894327a0ba30049c3a2b92", "874af3844342411096d6d8872354f531", "fd8cb9272f3d4ff9ae673c724a6b757b" ] }, "id": "PSzJDsewLPEi", "outputId": "f956fc65-1210-4db5-a2ea-fc482f357eb2" }, "source": [ "args = parse_args() # get default arguments\n", "\n", "# If passed along, set the training seed now.\n", "if args.seed is not None:\n", " set_seed(args.seed)\n", "\n", "args.dataset_name = 'c4'\n", "args.dataset_streaming = True\n", "args.text_column_name = \"text\"\n", "args.model_name_or_path = 'gpt2-large'\n", "args.dataset_config_name = \"en\"\n", "args.block_size = 1024\n", "args.max_train_steps = 1_000_000\n", "args.log_loss_interval = 25\n", "\n", "\n", "# LOAD DATA\n", "raw_train_dataset = load_dataset(args.dataset_name, args.dataset_config_name, streaming=args.dataset_streaming, split=\"train\")\n", "\n", "if args.config_name:\n", " config = AutoConfig.from_pretrained(args.config_name)\n", "elif args.model_name_or_path:\n", " config = AutoConfig.from_pretrained(args.model_name_or_path)\n", "else:\n", " config = CONFIG_MAPPING[args.model_type]()\n", " logger.warning(\"You are instantiating a new config instance from scratch.\")\n", "\n", "# TOKENIZER\n", "if args.tokenizer_name:\n", " tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, use_fast=not args.use_slow_tokenizer)\n", "elif args.model_name_or_path:\n", " tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=not args.use_slow_tokenizer)\n", "else:\n", " raise ValueError(\n", " \"You are instantiating a new tokenizer from scratch. 
This is not supported by this script.\"\n", " \"You can do it from another script, save it, and load it from here, using --tokenizer_name.\"\n", " )\n", "\n", "if args.model_name_or_path:\n", " model = AutoModelForCausalLM.from_pretrained(\n", " args.model_name_or_path,\n", " from_tf=bool(\".ckpt\" in args.model_name_or_path),\n", " config=config,\n", " )\n", "else:\n", " logger.info(\"Training new model from scratch\")\n", " model = AutoModelForCausalLM.from_config(config)\n", "\n", "model.resize_token_embeddings(len(tokenizer))\n", "\n", "model.gradient_checkpointing_enable()\n", "model.cuda() # send model to cuda preemptively to free RAM. Yep, we're using GPU as an offload memory. O tempora! O mores!" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "619629ebf5fc4e3ba9ad49e7e767a37d", "version_minor": 0, "version_major": 2 }, "text/plain": [ "Downloading: 0%| | 0.00/666 [00:00= block_size:\n", " total_length = (total_length // block_size) * block_size\n", " # Split by chunks of max_len.\n", " result = {\n", " k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n", " for k, t in concatenated_examples.items()\n", " }\n", " result[\"labels\"] = result[\"input_ids\"].copy()\n", " return result\n", "\n", "tokenized_train_dataset = raw_train_dataset.shuffle(10_000, seed=42).map(tokenize_function, batched=True)\n", "\n", "if args.block_size is None:\n", " block_size = tokenizer.model_max_length\n", " if block_size > 1024:\n", " logger.warning(\n", " f\"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). \"\n", " \"Picking 1024 instead. 
You can change that default value by passing --block_size xxx.\"\n", " )\n", " block_size = 1024\n", "else:\n", " if args.block_size > tokenizer.model_max_length:\n", " logger.warning(\n", " f\"The block_size passed ({args.block_size}) is larger than the maximum length for the model\"\n", " f\"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}.\"\n", " )\n", " block_size = min(args.block_size, tokenizer.model_max_length)\n", "\n", "# Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder\n", "# for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower\n", "# to preprocess.\n", "train_dataset = tokenized_train_dataset.map(group_texts, batched=True)\n", "train_dataset = train_dataset.shuffle(10_000, seed=42).with_format(\"torch\")\n", "\n", "# DataLoaders creation:\n", "train_dataloader = DataLoader(\n", " train_dataset, collate_fn=default_data_collator, batch_size=args.per_device_train_batch_size\n", ")" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "E0Wl7_Vx8HiP" }, "source": [ "## 8-bit Optimizers" ] }, { "cell_type": "markdown", "metadata": { "id": "l3Z-4DmQ_etm" }, "source": [ "\n", "In this example, we fine-tune GPT-2 Large with a sequence length of 1024, which runs out of memory with a regular 32-bit Adam optimizer. How can we fit this model on a colab GPU with 12 GB of memory? One solution is to use 8-bit optimizers.\n", "\n", "8-bit optimizers decrease the memory footprint for training models by storing the optimizer statistics in compressed form. For Adam, there are two optimizer buffers, one for an estimate of the running mean of the gradient and one for an estimate of the running mean of the squared gradient (its uncentered variance). Each of the buffers has the size of the full model; as such, the Adam optimizer uses 2x as much memory as the model itself. 
With 8-bit optimizers we reduce that from 32-bit to 8-bit thus reducing the memory due to Adam from 2x the model size to 0.5x the model size -- a reduction by 75%.\n", "\n", "8-bit optimizers work by using dynamic quantization and block-wise quantization to ensure stable training and the same performance as 32-bit optimizers while achieving the 75% reduction in memory.\n", "\n", "8-bit optimizers work as follows\n", "1. Chunk optimizer states into blocks\n", "2. Normalize each block into the range [-1, 1] by dividing by the absmax of the block\n", "3. Perform dynamic quantization\n", "4. Store 8-bit data\n", "\n", "For dequantization we reverse these steps. These steps are demonstrated by the example below:\n", "\n", " ![Schematic of 8-bit optimizers](https://timdettmers.com/wp-content/uploads/2021/12/schematic2.svg)\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "tpoSBTsR_n7n" }, "source": [ "import bitsandbytes as bnb\n", "#optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) # this crashes with out-of-memory error\n", "optimizer = bnb.optim.Adam8bit(model.parameters(), lr=args.learning_rate)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "q0MHy1x07_Ac" }, "source": [ "## Train the model" ] }, { "cell_type": "markdown", "metadata": { "id": "4pOO1F7b_-f9" }, "source": [ "Now we are training the model with dataset streaming and 8-bit optimizers." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 909, "referenced_widgets": [ "6e2b38e0faf64f529e849f37ad4d4eab", "ba6e9cadc3274a64ae0738fb99c55860", "62edc38a7caf4feca6db6de2d767fbb3", "155616f4494440b291ae4722aaeca750", "2045670c97194bd68a398213552d8364", "1ca831c613424bea8f42c4a89f44b2b5", "b352385cb0664a8b873f5028a728c4e3", "085e99b8131f4f8bb65f2ed723a21b48", "d1ec06e6abca4b35b8e22856bcc9c5ce", "565f3cec67374748bdc8baf19f18963f", "358cb048c5a04a7cb2e9de91017baf01" ] }, "id": "ONzFs98p__5M", "outputId": "ac4fb040-a3f3-41aa-cf8d-9699e621faad" }, "source": [ "lr_scheduler = get_scheduler(\n", " name=args.lr_scheduler_type,\n", " optimizer=optimizer,\n", " num_warmup_steps=args.num_warmup_steps,\n", " num_training_steps=args.max_train_steps,\n", ")\n", "# Train!\n", "total_batch_size = args.per_device_train_batch_size * args.gradient_accumulation_steps\n", "# Only show the progress bar once on each machine.\n", "progress_bar = tqdm(range(args.max_train_steps), disable=False)\n", "completed_steps = 0\n", "\n", "def get_free_mem():\n", " t = torch.cuda.get_device_properties(0).total_memory\n", " r = torch.cuda.memory_reserved(0)\n", " a = torch.cuda.memory_allocated(0)\n", " f = r - a\n", " return f/1024**3, r/1024**3, a/1024**3\n", "\n", "for epoch in range(args.num_train_epochs):\n", " model.train()\n", " losses = []\n", " for step, batch in enumerate(train_dataloader):\n", " gpu_data = {}\n", " for key, value in batch.items():\n", " gpu_data[key] = value.cuda()\n", "\n", " outputs = model(**gpu_data, use_cache=False)\n", " loss = outputs.loss\n", " losses.append(loss.item())\n", " loss = loss / args.gradient_accumulation_steps\n", " loss.backward()\n", " if step % args.gradient_accumulation_steps == 0 or step == args.max_train_steps:\n", " optimizer.step()\n", " lr_scheduler.step()\n", " optimizer.zero_grad()\n", " progress_bar.update(1)\n", " completed_steps += 1\n", "\n", " if step % args.log_loss_interval == 
0 and step > 0:\n", " try:\n", " perplexity = math.exp(sum(losses)/len(losses))\n", " except OverflowError:\n", " perplexity = float(\"inf\")\n", " losses = []\n", " print(f\"epoch: {epoch+1}, step: {step}, perplexity: {perplexity}\")\n", " \n", "\n", " if completed_steps >= args.max_train_steps:\n", " break\n", "\n" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6e2b38e0faf64f529e849f37ad4d4eab", "version_minor": 0, "version_major": 2 }, "text/plain": [ " 0%| | 0/1000000 [00:00 1024). Running this sequence through the model will result in indexing errors\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "epoch: 1, step: 25, perplexity: 21.84923336079448\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ ] } ] } ] }