diff --git "a/efficient_training.ipynb" "b/efficient_training.ipynb" new file mode 100644--- /dev/null +++ "b/efficient_training.ipynb" @@ -0,0 +1,4371 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Fine-tune a language model with dataset streaming and 8-bit optimizers", + "provenance": [], + "collapsed_sections": [], + "include_colab_link": true + }, + "language_info": { + "name": "python" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "372609dca95b4ddcb51491283df860f5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_e0b881dd26d54c7c92ba9ab5923fab10", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_90eb62f7ec634e098db511a5995d807f", + "IPY_MODEL_d60a799761f649378d17a044362e55b9", + "IPY_MODEL_a76cf6149c6748a5aa74ada58921d31e" + ] + } + }, + "e0b881dd26d54c7c92ba9ab5923fab10": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + 
"flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "90eb62f7ec634e098db511a5995d807f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_5256dfd69e364597a29b2ad61c01ea93", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: ", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_1a231c5cffbb4225941d02cb5e3bb273" + } + }, + "d60a799761f649378d17a044362e55b9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_cffbeabee69446c48dfa89ac38d9f45e", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1376, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1376, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_0c02af4a252e40fba08c097f59926dc8" + } + }, + "a76cf6149c6748a5aa74ada58921d31e": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_2917b5df9cc14ec3a2fe356c12c8511e", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 3.29k/? [00:00<00:00, 76.9kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_5c3abd7a7f354ac0b1cd3d89506f417a" + } + }, + "5256dfd69e364597a29b2ad61c01ea93": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "1a231c5cffbb4225941d02cb5e3bb273": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, 
+ "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "cffbeabee69446c48dfa89ac38d9f45e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "0c02af4a252e40fba08c097f59926dc8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + 
"margin": null, + "display": null, + "left": null + } + }, + "2917b5df9cc14ec3a2fe356c12c8511e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "5c3abd7a7f354ac0b1cd3d89506f417a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "8714bb6e944345b98b691f40adf0bf76": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + 
"_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_a2983efe78b94919891d6db909100934", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_b61fb9c3745f4c468954713b07f8f16f", + "IPY_MODEL_5f05edadc8c943aa82a790e815253a48", + "IPY_MODEL_867c7dd23eb64a1b8d02a7cf8a4ad64a" + ] + } + }, + "a2983efe78b94919891d6db909100934": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "b61fb9c3745f4c468954713b07f8f16f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_01445fea254a436fa464b6006f3abd92", + "_dom_classes": [], + "description": 
"", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: ", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_74c4d6d598bf4173999a56fa849506d3" + } + }, + "5f05edadc8c943aa82a790e815253a48": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_e299d23c03444805b0a359eb049cddb3", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 492167, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 492167, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_c2f4080d692a46debfb5a8000d2637d6" + } + }, + "867c7dd23eb64a1b8d02a7cf8a4ad64a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_b0cbc589d149494cbb74139bedb0aafc", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2.40M/? 
[00:00<00:00, 18.6MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_b3fec901d0cc48a2862b900928b681f3" + } + }, + "01445fea254a436fa464b6006f3abd92": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "74c4d6d598bf4173999a56fa849506d3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "e299d23c03444805b0a359eb049cddb3": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "c2f4080d692a46debfb5a8000d2637d6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "b0cbc589d149494cbb74139bedb0aafc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "b3fec901d0cc48a2862b900928b681f3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "619629ebf5fc4e3ba9ad49e7e767a37d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_87a5f7bca18e4a818d6d48e15bc68845", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_75d276bd972d429483af64d1eb27624c", + 
"IPY_MODEL_6f778a9c54f042199869eb16563bb933", + "IPY_MODEL_275f3cf6de1f49c1ae1f09991b4ea99c" + ] + } + }, + "87a5f7bca18e4a818d6d48e15bc68845": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "75d276bd972d429483af64d1eb27624c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_670d440077b34a8e86bf2d620ab6fb6d", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: 100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": 
"IPY_MODEL_ac7455b1e5e5475cabf4dab505603a43" + } + }, + "6f778a9c54f042199869eb16563bb933": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_4e82c37304d446d88e852553cbe9acb9", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 666, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 666, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_213fc01fa39b4b77ba211a9304c5ea86" + } + }, + "275f3cf6de1f49c1ae1f09991b4ea99c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_a9dae405fbce41e3a46b05292313bab0", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 666/666 [00:00<00:00, 15.4kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_272807a127d54f5ea3f20c0bc7262a25" + } + }, + "670d440077b34a8e86bf2d620ab6fb6d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "ac7455b1e5e5475cabf4dab505603a43": { + "model_module": 
"@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4e82c37304d446d88e852553cbe9acb9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "213fc01fa39b4b77ba211a9304c5ea86": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + 
"_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "a9dae405fbce41e3a46b05292313bab0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "272807a127d54f5ea3f20c0bc7262a25": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + 
"overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "1ce665de582c4bd392d6bc4fff9a1499": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_36cdf4ebc5684e9e88d80d5c98d86154", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_288d1670c87e469ba3fe7c0030887d19", + "IPY_MODEL_eeb0bf36fb93410f82ab96b5414b6c50", + "IPY_MODEL_3c8d0050e0904acf933d18316000ac8a" + ] + } + }, + "36cdf4ebc5684e9e88d80d5c98d86154": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + 
"grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "288d1670c87e469ba3fe7c0030887d19": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_3be14001505b4dd19293365cb4a52cbc", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: 100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4195ef4de4ba491995e28b72e4d82a5b" + } + }, + "eeb0bf36fb93410f82ab96b5414b6c50": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_cafc6a76a9ac4de79b46969ebab90265", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1042301, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1042301, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_9e6e0d9075454e17932e147678c1cef6" + } + }, + "3c8d0050e0904acf933d18316000ac8a": 
{ + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_7c83e50b67374fa5a2d713c036ba8e84", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 0.99M/0.99M [00:00<00:00, 1.42MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_0e0d370b48c84349a7f7cc45ed5f9d09" + } + }, + "3be14001505b4dd19293365cb4a52cbc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "4195ef4de4ba491995e28b72e4d82a5b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": 
null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "cafc6a76a9ac4de79b46969ebab90265": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "9e6e0d9075454e17932e147678c1cef6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + 
"grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "7c83e50b67374fa5a2d713c036ba8e84": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "0e0d370b48c84349a7f7cc45ed5f9d09": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "2222296ad8d14cd39e8af17c108ad5ed": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + 
"_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_e39daeca11584766a42f9bc6df76089c", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_8fad1d6738b84704a3b4d248dbcfac2b", + "IPY_MODEL_405c42d0df19419e8563b00a3f402ae9", + "IPY_MODEL_7e2ceabdd41446cc860e35f94e02de31" + ] + } + }, + "e39daeca11584766a42f9bc6df76089c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "8fad1d6738b84704a3b4d248dbcfac2b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_5b2f0da9535f46d78999e11162e16666", + "_dom_classes": 
[], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: 100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_d47fc8e1a9a84efcb6d9a02ba973e940" + } + }, + "405c42d0df19419e8563b00a3f402ae9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_a127db6cdeae4700834c1dc582ecb609", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 456318, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 456318, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_b9a0b5fe13134774a7560a583513c7cd" + } + }, + "7e2ceabdd41446cc860e35f94e02de31": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_66d58d9dab054294a81601572b143c00", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 446k/446k [00:00<00:00, 1.46MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_75f479bbe938484891214f27986364eb" + } + }, + "5b2f0da9535f46d78999e11162e16666": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + 
"_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "d47fc8e1a9a84efcb6d9a02ba973e940": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "a127db6cdeae4700834c1dc582ecb609": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": 
"@jupyter-widgets/controls" + } + }, + "b9a0b5fe13134774a7560a583513c7cd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "66d58d9dab054294a81601572b143c00": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "75f479bbe938484891214f27986364eb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + 
"justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "05c7977a9322499bbc00e80f0d767bee": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_9a76a0ee943343d781caf5b30ce0c6a5", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_f454951d36c74e13bc3cf04b41388345", + "IPY_MODEL_1974ea03fa42428fa162eac65f8d71e4", + "IPY_MODEL_96cdea5bf8ba44f3a37e5e1f70cbef62" + ] + } + }, + "9a76a0ee943343d781caf5b30ce0c6a5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + 
"overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f454951d36c74e13bc3cf04b41388345": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_221d95935ae249bb8fbf60007ed39e82", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: 100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_0a5ca2e268c24ae1a9e42d5c9dcadb29" + } + }, + "1974ea03fa42428fa162eac65f8d71e4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_997827c35e02494d82209fa5f1232e5f", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1355256, + 
"_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1355256, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_aaf3281319a94b42ba21941f41d262bc" + } + }, + "96cdea5bf8ba44f3a37e5e1f70cbef62": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_ffa4cf74c0c84e0797de60e50e1f579c", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1.29M/1.29M [00:00<00:00, 5.20MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_d60228cfbd9f44ddbcefb20a74d99779" + } + }, + "221d95935ae249bb8fbf60007ed39e82": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "0a5ca2e268c24ae1a9e42d5c9dcadb29": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": 
null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "997827c35e02494d82209fa5f1232e5f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "aaf3281319a94b42ba21941f41d262bc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + 
"justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "ffa4cf74c0c84e0797de60e50e1f579c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "d60228cfbd9f44ddbcefb20a74d99779": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + 
"_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "8bff97fa5703438685342e1b140234f5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_542ccbd2abf941fea5312b72c51ec5b6", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_f71eb8c60e4b4ef6b44b9f6b90bf25c5", + "IPY_MODEL_1355930a0edf40e5ac75198e9d901bfb", + "IPY_MODEL_ce047a309f984458859ddfc15aad1125" + ] + } + }, + "542ccbd2abf941fea5312b72c51ec5b6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + 
"object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f71eb8c60e4b4ef6b44b9f6b90bf25c5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_c82b029db5a547f09d1b9607a5d26ecc", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: 100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_01312ccfa1254c7a8acc6a7410af711a" + } + }, + "1355930a0edf40e5ac75198e9d901bfb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_7db4a94408b94d71ab9f6fccded0f011", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 3247202234, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 3247202234, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_356eaaf643894327a0ba30049c3a2b92" + } + }, + "ce047a309f984458859ddfc15aad1125": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_874af3844342411096d6d8872354f531", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 3.02G/3.02G 
[02:35<00:00, 10.9MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_fd8cb9272f3d4ff9ae673c724a6b757b" + } + }, + "c82b029db5a547f09d1b9607a5d26ecc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "01312ccfa1254c7a8acc6a7410af711a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "7db4a94408b94d71ab9f6fccded0f011": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "356eaaf643894327a0ba30049c3a2b92": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "874af3844342411096d6d8872354f531": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "fd8cb9272f3d4ff9ae673c724a6b757b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "6e2b38e0faf64f529e849f37ad4d4eab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_ba6e9cadc3274a64ae0738fb99c55860", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_62edc38a7caf4feca6db6de2d767fbb3", + 
"IPY_MODEL_155616f4494440b291ae4722aaeca750", + "IPY_MODEL_2045670c97194bd68a398213552d8364" + ] + } + }, + "ba6e9cadc3274a64ae0738fb99c55860": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "62edc38a7caf4feca6db6de2d767fbb3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_1ca831c613424bea8f42c4a89f44b2b5", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 0%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_b352385cb0664a8b873f5028a728c4e3" + } + }, + 
"155616f4494440b291ae4722aaeca750": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_085e99b8131f4f8bb65f2ed723a21b48", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "", + "max": 1000000, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 32, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_d1ec06e6abca4b35b8e22856bcc9c5ce" + } + }, + "2045670c97194bd68a398213552d8364": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_565f3cec67374748bdc8baf19f18963f", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 32/1000000 [03:13<1383:55:47, 4.98s/it]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_358cb048c5a04a7cb2e9de91017baf01" + } + }, + "1ca831c613424bea8f42c4a89f44b2b5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "b352385cb0664a8b873f5028a728c4e3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "085e99b8131f4f8bb65f2ed723a21b48": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "d1ec06e6abca4b35b8e22856bcc9c5ce": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + 
"flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "565f3cec67374748bdc8baf19f18963f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "358cb048c5a04a7cb2e9de91017baf01": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, 
+ "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Kw6MQx4xFjgy" + }, + "source": [ + "__This notebook__ explains how to fine-tune GPT-2 Large on a large dataset in colab or on your home computer.\n", + "\n", + "To fit this task into a colab instance, we will use two tricks:\n", + "* streaming the [C4 dataset](https://huggingface.co/datasets/c4) using [`datasets` Streaming API](https://huggingface.co/docs/datasets/dataset_streaming.html). Without that, C4 would take up over 300GB of disk space.\n", + "* training with 8-Bit Adam from the [`bitsandbytes` library](https://github.com/facebookresearch/bitsandbytes). Without 8-bit compression, training GPT-2 Large would not fit in GPU memory.\n", + "\n", + "\n", + "\n", + "This notebook is based on the [\"fine-tune a language model\"](https://github.com/huggingface/notebooks/blob/master/examples/language_modeling.ipynb) tutorial by [Sylvain Gugger](https://sgugger.github.io/pages/about-me.html#about-me) as well as the [pytorch language-model example](https://github.com/huggingface/transformers/blob/master/examples/pytorch/language-modeling/run_clm_no_trainer.py)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-E5Z6CLC6UfJ" + }, + "source": [ + "# Installation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X4cRE8IbIrIV" + }, + "source": [ + "If you're opening this Notebook on colab, you will probably need to install 🤗 Transformers and 🤗 Datasets. Run the following cell to install them." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "MOsHUjgdIrIW", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4940dd79-5eed-4eaa-8ac4-55d6ee49a454" + }, + "source": [ + " ! pip install datasets transformers" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting datasets\n", + " Downloading datasets-1.16.1-py3-none-any.whl (298 kB)\n", + "\u001b[K |████████████████████████████████| 298 kB 5.3 MB/s \n", + "\u001b[?25hCollecting transformers\n", + " Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)\n", + "\u001b[K |████████████████████████████████| 3.1 MB 34.1 MB/s \n", + "\u001b[?25hCollecting xxhash\n", + " Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n", + "\u001b[K |████████████████████████████████| 243 kB 52.1 MB/s \n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.1.5)\n", + "Collecting fsspec[http]>=2021.05.0\n", + " Downloading fsspec-2021.11.1-py3-none-any.whl (132 kB)\n", + "\u001b[K |████████████████████████████████| 132 kB 38.4 MB/s \n", + "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n", + "Requirement already satisfied: pyarrow!=4.0.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.8.2)\n", + "Collecting aiohttp\n", + " Downloading
aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", + "\u001b[K |████████████████████████████████| 1.1 MB 36.3 MB/s \n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.19.5)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.62.3)\n", + "Collecting huggingface-hub<1.0.0,>=0.1.0\n", + " Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)\n", + "\u001b[K |████████████████████████████████| 61 kB 395 kB/s \n", + "\u001b[?25hRequirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.10.0.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.4.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.13)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.6)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) 
(1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2021.10.8)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", + "Collecting tokenizers<0.11,>=0.10.1\n", + " Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n", + "\u001b[K |████████████████████████████████| 3.3 MB 33.4 MB/s \n", + "\u001b[?25hCollecting pyyaml\n", + " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", + "\u001b[K |████████████████████████████████| 596 kB 35.6 MB/s \n", + "\u001b[?25hCollecting sacremoses\n", + " Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n", + "\u001b[K |████████████████████████████████| 895 kB 42.3 MB/s \n", + "\u001b[?25hCollecting aiosignal>=1.1.2\n", + " Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.2.0)\n", + "Collecting multidict<7.0,>=4.5\n", + " Downloading multidict-5.2.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (160 kB)\n", + "\u001b[K |████████████████████████████████| 160 kB 49.5 MB/s \n", + "\u001b[?25hCollecting frozenlist>=1.1.1\n", + " Downloading frozenlist-1.2.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (192 kB)\n", + "\u001b[K |████████████████████████████████| 192 kB 52.2 MB/s \n", + "\u001b[?25hRequirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.0.8)\n", + "Collecting yarl<2.0,>=1.0\n", + " Downloading 
yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n", + "\u001b[K |████████████████████████████████| 271 kB 55.7 MB/s \n", + "\u001b[?25hCollecting asynctest==0.13.0\n", + " Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n", + "Collecting async-timeout<5.0,>=4.0.0a3\n", + " Downloading async_timeout-4.0.1-py3-none-any.whl (5.7 kB)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.6.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2018.9)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n", + "Installing collected packages: multidict, frozenlist, yarl, asynctest, async-timeout, aiosignal, pyyaml, fsspec, aiohttp, xxhash, tokenizers, sacremoses, huggingface-hub, transformers, datasets\n", + " Attempting uninstall: pyyaml\n", + " Found existing installation: PyYAML 3.13\n", + " Uninstalling PyYAML-3.13:\n", + " Successfully uninstalled PyYAML-3.13\n", + "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.1 asynctest-0.13.0 datasets-1.16.1 frozenlist-1.2.0 fsspec-2021.11.1 huggingface-hub-0.2.1 multidict-5.2.0 pyyaml-6.0 sacremoses-0.0.46 tokenizers-0.10.3 transformers-4.12.5 xxhash-2.0.2 yarl-1.7.2\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Aewv5KT-JnHz" + }, + "source": [ + "\n", + "We are also installing bitsandbytes 
which depends on the CUDA version run by your colab. The installed CUDA version is displayed in the top right when calling nvidia-smi. Use this version to install the right bitsandbytes version below. We need a GPU for this, so if you do not have a GPU loaded yet, use Runtime -> Change runtime type -> select GPU from the dropdown." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XprTmlZuJpXL", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c7343f62-43ef-41f2-972d-6285793fecba" + }, + "source": [ + "! nvidia-smi\n", + "! pip install bitsandbytes-cuda112" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sat Dec 4 19:08:21 2021 \n", + "+-----------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 495.44 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", + "|-------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M.
|\n", + "|===============================+======================+======================|\n", + "| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 36C P8 28W / 149W | 0MiB / 11441MiB | 0% Default |\n", + "| | | N/A |\n", + "+-------------------------------+----------------------+----------------------+\n", + " \n", + "+-----------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=============================================================================|\n", + "| No running processes found |\n", + "+-----------------------------------------------------------------------------+\n", + "Collecting bitsandbytes-cuda112\n", + " Downloading bitsandbytes_cuda112-0.26.0-py3-none-any.whl (4.2 MB)\n", + "\u001b[K |████████████████████████████████| 4.2 MB 5.4 MB/s \n", + "\u001b[?25hInstalling collected packages: bitsandbytes-cuda112\n", + "Successfully installed bitsandbytes-cuda112-0.26.0\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZyC8zpEqGddx" + }, + "source": [ + "To test the bitsandbytes installation we can run a simple update with 8-bit Adam." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "A8HODqE2E_6N", + "outputId": "e1888813-c491-4d02-baa9-7e0e312d2a36" + }, + "source": [ + "import bitsandbytes as bnb\n", + "import torch\n", + "\n", + "p = torch.nn.Parameter(torch.rand(10,10).cuda())\n", + "a = torch.rand(10,10).cuda()\n", + "\n", + "p1 = p.data.sum().item()\n", + "\n", + "adam = bnb.optim.Adam8bit([p])\n", + "\n", + "out = a*p\n", + "loss = out.sum()\n", + "loss.backward()\n", + "adam.step()\n", + "\n", + "p2 = p.data.sum().item()\n", + "\n", + "assert p1 != p2\n", + "print('SUCCESS!')\n", + "print('Installation was successful!')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "SUCCESS!\n", + "Installation was successful!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GDkUr_zQBoZR" + }, + "source": [ + "# Dataset Streaming" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i4UPzShfBrgr" + }, + "source": [ + "Pre-training often requires a huge text dataset. Some famous datasets for pre-training are [C4](https://huggingface.co/datasets/c4), its multilingual version [mC4](https://huggingface.co/datasets/mc4), as well as [OSCAR](https://huggingface.co/datasets/oscar).\n", + "\n", + "These datasets can be terabytes of data and require a lot of resources:\n", + "- a good bandwidth to download all the data\n", + "- terabytes of disk space to store the data\n", + "- dozens of CPUs and a good infrastructure to tokenize the text dataset\n", + "- lots of time to wait for the tokenization to happen\n", + "\n", + "This makes it very impractical to get your hands on a dataset for pretraining when you have limited resources." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6xabIliEFxyo" + }, + "source": [ + "Dataset streaming is the solution in this case. 
Streaming gives you access to just the small subset of the dataset that you need at any given time during training. Text samples are progressively downloaded during training, and processed on-the-fly.\n", + "\n", + "Thanks to dataset streaming:\n", + "- training can start directly without waiting for terabytes of data to be downloaded\n", + "- you can use an arbitrarily large dataset, without being constrained by your disk space\n", + "- you can process the batches of text as they arrive with a regular CPU\n", + "- you don't waste time processing examples that are not immediately needed for training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9JIEFS8nISgn" + }, + "source": [ + "Dataset streaming is available in [Hugging Face Datasets](https://github.com/huggingface/datasets) - you simply need to pass `streaming=True` when loading a dataset, and it can be used with a PyTorch data loader.\n", + "\n", + "![dataset streaming](https://huggingface.co/docs/datasets/_images/stream.gif \"Dataset Streaming\")\n", + "\n", + "Hugging Face Datasets also allows you to process examples on-the-fly via `.map()` and shuffle the dataset with `.shuffle()`.\n", + "\n", + "Here is an example of how to load the C4 dataset in streaming mode:" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 183, + "referenced_widgets": [ + "372609dca95b4ddcb51491283df860f5", + "e0b881dd26d54c7c92ba9ab5923fab10", + "90eb62f7ec634e098db511a5995d807f", + "d60a799761f649378d17a044362e55b9", + "a76cf6149c6748a5aa74ada58921d31e", + "5256dfd69e364597a29b2ad61c01ea93", + "1a231c5cffbb4225941d02cb5e3bb273", + "cffbeabee69446c48dfa89ac38d9f45e", + "0c02af4a252e40fba08c097f59926dc8", + "2917b5df9cc14ec3a2fe356c12c8511e", + "5c3abd7a7f354ac0b1cd3d89506f417a", + "8714bb6e944345b98b691f40adf0bf76", + "a2983efe78b94919891d6db909100934", + "b61fb9c3745f4c468954713b07f8f16f", + "5f05edadc8c943aa82a790e815253a48", +
"867c7dd23eb64a1b8d02a7cf8a4ad64a", + "01445fea254a436fa464b6006f3abd92", + "74c4d6d598bf4173999a56fa849506d3", + "e299d23c03444805b0a359eb049cddb3", + "c2f4080d692a46debfb5a8000d2637d6", + "b0cbc589d149494cbb74139bedb0aafc", + "b3fec901d0cc48a2862b900928b681f3" + ] + }, + "id": "N4E5ZT6rJnm7", + "outputId": "f7c35c58-83c5-4cd4-8032-059c6da674a3" + }, + "source": [ + "from datasets import load_dataset\n", + "\n", + "c4 = load_dataset(\"c4\", \"en\", streaming=True)\n", + "\n", + "# Let's print a few examples\n", + "for i, example in enumerate(c4[\"train\"]):\n", + " print(f\"{i}: {str(example)[:200]}...\")\n", + " if i == 5:\n", + " break\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "372609dca95b4ddcb51491283df860f5", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "Downloading: 0%| | 0.00/1.38k [00:00=1.16.1\", \"To fix: pip install -r examples/pytorch/language-modeling/requirements.txt\")\n", + "\n", + "MODEL_CONFIG_CLASSES = list(MODEL_MAPPING.keys())\n", + "MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)\n", + "\n", + "\n", + "def parse_args():\n", + " parser = argparse.ArgumentParser(description=\"Finetune a transformers model on a causal language modeling task\")\n", + " parser.add_argument(\n", + " \"--dataset_name\",\n", + " type=str,\n", + " default=None,\n", + " help=\"The name of the dataset to use (via the datasets library).\",\n", + " )\n", + " parser.add_argument(\n", + " \"--dataset_config_name\",\n", + " type=str,\n", + " default=None,\n", + " help=\"The configuration name of the dataset to use (via the datasets library).\",\n", + " )\n", + " parser.add_argument(\n", + " \"--text_column_name\",\n", + " type=str,\n", + " default=None,\n", + " help=\"The name of the column containing the text data.\",\n", + " )\n", + " parser.add_argument(\n", + " \"--dataset_streaming\",\n", + " 
action=\"store_true\",\n", + " help=\"If passed, will use dataset streaming (via the datasets library)\",\n", + " )\n", + " parser.add_argument(\n", + " \"--model_name_or_path\",\n", + " type=str,\n", + " help=\"Path to pretrained model or model identifier from huggingface.co/models.\",\n", + " required=False,\n", + " )\n", + " parser.add_argument(\n", + " \"--config_name\",\n", + " type=str,\n", + " default=None,\n", + " help=\"Pretrained config name or path if not the same as model_name\",\n", + " )\n", + " parser.add_argument(\n", + " \"--tokenizer_name\",\n", + " type=str,\n", + " default=None,\n", + " help=\"Pretrained tokenizer name or path if not the same as model_name\",\n", + " )\n", + " parser.add_argument(\n", + " \"--use_slow_tokenizer\",\n", + " action=\"store_true\",\n", + " help=\"If passed, will use a slow tokenizer (not backed by the 🤗 Tokenizers library).\",\n", + " )\n", + " parser.add_argument(\n", + " \"--per_device_train_batch_size\",\n", + " type=int,\n", + " default=1,\n", + " help=\"Batch size (per device) for the training dataloader.\",\n", + " )\n", + " parser.add_argument(\n", + " \"--learning_rate\",\n", + " type=float,\n", + " default=5e-5,\n", + " help=\"Initial learning rate (after the potential warmup period) to use.\",\n", + " )\n", + " parser.add_argument(\"--weight_decay\", type=float, default=0.0, help=\"Weight decay to use.\")\n", + " parser.add_argument(\"--num_train_epochs\", type=int, default=1, help=\"Total number of training epochs to perform.\")\n", + " parser.add_argument(\n", + " \"--max_train_steps\",\n", + " type=int,\n", + " default=None,\n", + " help=\"Total number of training steps to perform. 
If provided, overrides num_train_epochs.\",\n", + " )\n", + " parser.add_argument(\n", + " \"--gradient_accumulation_steps\",\n", + " type=int,\n", + " default=1,\n", + " help=\"Number of updates steps to accumulate before performing a backward/update pass.\",\n", + " )\n", + " parser.add_argument(\n", + " \"--lr_scheduler_type\",\n", + " type=SchedulerType,\n", + " default=\"linear\",\n", + " help=\"The scheduler type to use.\",\n", + " choices=[\"linear\", \"cosine\", \"cosine_with_restarts\", \"polynomial\", \"constant\", \"constant_with_warmup\"],\n", + " )\n", + " parser.add_argument(\n", + " \"--num_warmup_steps\", type=int, default=3000, help=\"Number of steps for the warmup in the lr scheduler.\"\n", + " )\n", + " parser.add_argument(\"--output_dir\", type=str, default=None, help=\"Where to store the final model.\")\n", + " parser.add_argument(\"--seed\", type=int, default=None, help=\"A seed for reproducible training.\")\n", + " parser.add_argument(\n", + " \"--model_type\",\n", + " type=str,\n", + " default=None,\n", + " help=\"Model type to use if training from scratch.\",\n", + " choices=MODEL_TYPES,\n", + " )\n", + " parser.add_argument(\n", + " \"--block_size\",\n", + " type=int,\n", + " default=None,\n", + " help=\"Optional input sequence length after tokenization. The training dataset will be truncated in block of this size for training. 
Default to the model max input length for single sentence inputs (take into account special tokens).\",\n", + " )\n", + " parser.add_argument(\n", + " \"--preprocessing_num_workers\",\n", + " type=int,\n", + " default=None,\n", + " help=\"The number of processes to use for the preprocessing.\",\n", + " )\n", + " parser.add_argument(\n", + " \"--overwrite_cache\", type=bool, default=False, help=\"Overwrite the cached training and evaluation sets\"\n", + " )\n", + " parser.add_argument(\n", + " \"--no_keep_linebreaks\", action=\"store_true\", help=\"Do not keep line breaks when using TXT files.\"\n", + " )\n", + " parser.add_argument(\"--push_to_hub\", action=\"store_true\", help=\"Whether or not to push the model to the Hub.\")\n", + " parser.add_argument(\n", + " \"--hub_model_id\", type=str, help=\"The name of the repository to keep in sync with the local `output_dir`.\"\n", + " )\n", + " parser.add_argument(\"--hub_token\", type=str, help=\"The token to use to push to the Model Hub.\")\n", + " args = parser.parse_args(args=[])\n", + "\n", + " if args.push_to_hub:\n", + " assert args.output_dir is not None, \"Need an `output_dir` to create a repo when `--push_to_hub` is passed.\"\n", + "\n", + " return args\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4QqgoTBcLzqq" + }, + "source": [ + "We set up the streaming dataset, the tokenizer, and the model (GPT-2 Large)."
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "619629ebf5fc4e3ba9ad49e7e767a37d", + "87a5f7bca18e4a818d6d48e15bc68845", + "75d276bd972d429483af64d1eb27624c", + "6f778a9c54f042199869eb16563bb933", + "275f3cf6de1f49c1ae1f09991b4ea99c", + "670d440077b34a8e86bf2d620ab6fb6d", + "ac7455b1e5e5475cabf4dab505603a43", + "4e82c37304d446d88e852553cbe9acb9", + "213fc01fa39b4b77ba211a9304c5ea86", + "a9dae405fbce41e3a46b05292313bab0", + "272807a127d54f5ea3f20c0bc7262a25", + "1ce665de582c4bd392d6bc4fff9a1499", + "36cdf4ebc5684e9e88d80d5c98d86154", + "288d1670c87e469ba3fe7c0030887d19", + "eeb0bf36fb93410f82ab96b5414b6c50", + "3c8d0050e0904acf933d18316000ac8a", + "3be14001505b4dd19293365cb4a52cbc", + "4195ef4de4ba491995e28b72e4d82a5b", + "cafc6a76a9ac4de79b46969ebab90265", + "9e6e0d9075454e17932e147678c1cef6", + "7c83e50b67374fa5a2d713c036ba8e84", + "0e0d370b48c84349a7f7cc45ed5f9d09", + "2222296ad8d14cd39e8af17c108ad5ed", + "e39daeca11584766a42f9bc6df76089c", + "8fad1d6738b84704a3b4d248dbcfac2b", + "405c42d0df19419e8563b00a3f402ae9", + "7e2ceabdd41446cc860e35f94e02de31", + "5b2f0da9535f46d78999e11162e16666", + "d47fc8e1a9a84efcb6d9a02ba973e940", + "a127db6cdeae4700834c1dc582ecb609", + "b9a0b5fe13134774a7560a583513c7cd", + "66d58d9dab054294a81601572b143c00", + "75f479bbe938484891214f27986364eb", + "05c7977a9322499bbc00e80f0d767bee", + "9a76a0ee943343d781caf5b30ce0c6a5", + "f454951d36c74e13bc3cf04b41388345", + "1974ea03fa42428fa162eac65f8d71e4", + "96cdea5bf8ba44f3a37e5e1f70cbef62", + "221d95935ae249bb8fbf60007ed39e82", + "0a5ca2e268c24ae1a9e42d5c9dcadb29", + "997827c35e02494d82209fa5f1232e5f", + "aaf3281319a94b42ba21941f41d262bc", + "ffa4cf74c0c84e0797de60e50e1f579c", + "d60228cfbd9f44ddbcefb20a74d99779", + "8bff97fa5703438685342e1b140234f5", + "542ccbd2abf941fea5312b72c51ec5b6", + "f71eb8c60e4b4ef6b44b9f6b90bf25c5", + "1355930a0edf40e5ac75198e9d901bfb", + 
"ce047a309f984458859ddfc15aad1125", + "c82b029db5a547f09d1b9607a5d26ecc", + "01312ccfa1254c7a8acc6a7410af711a", + "7db4a94408b94d71ab9f6fccded0f011", + "356eaaf643894327a0ba30049c3a2b92", + "874af3844342411096d6d8872354f531", + "fd8cb9272f3d4ff9ae673c724a6b757b" + ] + }, + "id": "PSzJDsewLPEi", + "outputId": "f956fc65-1210-4db5-a2ea-fc482f357eb2" + }, + "source": [ + "args = parse_args() # get default arguments\n", + "\n", + "# If passed along, set the training seed now.\n", + "if args.seed is not None:\n", + " set_seed(args.seed)\n", + "\n", + "args.dataset_name = 'c4'\n", + "args.dataset_streaming = True\n", + "args.text_column_name = \"text\"\n", + "args.model_name_or_path = 'gpt2-large'\n", + "args.dataset_config_name = \"en\"\n", + "args.block_size = 1024\n", + "args.max_train_steps = 1_000_000\n", + "args.log_loss_interval = 25\n", + "\n", + "\n", + "# LOAD DATA\n", + "raw_train_dataset = load_dataset(args.dataset_name, args.dataset_config_name, streaming=args.dataset_streaming, split=\"train\")\n", + "\n", + "if args.config_name:\n", + " config = AutoConfig.from_pretrained(args.config_name)\n", + "elif args.model_name_or_path:\n", + " config = AutoConfig.from_pretrained(args.model_name_or_path)\n", + "else:\n", + " config = CONFIG_MAPPING[args.model_type]()\n", + " logger.warning(\"You are instantiating a new config instance from scratch.\")\n", + "\n", + "# TOKENIZER\n", + "if args.tokenizer_name:\n", + " tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, use_fast=not args.use_slow_tokenizer)\n", + "elif args.model_name_or_path:\n", + " tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=not args.use_slow_tokenizer)\n", + "else:\n", + " raise ValueError(\n", + " \"You are instantiating a new tokenizer from scratch. 
This is not supported by this script.\"\n", + " \"You can do it from another script, save it, and load it from here, using --tokenizer_name.\"\n", + " )\n", + "\n", + "if args.model_name_or_path:\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " args.model_name_or_path,\n", + " from_tf=bool(\".ckpt\" in args.model_name_or_path),\n", + " config=config,\n", + " )\n", + "else:\n", + " logger.info(\"Training new model from scratch\")\n", + " model = AutoModelForCausalLM.from_config(config)\n", + "\n", + "model.resize_token_embeddings(len(tokenizer))\n", + "\n", + "model.gradient_checkpointing_enable()\n", + "model.cuda() # send model to cuda preemptively to free RAM. Yep, we're using GPU as an offload memory. O tempora! O mores!" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "619629ebf5fc4e3ba9ad49e7e767a37d", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "Downloading: 0%| | 0.00/666 [00:00= block_size:\n", + " total_length = (total_length // block_size) * block_size\n", + " # Split by chunks of max_len.\n", + " result = {\n", + " k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n", + " for k, t in concatenated_examples.items()\n", + " }\n", + " result[\"labels\"] = result[\"input_ids\"].copy()\n", + " return result\n", + "\n", + "tokenized_train_dataset = raw_train_dataset.shuffle(10_000, seed=42).map(tokenize_function, batched=True)\n", + "\n", + "if args.block_size is None:\n", + " block_size = tokenizer.model_max_length\n", + " if block_size > 1024:\n", + " logger.warning(\n", + " f\"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). \"\n", + " \"Picking 1024 instead. 
You can change that default value by passing --block_size xxx.\"\n", + " )\n", + " block_size = 1024\n", + "else:\n", + " if args.block_size > tokenizer.model_max_length:\n", + " logger.warning(\n", + " f\"The block_size passed ({args.block_size}) is larger than the maximum length for the model\"\n", + " f\"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}.\"\n", + " )\n", + " block_size = min(args.block_size, tokenizer.model_max_length)\n", + "\n", + "# Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder\n", + "# for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower\n", + "# to preprocess.\n", + "train_dataset = tokenized_train_dataset.map(group_texts, batched=True)\n", + "train_dataset = train_dataset.shuffle(10_000, seed=42).with_format(\"torch\")\n", + "\n", + "# DataLoaders creation:\n", + "train_dataloader = DataLoader(\n", + " train_dataset, collate_fn=default_data_collator, batch_size=args.per_device_train_batch_size\n", + ")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "E0Wl7_Vx8HiP" + }, + "source": [ + "## 8-bit Optimizers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l3Z-4DmQ_etm" + }, + "source": [ + "\n", + "In this example, we fine-tune GPT-2 large with a sequence length of 1024, which runs out of memory with a standard 32-bit optimizer. How can we fit this model on a Colab GPU with 12 GB of memory? One solution is to use 8-bit optimizers.\n", + "\n", + "8-bit optimizers decrease the memory footprint of training by storing the optimizer statistics in a compressed 8-bit format. For Adam, there are two optimizer buffers: one for an estimate of the running mean of the gradient and one for an estimate of the running mean of the squared gradient (used as the variance term). Each of the buffers has the size of the full model; as such, the Adam optimizer uses 2x as much memory as the model itself. 
With 8-bit optimizers, we reduce the precision of these buffers from 32-bit to 8-bit, thus reducing the memory used by Adam from 2x the model size to 0.5x the model size -- a reduction of 75%.\n", + "\n", + "8-bit optimizers work by using dynamic quantization and block-wise quantization to ensure stable training and the same performance as 32-bit optimizers while achieving the 75% reduction in memory.\n", + "\n", + "8-bit optimizers work as follows:\n", + "1. Chunk optimizer states into blocks\n", + "2. Normalize each block into the range [-1, 1] by dividing by the absmax of the block\n", + "3. Perform dynamic quantization\n", + "4. Store 8-bit data\n", + "\n", + "For dequantization, we reverse these steps. These steps are illustrated by the schematic below:\n", + "\n", + " ![Schematic of 8-bit optimizers](https://timdettmers.com/wp-content/uploads/2021/12/schematic2.svg)\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tpoSBTsR_n7n" + }, + "source": [ + "import bitsandbytes as bnb\n", + "#optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) # this crashes with out-of-memory error\n", + "optimizer = bnb.optim.Adam8bit(model.parameters(), lr=args.learning_rate)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q0MHy1x07_Ac" + }, + "source": [ + "## Train the model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4pOO1F7b_-f9" + }, + "source": [ + "Now we train the model with dataset streaming and 8-bit optimizers." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 909, + "referenced_widgets": [ + "6e2b38e0faf64f529e849f37ad4d4eab", + "ba6e9cadc3274a64ae0738fb99c55860", + "62edc38a7caf4feca6db6de2d767fbb3", + "155616f4494440b291ae4722aaeca750", + "2045670c97194bd68a398213552d8364", + "1ca831c613424bea8f42c4a89f44b2b5", + "b352385cb0664a8b873f5028a728c4e3", + "085e99b8131f4f8bb65f2ed723a21b48", + "d1ec06e6abca4b35b8e22856bcc9c5ce", + "565f3cec67374748bdc8baf19f18963f", + "358cb048c5a04a7cb2e9de91017baf01" + ] + }, + "id": "ONzFs98p__5M", + "outputId": "ac4fb040-a3f3-41aa-cf8d-9699e621faad" + }, + "source": [ + "lr_scheduler = get_scheduler(\n", + " name=args.lr_scheduler_type,\n", + " optimizer=optimizer,\n", + " num_warmup_steps=args.num_warmup_steps,\n", + " num_training_steps=args.max_train_steps,\n", + ")\n", + "# Train!\n", + "total_batch_size = args.per_device_train_batch_size * args.gradient_accumulation_steps\n", + "# Only show the progress bar once on each machine.\n", + "progress_bar = tqdm(range(args.max_train_steps), disable=False)\n", + "completed_steps = 0\n", + "\n", + "def get_free_mem():\n", + " t = torch.cuda.get_device_properties(0).total_memory\n", + " r = torch.cuda.memory_reserved(0)\n", + " a = torch.cuda.memory_allocated(0)\n", + " f = r - a\n", + " return f/1024**3, r/1024**3, a/1024**3\n", + "\n", + "for epoch in range(args.num_train_epochs):\n", + " model.train()\n", + " losses = []\n", + " for step, batch in enumerate(train_dataloader):\n", + " gpu_data = {}\n", + " for key, value in batch.items():\n", + " gpu_data[key] = value.cuda()\n", + "\n", + " outputs = model(**gpu_data, use_cache=False)\n", + " loss = outputs.loss\n", + " losses.append(loss.item())\n", + " loss = loss / args.gradient_accumulation_steps\n", + " loss.backward()\n", + " if step % args.gradient_accumulation_steps == 0 or step == args.max_train_steps:\n", + " optimizer.step()\n", + " lr_scheduler.step()\n", + " 
optimizer.zero_grad()\n", + " progress_bar.update(1)\n", + " completed_steps += 1\n", + "\n", + " if step % args.log_loss_interval == 0 and step > 0:\n", + " try:\n", + " perplexity = math.exp(sum(losses)/len(losses))\n", + " except OverflowError:\n", + " perplexity = float(\"inf\")\n", + " losses = []\n", + " print(f\"epoch: {epoch+1}, step: {step}, perplexity: {perplexity}\")\n", + " \n", + "\n", + " if completed_steps >= args.max_train_steps:\n", + " break\n", + "\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6e2b38e0faf64f529e849f37ad4d4eab", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + " 0%| | 0/1000000 [00:00 1024). Running this sequence through the model will result in indexing errors\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "epoch: 1, step: 25, perplexity: 21.84923336079448\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ] + } + ] + } + ] +}