{ "nbformat": 4, "nbformat_minor": 5, "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" }, "colab": { "name": "evaluation.ipynb", "provenance": [], "collapsed_sections": [] }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "b3ac84dcf48f4ba8a65aecd1df5a1b68": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_490a5358971a45fa92989776dc6757c3", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_f5be7a9c6f1941659c806255a9315b7c", "IPY_MODEL_9eb5033009ee441eb164d862b4b2c39c" ] } }, "490a5358971a45fa92989776dc6757c3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, 
"max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f5be7a9c6f1941659c806255a9315b7c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_ed07265cff0c4c82b95e0d0c47359edb", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 618, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 618, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_46a87c4067114a31ae6e34c3a9464f76" } }, "9eb5033009ee441eb164d862b4b2c39c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_04a42f0ccb10413494e891907fc547d9", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 618/618 [00:01<00:00, 464B/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_2188756da7e74badb48a459ea15c02d5" } }, "ed07265cff0c4c82b95e0d0c47359edb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "46a87c4067114a31ae6e34c3a9464f76": { "model_module": "@jupyter-widgets/base", 
"model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "04a42f0ccb10413494e891907fc547d9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "2188756da7e74badb48a459ea15c02d5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, 
"grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a1db2167447249ed95dfb78a97c24bf9": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_68ce97d8904e4b398af538a7ad1ed1ea", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_245bfeb4fb5542f9900cebd0e3cccc74", "IPY_MODEL_501c0ab625ed4f0bb9954de0b97e90f1" ] } }, "68ce97d8904e4b398af538a7ad1ed1ea": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": 
null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "245bfeb4fb5542f9900cebd0e3cccc74": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_f8140d5334e14d7988c17bbbae05b08e", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1388356, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1388356, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_0b6cce6d1a0e4de48ae7eb967dfeda87" } }, "501c0ab625ed4f0bb9954de0b97e90f1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_bac6501d76ce4b5ba56ab1effcedccbb", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 1.39M/1.39M [00:00<00:00, 5.74MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_cf17e5af2e7e407e9d96e0325ad733de" } }, "f8140d5334e14d7988c17bbbae05b08e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "0b6cce6d1a0e4de48ae7eb967dfeda87": { "model_module": 
"@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "bac6501d76ce4b5ba56ab1effcedccbb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "cf17e5af2e7e407e9d96e0325ad733de": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, 
"overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "d603ce6680d3425e8c145e77bc0e0e30": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_bbd848e83f30482dab926e53a7188f37", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_29c28419ac2848ca8ce6f73eea0e3425", "IPY_MODEL_ac01f9efc2ec4eb2af6dfd956467ab8e" ] } }, "bbd848e83f30482dab926e53a7188f37": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": 
null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "29c28419ac2848ca8ce6f73eea0e3425": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_82c474271a584a46b9af812bd9947ff7", "_dom_classes": [], "description": "Running tokenizer on dataset #0: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5dea54f5878c4ea1b91aea2d6c01dcc9" } }, "ac01f9efc2ec4eb2af6dfd956467ab8e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_1f5a386436f142999c9cd61a5567167f", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2/2 [00:01<00:00, 1.82ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_4d13f40b545245d88a9ce9cfa738a59c" } }, "82c474271a584a46b9af812bd9947ff7": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "5dea54f5878c4ea1b91aea2d6c01dcc9": { "model_module": 
"@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1f5a386436f142999c9cd61a5567167f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "4d13f40b545245d88a9ce9cfa738a59c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, 
"overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "bacd59325efe4e7a8289e2e77eca3f97": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_b6493940baf04dd6a461abd3d123d20d", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_1b5c2a40effd4df8a0e29f7589e4ce69", "IPY_MODEL_516c9f479605404886582c4af2d4860d" ] } }, "b6493940baf04dd6a461abd3d123d20d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": 
null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1b5c2a40effd4df8a0e29f7589e4ce69": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_7d5cce86ee7c4016a6cff03887038660", "_dom_classes": [], "description": "Running tokenizer on dataset #1: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_36aa47fe9e22473ea71a0b6a4d740b35" } }, "516c9f479605404886582c4af2d4860d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_b98d5bb115ce4458919bc628a7c453cc", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2/2 [00:01<00:00, 1.96ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_6437a7817c244f54b4ca06d78d6aeff7" } }, "7d5cce86ee7c4016a6cff03887038660": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "36aa47fe9e22473ea71a0b6a4d740b35": { "model_module": 
"@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b98d5bb115ce4458919bc628a7c453cc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "6437a7817c244f54b4ca06d78d6aeff7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, 
"overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "3ac65aaeae574af5b7eea30b4a873ec2": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_8d99822c6a514136aa164846d039bbfc", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_105f722c571f47b2b7a7184c9fe45c18", "IPY_MODEL_4a6ab6cdeb0943ba8cf8486caadf2f8d" ] } }, "8d99822c6a514136aa164846d039bbfc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": 
null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "105f722c571f47b2b7a7184c9fe45c18": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_4caf7f4e912d454b8ecf3f3971ede95e", "_dom_classes": [], "description": "Running tokenizer on dataset #2: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_36873c0c51904573809bb3eadd172383" } }, "4a6ab6cdeb0943ba8cf8486caadf2f8d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_15d31d2a49804328b4c80fa98dae8ff1", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2/2 [00:00<00:00, 2.03ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_7fa7cbad599741edbf0c099bf2668494" } }, "4caf7f4e912d454b8ecf3f3971ede95e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "36873c0c51904573809bb3eadd172383": { "model_module": 
"@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "15d31d2a49804328b4c80fa98dae8ff1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "7fa7cbad599741edbf0c099bf2668494": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, 
"overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f19e664d094f46f0a94d7410685d67eb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_74626cdc554848fab75607aed0324aa3", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_f2bcfb2929594116b9367a18a1778aa2", "IPY_MODEL_709d06d2c8614f9b97bf86ce8ed1118f" ] } }, "74626cdc554848fab75607aed0324aa3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": 
null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f2bcfb2929594116b9367a18a1778aa2": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_65ca9ae8108d46a1999573c3267f16bc", "_dom_classes": [], "description": "Running tokenizer on dataset #3: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_520e4029bca14128a6eb4e2bbd4c78ed" } }, "709d06d2c8614f9b97bf86ce8ed1118f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_b8e8db36327e4706b6ab435a698cb3fd", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2/2 [00:00<00:00, 2.03ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_7d948bdb11c0427b8c49c48e7c5d9772" } }, "65ca9ae8108d46a1999573c3267f16bc": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "520e4029bca14128a6eb4e2bbd4c78ed": { "model_module": 
"@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b8e8db36327e4706b6ab435a698cb3fd": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "7d948bdb11c0427b8c49c48e7c5d9772": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, 
"overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1de82500b0d34e5c9f6c5f995f27ea03": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_c7e9741c804e421898f9c45cde1ce7cd", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_5178228787094bfba3b671af67e3df0f", "IPY_MODEL_3aa19fa9dda74118844e11a876039a0b" ] } }, "c7e9741c804e421898f9c45cde1ce7cd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": 
null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "5178228787094bfba3b671af67e3df0f": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_2afa7d80cc004014b4a672bc0b683fce", "_dom_classes": [], "description": "Downloading: ", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2482, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2482, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_b2757df8c2e14c7d90befd479221e5c5" } }, "3aa19fa9dda74118844e11a876039a0b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_9ada8d8e30114064be38f3b5d4645f36", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 6.34k/? 
[00:27<00:00, 228B/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_a391191463d0455c8fd4f83b8ae69c8f" } }, "2afa7d80cc004014b4a672bc0b683fce": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "b2757df8c2e14c7d90befd479221e5c5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9ada8d8e30114064be38f3b5d4645f36": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "a391191463d0455c8fd4f83b8ae69c8f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "47aef8e786ea416e8fa99869a46d008f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_eb587b328a7e4a88bffad029b4943a1e", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_bf71a524605b4a63b33468e43b212b62", "IPY_MODEL_e9a69e05e82e48feb3f203e8ac7b7afa" ] } }, "eb587b328a7e4a88bffad029b4943a1e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "bf71a524605b4a63b33468e43b212b62": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_f3fa0e5bd0b34ffe92627a1615447d79", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 498796983, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 498796983, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_03b37083ef7642f2942dd97b6e090c33" } }, "e9a69e05e82e48feb3f203e8ac7b7afa": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_18d6f6e2b90747258a2082796f2eeda9", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
499M/499M [05:39<00:00, 1.47MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e33495db5c3343a999c9c6d807abc238" } }, "f3fa0e5bd0b34ffe92627a1615447d79": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "03b37083ef7642f2942dd97b6e090c33": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "18d6f6e2b90747258a2082796f2eeda9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "e33495db5c3343a999c9c6d807abc238": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ceb189e178b24e03b398b7ba37e63a02": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_7b85ed3a5f034cbcbb3d87d9e5ac807d", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_81f7f3a6c8dc4c9e8320fbcc8b4aa270", "IPY_MODEL_f1187ab3d4a1491dacbf7f109810bf3c" ] } }, "7b85ed3a5f034cbcbb3d87d9e5ac807d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "81f7f3a6c8dc4c9e8320fbcc8b4aa270": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_43d1cd650b26471cbf07beb1733b943a", "_dom_classes": [], "description": "Running tokenizer on dataset #0: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_71410f8bf2d348ea9f3f173ea70c7829" } }, "f1187ab3d4a1491dacbf7f109810bf3c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_f8ae53cbfe38498095d7f7e2ee6b1b89", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
2/2 [00:01<00:00, 1.99ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_dd961e0fdc604f0883ac6a674617b7ac" } }, "43d1cd650b26471cbf07beb1733b943a": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "71410f8bf2d348ea9f3f173ea70c7829": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f8ae53cbfe38498095d7f7e2ee6b1b89": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "dd961e0fdc604f0883ac6a674617b7ac": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e9a04e6cb6094f00b75a248aebc11dcf": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_3a118dd7adf3459db52afc86e39e4681", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_59dae515b4804e3f8a64372ec0cd5254", "IPY_MODEL_a835eb4e43174c178a43fc20cf878f67" ] } }, "3a118dd7adf3459db52afc86e39e4681": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "59dae515b4804e3f8a64372ec0cd5254": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_bbd988fc15734daf834d7c1be0898f54", "_dom_classes": [], "description": "Running tokenizer on dataset #1: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_59a87b9b20fd43e2ad4df96508e2a4e8" } }, "a835eb4e43174c178a43fc20cf878f67": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_0c06e7199e2440ff8df24c7f4d3816bf", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
2/2 [00:00<00:00, 2.01ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_1721231f136a41a89226a6993e770708" } }, "bbd988fc15734daf834d7c1be0898f54": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "59a87b9b20fd43e2ad4df96508e2a4e8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0c06e7199e2440ff8df24c7f4d3816bf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "1721231f136a41a89226a6993e770708": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ea1a49df91de49b39d7c28620d396ed3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_f43ced84b4ae4d0290406ff29ac319b6", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_249eab1d100f4e66891404bd0293cfac", "IPY_MODEL_de5f4a79619b4361adb69ebf75f480d3" ] } }, "f43ced84b4ae4d0290406ff29ac319b6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "249eab1d100f4e66891404bd0293cfac": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_38baac16961b491d99a195ec08b7146c", "_dom_classes": [], "description": "Running tokenizer on dataset #2: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_31550c6938b245bdaff271f77e36a265" } }, "de5f4a79619b4361adb69ebf75f480d3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_86d213b1b67649e28127ca8919d1b508", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
2/2 [00:00<00:00, 2.09ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_3833a9687f8440c8bc7159be7c5da6aa" } }, "38baac16961b491d99a195ec08b7146c": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "31550c6938b245bdaff271f77e36a265": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "86d213b1b67649e28127ca8919d1b508": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "3833a9687f8440c8bc7159be7c5da6aa": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0eb974fa255b4b378c19580c9b443242": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_72b317f6253640efa451b41609f418cc", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_9dbc6bdd1fcd4258b27b0b36f537a52b", "IPY_MODEL_48eab5fd5f5e4f4f9b1517a3f2e53052" ] } }, "72b317f6253640efa451b41609f418cc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9dbc6bdd1fcd4258b27b0b36f537a52b": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_562f962ecfe1438ba3bd67406c3bc415", "_dom_classes": [], "description": "Running tokenizer on dataset #3: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e5c84b81d24a4120ad364cf7a8d92b13" } }, "48eab5fd5f5e4f4f9b1517a3f2e53052": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_12ba4202432a4d9da9b4295bee72ffac", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
2/2 [00:00<00:00, 2.55ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_4b846d71afd943278c1ac0cecfa13f65" } }, "562f962ecfe1438ba3bd67406c3bc415": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "e5c84b81d24a4120ad364cf7a8d92b13": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "12ba4202432a4d9da9b4295bee72ffac": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "4b846d71afd943278c1ac0cecfa13f65": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "c23a9b479ca54b5082ced5e0359f54d9": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_ce19ae483e1c4a9b90b3fecfea1330e4", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_e953e23fd1364232b6cd39ec11ccaecb", "IPY_MODEL_75b304f6ca5945b295a64fafcf283c4e" ] } }, "ce19ae483e1c4a9b90b3fecfea1330e4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e953e23fd1364232b6cd39ec11ccaecb": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_cb932f442d0c4c8b959e455cc0ce1329", "_dom_classes": [], "description": "Running tokenizer on dataset #0: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5fa39708c9684ce3b93577f1a0197bf4" } }, "75b304f6ca5945b295a64fafcf283c4e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_4e7a38f355d84811a66936be929d270d", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 4.83ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_50499dbed5cb470a9b5dd7f4e5144405" } }, "cb932f442d0c4c8b959e455cc0ce1329": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "5fa39708c9684ce3b93577f1a0197bf4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "4e7a38f355d84811a66936be929d270d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "50499dbed5cb470a9b5dd7f4e5144405": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1d3024c3e0374116bb856902590f6bfd": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_1a406ffa02dc479aa0fcdfa41afcbb0b", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_9f6792ca58ed4906bb671ae049e2b860", "IPY_MODEL_54ef5c70c3f146e1a1fbd82f8aeedbb9" ] } }, "1a406ffa02dc479aa0fcdfa41afcbb0b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9f6792ca58ed4906bb671ae049e2b860": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_ed8c1a4ff520407eb35402fb4e5fcc71", "_dom_classes": [], "description": "Running tokenizer on dataset #1: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_68819af7768e486da1752ff27d2e4104" } }, "54ef5c70c3f146e1a1fbd82f8aeedbb9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_4039c305be024488abecf4a635345c46", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 2.59ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_80622e53e4124d0cbd506542f81f4818" } }, "ed8c1a4ff520407eb35402fb4e5fcc71": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "68819af7768e486da1752ff27d2e4104": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "4039c305be024488abecf4a635345c46": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "80622e53e4124d0cbd506542f81f4818": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "2cad6e60bb414f46b79720dfc4f87f9e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_fc6b7d2794bb44cdabafdfba208b8dc3", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_f8fd5a767ff3404ebbadc6cc63110445", "IPY_MODEL_8b638f038ef04ea7b2ad3bdcb90f0141" ] } }, "fc6b7d2794bb44cdabafdfba208b8dc3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f8fd5a767ff3404ebbadc6cc63110445": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_305833c39a074d0296ef4511b6f61e7e", "_dom_classes": [], "description": "Running tokenizer on dataset #2: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_f31cad3887d640d8b4322598eca1ca70" } }, "8b638f038ef04ea7b2ad3bdcb90f0141": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_02ecdd0d12614752a19b294395cb821c", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 3.82ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_4c62a1fd18a54327a1f50bf3ca087027" } }, "305833c39a074d0296ef4511b6f61e7e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "f31cad3887d640d8b4322598eca1ca70": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "02ecdd0d12614752a19b294395cb821c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "4c62a1fd18a54327a1f50bf3ca087027": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9daad6aa599343278ca5198897b390b3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_b803d1d51df9466db862f96fb8e49257", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_266b34a7ee284ac590ffa999f1351330", "IPY_MODEL_93a03570b7bf4f3f8494f7b5677a7c70" ] } }, "b803d1d51df9466db862f96fb8e49257": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "266b34a7ee284ac590ffa999f1351330": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_543ff4b83d804f52afbcb7a57ca22570", "_dom_classes": [], "description": "Running tokenizer on dataset #3: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_468d17728336411db088c69330f4de68" } }, "93a03570b7bf4f3f8494f7b5677a7c70": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_fe17854a2dbe4949aeb4efb309cab6fc", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 3.93ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_a1feb0e41e2f47e28f550ed81b61e952" } }, "543ff4b83d804f52afbcb7a57ca22570": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "468d17728336411db088c69330f4de68": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "fe17854a2dbe4949aeb4efb309cab6fc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "a1feb0e41e2f47e28f550ed81b61e952": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ca9eb397fcd640b48838e58579b475bc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_4234b3e6815040af90d298f6ab2a808e", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_30739849e0db45daa664d8992ab6f458", "IPY_MODEL_24dc9ed72f8048fe8b174f2f1ef6e2ca" ] } }, "4234b3e6815040af90d298f6ab2a808e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "30739849e0db45daa664d8992ab6f458": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_641876e099e84c44854262d5e709409e", "_dom_classes": [], "description": "Running tokenizer on dataset #0: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_f8ab57bd0aba4f608a8f7549baa4ead4" } }, "24dc9ed72f8048fe8b174f2f1ef6e2ca": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_75b2a86adb434e8cb3358a3461be99e6", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 2.70ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e005269e95704fae8ff9ebe8f932d023" } }, "641876e099e84c44854262d5e709409e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "f8ab57bd0aba4f608a8f7549baa4ead4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "75b2a86adb434e8cb3358a3461be99e6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "e005269e95704fae8ff9ebe8f932d023": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0217f8a6209948cb9334eeaf5c489b1b": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_8589613c58db44bc959fc43c003dc689", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_168ee56a9f65422ca6c503ea29e8a825", "IPY_MODEL_b5f3ea592806493abe87f29fbe8059fb" ] } }, "8589613c58db44bc959fc43c003dc689": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "168ee56a9f65422ca6c503ea29e8a825": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_a53357f0785046afbbfcc408a5aa0f19", "_dom_classes": [], "description": "Running tokenizer on dataset #1: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_3c4b3cd5af584ad584da81f19a26eec0" } }, "b5f3ea592806493abe87f29fbe8059fb": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_9097e5ba3c00454c823871d38393907b", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 3.40ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5d0f563e826c4ef79ff1ed6cc7ab1c01" } }, "a53357f0785046afbbfcc408a5aa0f19": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "3c4b3cd5af584ad584da81f19a26eec0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9097e5ba3c00454c823871d38393907b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "5d0f563e826c4ef79ff1ed6cc7ab1c01": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9de41b790b2c4df88309b8e6e66972cc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_26274dbb3a324f3ca7a2a91ef24adb83", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_783f5d6b893d4f2e968c249605773c9e", "IPY_MODEL_1a597fd54ab04a4cb80147e93abc72b8" ] } }, "26274dbb3a324f3ca7a2a91ef24adb83": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "783f5d6b893d4f2e968c249605773c9e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_f317aee64c0f43a08c8157413d7956e1", "_dom_classes": [], "description": "Running tokenizer on dataset #2: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_1f37c1324fba4488a0d19cc1d623f887" } }, "1a597fd54ab04a4cb80147e93abc72b8": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_584ec18135294eb9a916c85385eb4bea", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 4.69ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_8e67b29112a04c48a978cf648904ee61" } }, "f317aee64c0f43a08c8157413d7956e1": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "1f37c1324fba4488a0d19cc1d623f887": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "584ec18135294eb9a916c85385eb4bea": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "8e67b29112a04c48a978cf648904ee61": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ece5245bfb5c4b02aa1e2527974f1cc6": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_ab4e0611c3d7428b9ffa703f794852a7", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_0df9c4b4ed0240f2871906f72e9e9839", "IPY_MODEL_5731d37f432b4713b9b8be897b1f6686" ] } }, "ab4e0611c3d7428b9ffa703f794852a7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0df9c4b4ed0240f2871906f72e9e9839": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_fa9d6f6c384241f192660b2668956ad4", "_dom_classes": [], "description": "Running tokenizer on dataset #3: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_c04cd9397ecf4de69d5e7fc39311629c" } }, "5731d37f432b4713b9b8be897b1f6686": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_669706d500bc404ca65e16f939db02c5", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 5.11ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_fc71353cc3b5434ba3c32d5c64069757" } }, "fa9d6f6c384241f192660b2668956ad4": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "c04cd9397ecf4de69d5e7fc39311629c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "669706d500bc404ca65e16f939db02c5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "fc71353cc3b5434ba3c32d5c64069757": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } } }, "cells": [ { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QnKI9cdUFw23", "outputId": "82086023-d380-4e1a-f11a-b0047bb10190" }, "source": [ "!pip install danlp transformers datasets numpy flax seqeval" ], "id": "QnKI9cdUFw23", "execution_count": 1, "outputs": [ { "output_type": "stream", "text": [ "Collecting danlp\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/62/6b/3a245c069f0a5376e565d67c2f9fb04a39e4d7e94c93c2d27e57c7bf9012/danlp-0.0.12-py3-none-any.whl (71kB)\n", "\r\u001b[K |████▋ | 10kB 16.0MB/s eta 0:00:01\r\u001b[K |█████████▏ | 20kB 22.1MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 30kB 25.0MB/s eta 0:00:01\r\u001b[K |██████████████████▍ 
| 40kB 27.0MB/s eta 0:00:01\r\u001b[K |███████████████████████ | 51kB 29.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 61kB 29.9MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 71kB 9.4MB/s \n", "\u001b[?25hCollecting transformers\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/fd/1a/41c644c963249fd7f3836d926afa1e3f1cc234a1c40d80c5f03ad8f6f1b2/transformers-4.8.2-py3-none-any.whl (2.5MB)\n", "\u001b[K |████████████████████████████████| 2.5MB 33.3MB/s \n", "\u001b[?25hCollecting datasets\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/86/27/9c91ddee87b06d2de12f134c5171a49890427e398389f07f6463485723c3/datasets-1.9.0-py3-none-any.whl (262kB)\n", "\u001b[K |████████████████████████████████| 266kB 42.9MB/s \n", "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (1.19.5)\n", "Collecting flax\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f6/21/21ca1f4831ac24646578d2545c4db9a8369b9da4a4b7dcf067feee312b45/flax-0.3.4-py3-none-any.whl (183kB)\n", "\u001b[K |████████████████████████████████| 184kB 49.2MB/s \n", "\u001b[?25hCollecting seqeval\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9d/2d/233c79d5b4e5ab1dbf111242299153f3caddddbb691219f363ad55ce783d/seqeval-1.2.2.tar.gz (43kB)\n", "\u001b[K |████████████████████████████████| 51kB 4.7MB/s \n", "\u001b[?25hRequirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (from danlp) (3.10.0)\n", "Collecting conllu\n", " Downloading https://files.pythonhosted.org/packages/ae/be/be6959c3ff2dbfdd87de4be0ccdff577835b5d08b1d25bf7fd4aaf0d7add/conllu-4.4-py2.py3-none-any.whl\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from danlp) (1.1.5)\n", "Collecting pyconll\n", " Downloading https://files.pythonhosted.org/packages/0a/4c/edf12b4b211f8a0f7f85a52ed4b50cd453ac96e9b751427e0296eb7ae42a/pyconll-3.1.0-py3-none-any.whl\n", 
"Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from danlp) (4.41.1)\n", "Collecting sacremoses\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)\n", "\u001b[K |████████████████████████████████| 901kB 46.1MB/s \n", "\u001b[?25hCollecting huggingface-hub==0.0.12\n", " Downloading https://files.pythonhosted.org/packages/2f/ee/97e253668fda9b17e968b3f97b2f8e53aa0127e8807d24a547687423fe0b/huggingface_hub-0.0.12-py3-none-any.whl\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from transformers) (3.13)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n", "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (4.6.0)\n", "Collecting tokenizers<0.11,>=0.10.1\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3MB)\n", "\u001b[K |████████████████████████████████| 3.3MB 36.7MB/s \n", "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", "Collecting fsspec>=2021.05.0\n", "\u001b[?25l Downloading 
https://files.pythonhosted.org/packages/0e/3a/666e63625a19883ae8e1674099e631f9737bd5478c4790e5ad49c5ac5261/fsspec-2021.6.1-py3-none-any.whl (115kB)\n", "\u001b[K |████████████████████████████████| 122kB 52.7MB/s \n", "\u001b[?25hRequirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n", "Collecting xxhash\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/4f/0a862cad26aa2ed7a7cd87178cbbfa824fc1383e472d63596a0d018374e7/xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243kB)\n", "\u001b[K |████████████████████████████████| 245kB 49.9MB/s \n", "\u001b[?25hRequirement already satisfied: msgpack in /usr/local/lib/python3.7/dist-packages (from flax) (1.0.2)\n", "Requirement already satisfied: jax>=0.2.13 in /usr/local/lib/python3.7/dist-packages (from flax) (0.2.13)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from flax) (3.2.2)\n", "Collecting optax\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/07/48/4f65dbb5ec096917ec039ba2c7eccf97ee05a4157e0e965a45ed3b7a13f9/optax-0.0.9-py3-none-any.whl (118kB)\n", "\u001b[K |████████████████████████████████| 122kB 53.9MB/s \n", "\u001b[?25hRequirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from seqeval) (0.22.2.post1)\n", "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy->danlp) (1.15.0)\n", "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy->danlp) (1.3.0)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->danlp) (2.8.1)\n", "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->danlp) (2018.9)\n", 
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub==0.0.12->transformers) (3.7.4.3)\n", "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.5.30)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", "Requirement already satisfied: absl-py in /usr/local/lib/python3.7/dist-packages (from jax>=0.2.13->flax) (0.12.0)\n", "Requirement already satisfied: opt-einsum in /usr/local/lib/python3.7/dist-packages (from jax>=0.2.13->flax) (3.3.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->flax) (1.3.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->flax) (0.10.0)\n", "Requirement already satisfied: jaxlib>=0.1.37 in /usr/local/lib/python3.7/dist-packages (from optax->flax) (0.1.66+cuda110)\n", "Collecting chex>=0.0.4\n", "\u001b[?25l Downloading 
https://files.pythonhosted.org/packages/0f/95/ccd2da57155c019efb3a60e3e5ecb9da431e19ebb16cce1e6981d615d75e/chex-0.0.8-py3-none-any.whl (57kB)\n", "\u001b[K |████████████████████████████████| 61kB 9.6MB/s \n", "\u001b[?25hRequirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.1)\n", "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->tweepy->danlp) (3.1.1)\n", "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.7/dist-packages (from jaxlib>=0.1.37->optax->flax) (1.12)\n", "Requirement already satisfied: toolz>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from chex>=0.0.4->optax->flax) (0.11.1)\n", "Requirement already satisfied: dm-tree>=0.1.5 in /usr/local/lib/python3.7/dist-packages (from chex>=0.0.4->optax->flax) (0.1.6)\n", "Building wheels for collected packages: seqeval\n", " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for seqeval: filename=seqeval-1.2.2-cp37-none-any.whl size=16184 sha256=81ccf3b27c5f3a0c3ce7d2c6f1f8de09ed86269e87a025f4fb1e192824c0964a\n", " Stored in directory: /root/.cache/pip/wheels/52/df/1b/45d75646c37428f7e626214704a0e35bd3cfc32eda37e59e5f\n", "Successfully built seqeval\n", "Installing collected packages: conllu, pyconll, danlp, sacremoses, huggingface-hub, tokenizers, transformers, fsspec, xxhash, datasets, chex, optax, flax, seqeval\n", "Successfully installed chex-0.0.8 conllu-4.4 danlp-0.0.12 datasets-1.9.0 flax-0.3.4 fsspec-2021.6.1 huggingface-hub-0.0.12 optax-0.0.9 pyconll-3.1.0 sacremoses-0.0.45 seqeval-1.2.2 tokenizers-0.10.3 transformers-4.8.2 xxhash-2.0.2\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "e72bf8e7-5819-4e14-a0e1-384234089c84" }, "source": [ "from danlp.datasets import DDT\n", "from transformers import (AutoConfig, AutoTokenizer, AutoModelForTokenClassification, \n", " 
DataCollatorForTokenClassification, TrainingArguments, Trainer)\n", "from datasets import Dataset, load_metric\n", "from functools import partial\n", "import numpy as np" ], "id": "e72bf8e7-5819-4e14-a0e1-384234089c84", "execution_count": 2, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "7bc1ab27-c2bb-42fc-94d7-a94e4d1dc4e4" }, "source": [ "# Evaluation of Language Models for Danish" ], "id": "7bc1ab27-c2bb-42fc-94d7-a94e4d1dc4e4" }, { "cell_type": "markdown", "metadata": { "id": "79792719-51b5-4c4f-a4ab-7124719b9853" }, "source": [ "This notebook is an investigation into how much, if anything, is gained from including more languages in the training set of a language model at pretraining. We will finetune and evaluate three models:\n", "\n", "1. `flax-community/roberta-base-danish` is a Danish RoBERTa-base model trained on the Danish part of the [mC4](https://github.com/allenai/allennlp/discussions/5265) dataset;\n", "2. `flax-community/roberta-large-scandi` is a Scandinavian RoBERTa-base model, trained on the Danish, Norwegian and Swedish parts of the [mC4](https://github.com/allenai/allennlp/discussions/5265) dataset;\n", "3. `xlm-roberta-base` is a multilingual RoBERTa-base model trained on over 100 languages, on a filtered subset of the Common Crawl dataset." ], "id": "79792719-51b5-4c4f-a4ab-7124719b9853" }, { "cell_type": "markdown", "metadata": { "id": "f026a443-e2bf-4f51-b934-629b277c3530" }, "source": [ "## Named Entity Recognition" ], "id": "f026a443-e2bf-4f51-b934-629b277c3530" }, { "cell_type": "markdown", "metadata": { "id": "7aee4cb1-28d3-40da-a939-00e55ad5ce2c" }, "source": [ "### Preparing the datasets" ], "id": "7aee4cb1-28d3-40da-a939-00e55ad5ce2c" }, { "cell_type": "markdown", "metadata": { "id": "5c463704-5c8b-4e88-9fca-db7000b70aed" }, "source": [ "We start by loading the DaNE dataset for the NER task."
], "id": "5c463704-5c8b-4e88-9fca-db7000b70aed" }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8daf7629-311d-4fba-916b-b9c7f6debfa4", "outputId": "6f9bfa7e-3d7b-4230-c249-8bb74671ffc9" }, "source": [ "# Load the DaNE data\n", "train, val, test = DDT().load_as_simple_ner(predefined_splits=True)\n", "\n", "# Split docs and labels\n", "train_docs, train_labels = train\n", "val_docs, val_labels = val\n", "test_docs, test_labels = test\n", "\n", "print(f'Loaded {len(train_docs)} training samples, '\n", " f'{len(val_docs)} validation samples and '\n", " f'{len(test_docs)} test samples.')" ], "id": "8daf7629-311d-4fba-916b-b9c7f6debfa4", "execution_count": 3, "outputs": [ { "output_type": "stream", "text": [ "Downloading file /tmp/tmptw7g3c2s\n", "Loaded 4383 training samples, 564 validation samples and 565 test samples.\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "3bc2922d-3c32-4e96-ba08-07e0c56a387f" }, "source": [ "We next set up the labels in the dataset, converting them to a numeric representation." 
], "id": "3bc2922d-3c32-4e96-ba08-07e0c56a387f" },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "e0082911-f46f-45d3-ac53-aa59296fabc0", "outputId": "e1d0b1a6-2a92-4d1b-a72f-4facd464b9f2" }, "source": [
"# Get all unique labels in the dataset. They are sorted because the iteration\n",
"# order of a set of strings can change between kernel restarts, which would\n",
"# silently change the label <-> id mapping from one run to the next.\n",
"unique_labels = sorted({lbl for lbl_list in train_labels for lbl in lbl_list})\n",
"\n",
"# Set up a numeric representation of the labels\n",
"label2id = {lbl: idx for idx, lbl in enumerate(unique_labels)}\n",
"id2label = {idx: lbl for idx, lbl in enumerate(unique_labels)}\n",
"\n",
"print(f'There are {len(unique_labels)} unique labels in the dataset:')\n",
"print(unique_labels)"
], "id": "e0082911-f46f-45d3-ac53-aa59296fabc0", "execution_count": 4, "outputs": [ { "output_type": "stream", "text": [ "There are 9 unique labels in the dataset:\n", "['B-PER', 'I-PER', 'O', 'I-LOC', 'B-ORG', 'B-MISC', 'I-MISC', 'B-LOC', 'I-ORG']\n" ], "name": "stdout" } ] },
{ "cell_type": "markdown", "metadata": { "id": "a4dd43e2-accf-403e-a39d-51d68dd9c5de" }, "source": [ "### Setting up the models" ], "id": "a4dd43e2-accf-403e-a39d-51d68dd9c5de" },
{ "cell_type": "markdown", "metadata": { "id": "b9b9024f-f981-4a08-8dcf-ce73c4d77a35" }, "source": [ "Next, we load the tokenisers and the models that we want to compare."
], "id": "b9b9024f-f981-4a08-8dcf-ce73c4d77a35" },
{ "cell_type": "code", "metadata": { "id": "713a522b-511c-4dd3-9948-66e3bf8cb40b" }, "source": [
"def prepare_model(name: str) -> dict:\n",
"    '''Load the config, tokeniser and token classification model called `name`.\n",
"\n",
"    Args:\n",
"        name: Model identifier passed to the `from_pretrained` methods.\n",
"\n",
"    Returns:\n",
"        A dict with the entries 'name', 'model' and 'tokenizer'.\n",
"    '''\n",
"    # The number of labels and both label mappings come from the DaNE label set\n",
"    label_setup = dict(num_labels=len(unique_labels), label2id=label2id, id2label=id2label)\n",
"    config = AutoConfig.from_pretrained(name, finetuning_task='ner', **label_setup)\n",
"\n",
"    tokenizer = AutoTokenizer.from_pretrained(name, use_fast=True, add_prefix_space=True)\n",
"\n",
"    # Try the default weights first and retry with `from_flax=True` on an OSError\n",
"    # (presumably raised when the model only ships Flax weights)\n",
"    try:\n",
"        model = AutoModelForTokenClassification.from_pretrained(name, config=config)\n",
"    except OSError:\n",
"        model = AutoModelForTokenClassification.from_pretrained(name, config=config, from_flax=True)\n",
"\n",
"    return {'name': name, 'model': model, 'tokenizer': tokenizer}"
], "id": "713a522b-511c-4dd3-9948-66e3bf8cb40b", "execution_count": 5, "outputs": [] },
{ "cell_type": "markdown", "metadata": { "id": "d84b8b8a-4413-4778-b26e-ab6bf76c0798" }, "source": [ "### Setting up tokenisation of the datasets" ], "id": "d84b8b8a-4413-4778-b26e-ab6bf76c0798" },
{ "cell_type": "markdown", "metadata": { "id": "143a3ba2-fdac-44fb-8130-0524866a01a0" }, "source": [ "We start by converting the datasets into the HuggingFace format." ], "id": "143a3ba2-fdac-44fb-8130-0524866a01a0" },
{ "cell_type": "code", "metadata": { "id": "d8945416-1869-424f-b146-0c5848611305" }, "source": [ "train_dataset = Dataset.from_dict(dict(docs=train_docs, orig_labels=train_labels))\n", "val_dataset = Dataset.from_dict(dict(docs=val_docs, orig_labels=val_labels))\n", "test_dataset = Dataset.from_dict(dict(docs=test_docs, orig_labels=test_labels))" ], "id": "d8945416-1869-424f-b146-0c5848611305", "execution_count": 6, "outputs": [] },
{ "cell_type": "markdown", "metadata": { "id": "c8f7a029-f01e-4159-a317-47c9eba7fbfa" }, "source": [ "Next, we define a function which tokenises the dataset as well as aligning it with the labels in the dataset."
], "id": "c8f7a029-f01e-4159-a317-47c9eba7fbfa" },
{ "cell_type": "code", "metadata": { "id": "fbf1ee90-5222-4840-9c61-43ace2e9abe3" }, "source": [
"def tokenize_and_align_labels(examples: dict, tokenizer, label_all_tokens: bool = False) -> dict:\n",
"    '''Tokenize a batch of pre-split documents and align the word labels with the tokens.\n",
"\n",
"    Args:\n",
"        examples: Batch with a 'docs' entry (one list of words per document) and an\n",
"            'orig_labels' entry (one list of label names per document).\n",
"        tokenizer: Tokenizer called on the documents; its output must support `word_ids`.\n",
"        label_all_tokens: If True, every token of a word receives the word's label. If False\n",
"            (the default), only the first token of a word does and the remaining tokens get -100.\n",
"\n",
"    Returns:\n",
"        The tokenizer output with an added 'labels' entry: one list of label ids per document.\n",
"    '''\n",
"    tokenized_inputs = tokenizer(\n",
"        examples['docs'],\n",
"        # The documents are lists of words (with a label for each word), not raw strings\n",
"        is_split_into_words=True,\n",
"    )\n",
"\n",
"    labels = []\n",
"    for i, word_labels in enumerate(examples['orig_labels']):\n",
"        previous_word_idx = None\n",
"        label_ids = []\n",
"        for word_idx in tokenized_inputs.word_ids(batch_index=i):\n",
"            if word_idx is None:\n",
"                # Special tokens have no word id; -100 makes the loss function ignore them\n",
"                label_ids.append(-100)\n",
"            elif label_all_tokens or word_idx != previous_word_idx:\n",
"                # First token of a word, or any token of it when label_all_tokens is set\n",
"                label_ids.append(label2id[word_labels[word_idx]])\n",
"            else:\n",
"                # Remaining tokens of a word are ignored as well\n",
"                label_ids.append(-100)\n",
"            previous_word_idx = word_idx\n",
"        labels.append(label_ids)\n",
"\n",
"    tokenized_inputs['labels'] = labels\n",
"    return tokenized_inputs\n",
"\n",
"\n",
"def tokenize_dataset(dataset: Dataset, tokenizer, label_all_tokens: bool = False, num_proc: int = 4) -> Dataset:\n",
"    '''Apply `tokenize_and_align_labels` to `dataset` in batches.\n",
"\n",
"    Args:\n",
"        dataset: Dataset with 'docs' and 'orig_labels' columns.\n",
"        tokenizer: Tokenizer forwarded to `tokenize_and_align_labels`.\n",
"        label_all_tokens: Forwarded to `tokenize_and_align_labels`.\n",
"        num_proc: Number of processes passed on to `Dataset.map`.\n",
"\n",
"    Returns:\n",
"        The result of `dataset.map`, i.e. the dataset extended with the tokenizer output and 'labels'.\n",
"    '''\n",
"    tokenize_fn = partial(tokenize_and_align_labels,\n",
"                          tokenizer=tokenizer,\n",
"                          label_all_tokens=label_all_tokens)\n",
"    return dataset.map(tokenize_fn,\n",
"                       batched=True,\n",
"                       num_proc=num_proc,\n",
"                       desc='Running tokenizer on dataset')"
], "id": "fbf1ee90-5222-4840-9c61-43ace2e9abe3", "execution_count": 7, "outputs": [] },
{ "cell_type": "markdown", "metadata": { "id": "e508cfec-a830-416d-901e-0a6b5ce67598" }, "source": [ "Just to see that it worked, let's have a look at a tokenized dataset."
], "id": "e508cfec-a830-416d-901e-0a6b5ce67598" }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 450, "referenced_widgets": [ "b3ac84dcf48f4ba8a65aecd1df5a1b68", "490a5358971a45fa92989776dc6757c3", "f5be7a9c6f1941659c806255a9315b7c", "9eb5033009ee441eb164d862b4b2c39c", "ed07265cff0c4c82b95e0d0c47359edb", "46a87c4067114a31ae6e34c3a9464f76", "04a42f0ccb10413494e891907fc547d9", "2188756da7e74badb48a459ea15c02d5", "a1db2167447249ed95dfb78a97c24bf9", "68ce97d8904e4b398af538a7ad1ed1ea", "245bfeb4fb5542f9900cebd0e3cccc74", "501c0ab625ed4f0bb9954de0b97e90f1", "f8140d5334e14d7988c17bbbae05b08e", "0b6cce6d1a0e4de48ae7eb967dfeda87", "bac6501d76ce4b5ba56ab1effcedccbb", "cf17e5af2e7e407e9d96e0325ad733de", "d603ce6680d3425e8c145e77bc0e0e30", "bbd848e83f30482dab926e53a7188f37", "29c28419ac2848ca8ce6f73eea0e3425", "ac01f9efc2ec4eb2af6dfd956467ab8e", "82c474271a584a46b9af812bd9947ff7", "5dea54f5878c4ea1b91aea2d6c01dcc9", "1f5a386436f142999c9cd61a5567167f", "4d13f40b545245d88a9ce9cfa738a59c", "bacd59325efe4e7a8289e2e77eca3f97", "b6493940baf04dd6a461abd3d123d20d", "1b5c2a40effd4df8a0e29f7589e4ce69", "516c9f479605404886582c4af2d4860d", "7d5cce86ee7c4016a6cff03887038660", "36aa47fe9e22473ea71a0b6a4d740b35", "b98d5bb115ce4458919bc628a7c453cc", "6437a7817c244f54b4ca06d78d6aeff7", "3ac65aaeae574af5b7eea30b4a873ec2", "8d99822c6a514136aa164846d039bbfc", "105f722c571f47b2b7a7184c9fe45c18", "4a6ab6cdeb0943ba8cf8486caadf2f8d", "4caf7f4e912d454b8ecf3f3971ede95e", "36873c0c51904573809bb3eadd172383", "15d31d2a49804328b4c80fa98dae8ff1", "7fa7cbad599741edbf0c099bf2668494", "f19e664d094f46f0a94d7410685d67eb", "74626cdc554848fab75607aed0324aa3", "f2bcfb2929594116b9367a18a1778aa2", "709d06d2c8614f9b97bf86ce8ed1118f", "65ca9ae8108d46a1999573c3267f16bc", "520e4029bca14128a6eb4e2bbd4c78ed", "b8e8db36327e4706b6ab435a698cb3fd", "7d948bdb11c0427b8c49c48e7c5d9772" ] }, "id": "4f6b4d78-f060-4b98-b022-a4182f0617c3", "outputId": 
"3c1e9613-7224-464b-e43f-90f568364b4b" }, "source": [ "tokenizer = AutoTokenizer.from_pretrained('flax-community/roberta-base-danish', \n", " use_fast=True,\n", " add_prefix_space=True)\n", "tokenized_train = tokenize_dataset(train_dataset, tokenizer)\n", "print(f'Sample document:')\n", "print(list(zip(tokenized_train[0][\"docs\"], tokenized_train[0][\"orig_labels\"])))\n", "print()\n", "print(f'Tokenized document:')\n", "print(list(zip([tokenizer.decode(tok).strip() for tok in tokenized_train[0][\"input_ids\"]], \n", " [id2label[id] for id in tokenized_train[0][\"labels\"] if id != -100])))" ], "id": "4f6b4d78-f060-4b98-b022-a4182f0617c3", "execution_count": 8, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b3ac84dcf48f4ba8a65aecd1df5a1b68", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=618.0, style=ProgressStyle(description_…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a1db2167447249ed95dfb78a97c24bf9", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1388356.0, style=ProgressStyle(descript…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d603ce6680d3425e8c145e77bc0e0e30", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bacd59325efe4e7a8289e2e77eca3f97", 
"version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3ac65aaeae574af5b7eea30b4a873ec2", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f19e664d094f46f0a94d7410685d67eb", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", "\n", "\n", "\n", "Sample document:\n", "[('På', 'O'), ('fredag', 'O'), ('har', 'O'), ('SID', 'B-ORG'), ('inviteret', 'O'), ('til', 'O'), ('reception', 'O'), ('i', 'O'), ('SID-huset', 'B-LOC'), ('i', 'O'), ('anledning', 'O'), ('af', 'O'), ('at', 'O'), ('formanden', 'O'), ('Kjeld', 'B-PER'), ('Christensen', 'I-PER'), ('går', 'O'), ('ind', 'O'), ('i', 'O'), ('de', 'O'), ('glade', 'O'), ('tressere', 'O'), ('.', 'O')]\n", "\n", "Tokenized document:\n", "[('På', 'O'), ('fredag', 'O'), ('har', 'O'), ('SID', 'B-ORG'), ('inviteret', 'O'), ('til', 'O'), ('reception', 'O'), ('i', 'O'), ('SID', 'B-LOC'), ('-', 'O'), ('huset', 'O'), ('i', 'O'), ('anledning', 'O'), ('af', 'O'), ('at', 'B-PER'), ('formanden', 'I-PER'), ('Kjeld', 'O'), ('Christensen', 'O'), ('går', 'O'), ('ind', 'O'), ('i', 'O'), ('de', 'O'), ('glade', 'O')]\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "5be05dc0-94c2-41e2-ad36-464c846e034e" }, "source": [ "### Finetuning the models" ], "id": 
"5be05dc0-94c2-41e2-ad36-464c846e034e" }, { "cell_type": "markdown", "metadata": { "id": "515c11c5-51a8-4323-847d-7ca67178f1ef" }, "source": [ "We now set up the actual finetuning of the models. We will be employing the `Trainer` class from the `transformers` library, and the following `compute_metrics` helper function is used during training to compute the metrics that we are interested in." ], "id": "515c11c5-51a8-4323-847d-7ca67178f1ef" }, { "cell_type": "code", "metadata": { "id": "75aadb73-a073-48bb-b808-a3f228556db2", "colab": { "base_uri": "https://localhost:8080/", "height": 66, "referenced_widgets": [ "1de82500b0d34e5c9f6c5f995f27ea03", "c7e9741c804e421898f9c45cde1ce7cd", "5178228787094bfba3b671af67e3df0f", "3aa19fa9dda74118844e11a876039a0b", "2afa7d80cc004014b4a672bc0b683fce", "b2757df8c2e14c7d90befd479221e5c5", "9ada8d8e30114064be38f3b5d4645f36", "a391191463d0455c8fd4f83b8ae69c8f" ] }, "outputId": "b470d1a4-d845-424a-e2c9-16d87b52a1f9" }, "source": [ "# Initialise metric\n", "metric = load_metric(\"seqeval\")\n", "\n", "def compute_metrics(p):\n", " '''Helper function for computing metrics'''\n", " predictions, labels = p\n", " predictions = np.argmax(predictions, axis=-1)\n", "\n", " # Remove ignored index (special tokens)\n", " true_predictions = [\n", " [id2label[p] for (p, l) in zip(prediction, label) if l != -100]\n", " for prediction, label in zip(predictions, labels)\n", " ]\n", " true_labels = [\n", " [id2label[l] for (p, l) in zip(prediction, label) if l != -100]\n", " for prediction, label in zip(predictions, labels)\n", " ]\n", "\n", " results = metric.compute(predictions=true_predictions, references=true_labels)\n", " return dict(precision=results[\"overall_precision\"],\n", " recall=results[\"overall_recall\"],\n", " f1=results[\"overall_f1\"],\n", " accuracy=results[\"overall_accuracy\"])" ], "id": "75aadb73-a073-48bb-b808-a3f228556db2", "execution_count": 9, "outputs": [ { "output_type": "display_data", "data": { 
"application/vnd.jupyter.widget-view+json": { "model_id": "1de82500b0d34e5c9f6c5f995f27ea03", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2482.0, style=ProgressStyle(description…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "cdc47b49-6227-498a-835f-caa38e5e7796" }, "source": [ "The following script then tokenises the dataset using the specified tokeniser, and starts finetuning on the DaNE dataset." ], "id": "cdc47b49-6227-498a-835f-caa38e5e7796" }, { "cell_type": "code", "metadata": { "id": "e4a09abf-6230-44f4-87b0-fd630b7c502f" }, "source": [ "def finetune(model_name: str, \n", " epochs: int = 10, \n", " lr: float = 5e-5, \n", " batch_size: int = 32,\n", " save: bool = True):\n", " '''Finetune a transformer model for NER on the DaNE dataset'''\n", "\n", " # Fetch the model and tokenizer\n", " model_dict = prepare_model(model_name)\n", " \n", " # Tokenize the datasets\n", " tokenized_train = tokenize_dataset(train_dataset, model_dict['tokenizer'])\n", " tokenized_val = tokenize_dataset(val_dataset, model_dict['tokenizer'])\n", " tokenized_test = tokenize_dataset(test_dataset, model_dict['tokenizer'])\n", " \n", " # Initialise the data collator\n", " data_collator = DataCollatorForTokenClassification(model_dict['tokenizer'])\n", " \n", " # Initialise training arguments\n", " training_args = TrainingArguments(output_dir=f'../models/{model_dict[\"name\"]}-ner-dane',\n", " evaluation_strategy='epoch',\n", " logging_strategy='epoch',\n", " save_strategy='epoch' if save else 'no',\n", " per_device_train_batch_size=batch_size,\n", " per_device_eval_batch_size=batch_size,\n", " gradient_accumulation_steps=1,\n", " learning_rate=lr,\n", " num_train_epochs=epochs,\n", " warmup_steps=50,\n", " report_to='all',\n", " load_best_model_at_end=True)\n", " \n", " # Initialise Trainer\n", " 
trainer = Trainer(model=model_dict['model'],\n", " args=training_args,\n", " train_dataset=tokenized_train,\n", " eval_dataset=tokenized_val,\n", " tokenizer=model_dict['tokenizer'],\n", " data_collator=data_collator,\n", " compute_metrics=compute_metrics)\n", " \n", " # Finetune the model\n", " train_result = trainer.train()\n", " \n", " # Log training metrics and save the state\n", " metrics = train_result.metrics\n", " trainer.log_metrics(\"train\", metrics)\n", " trainer.save_metrics(\"train\", metrics)\n", " trainer.save_state()\n", " \n", " # Log validation metrics\n", " metrics = trainer.evaluate()\n", " trainer.log_metrics(\"eval\", metrics)\n", " trainer.save_metrics(\"eval\", metrics)\n", " \n", " # Log test metrics\n", " predictions, labels, metrics = trainer.predict(test_dataset, metric_key_prefix=\"predict\")\n", " predictions = np.argmax(predictions, axis=-1)\n", " trainer.log_metrics(\"test\", metrics)\n", " trainer.save_metrics(\"test\", metrics)" ], "id": "e4a09abf-6230-44f4-87b0-fd630b7c502f", "execution_count": 10, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "mm-FMWWblx1O" }, "source": [ "model_names = dict(danish='flax-community/roberta-base-danish',\n", " scandi='flax-community/roberta-large-scandi',#'Maltehb/roberta-base-scandinavian',\n", " multi='xlm-roberta-base',\n", " multilarge='xlm-roberta-large',\n", " botxo='Maltehb/danish-bert-botxo')" ], "id": "mm-FMWWblx1O", "execution_count": 15, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "47aef8e786ea416e8fa99869a46d008f", "eb587b328a7e4a88bffad029b4943a1e", "bf71a524605b4a63b33468e43b212b62", "e9a69e05e82e48feb3f203e8ac7b7afa", "f3fa0e5bd0b34ffe92627a1615447d79", "03b37083ef7642f2942dd97b6e090c33", "18d6f6e2b90747258a2082796f2eeda9", "e33495db5c3343a999c9c6d807abc238", "ceb189e178b24e03b398b7ba37e63a02", "7b85ed3a5f034cbcbb3d87d9e5ac807d", "81f7f3a6c8dc4c9e8320fbcc8b4aa270", 
"f1187ab3d4a1491dacbf7f109810bf3c", "43d1cd650b26471cbf07beb1733b943a", "71410f8bf2d348ea9f3f173ea70c7829", "f8ae53cbfe38498095d7f7e2ee6b1b89", "dd961e0fdc604f0883ac6a674617b7ac", "e9a04e6cb6094f00b75a248aebc11dcf", "3a118dd7adf3459db52afc86e39e4681", "59dae515b4804e3f8a64372ec0cd5254", "a835eb4e43174c178a43fc20cf878f67", "bbd988fc15734daf834d7c1be0898f54", "59a87b9b20fd43e2ad4df96508e2a4e8", "0c06e7199e2440ff8df24c7f4d3816bf", "1721231f136a41a89226a6993e770708", "ea1a49df91de49b39d7c28620d396ed3", "f43ced84b4ae4d0290406ff29ac319b6", "249eab1d100f4e66891404bd0293cfac", "de5f4a79619b4361adb69ebf75f480d3", "38baac16961b491d99a195ec08b7146c", "31550c6938b245bdaff271f77e36a265", "86d213b1b67649e28127ca8919d1b508", "3833a9687f8440c8bc7159be7c5da6aa", "0eb974fa255b4b378c19580c9b443242", "72b317f6253640efa451b41609f418cc", "9dbc6bdd1fcd4258b27b0b36f537a52b", "48eab5fd5f5e4f4f9b1517a3f2e53052", "562f962ecfe1438ba3bd67406c3bc415", "e5c84b81d24a4120ad364cf7a8d92b13", "12ba4202432a4d9da9b4295bee72ffac", "4b846d71afd943278c1ac0cecfa13f65", "c23a9b479ca54b5082ced5e0359f54d9", "ce19ae483e1c4a9b90b3fecfea1330e4", "e953e23fd1364232b6cd39ec11ccaecb", "75b304f6ca5945b295a64fafcf283c4e", "cb932f442d0c4c8b959e455cc0ce1329", "5fa39708c9684ce3b93577f1a0197bf4", "4e7a38f355d84811a66936be929d270d", "50499dbed5cb470a9b5dd7f4e5144405", "1d3024c3e0374116bb856902590f6bfd", "1a406ffa02dc479aa0fcdfa41afcbb0b", "9f6792ca58ed4906bb671ae049e2b860", "54ef5c70c3f146e1a1fbd82f8aeedbb9", "ed8c1a4ff520407eb35402fb4e5fcc71", "68819af7768e486da1752ff27d2e4104", "4039c305be024488abecf4a635345c46", "80622e53e4124d0cbd506542f81f4818", "2cad6e60bb414f46b79720dfc4f87f9e", "fc6b7d2794bb44cdabafdfba208b8dc3", "f8fd5a767ff3404ebbadc6cc63110445", "8b638f038ef04ea7b2ad3bdcb90f0141", "305833c39a074d0296ef4511b6f61e7e", "f31cad3887d640d8b4322598eca1ca70", "02ecdd0d12614752a19b294395cb821c", "4c62a1fd18a54327a1f50bf3ca087027", "9daad6aa599343278ca5198897b390b3", "b803d1d51df9466db862f96fb8e49257", 
"266b34a7ee284ac590ffa999f1351330", "93a03570b7bf4f3f8494f7b5677a7c70", "543ff4b83d804f52afbcb7a57ca22570", "468d17728336411db088c69330f4de68", "fe17854a2dbe4949aeb4efb309cab6fc", "a1feb0e41e2f47e28f550ed81b61e952", "ca9eb397fcd640b48838e58579b475bc", "4234b3e6815040af90d298f6ab2a808e", "30739849e0db45daa664d8992ab6f458", "24dc9ed72f8048fe8b174f2f1ef6e2ca", "641876e099e84c44854262d5e709409e", "f8ab57bd0aba4f608a8f7549baa4ead4", "75b2a86adb434e8cb3358a3461be99e6", "e005269e95704fae8ff9ebe8f932d023", "0217f8a6209948cb9334eeaf5c489b1b", "8589613c58db44bc959fc43c003dc689", "168ee56a9f65422ca6c503ea29e8a825", "b5f3ea592806493abe87f29fbe8059fb", "a53357f0785046afbbfcc408a5aa0f19", "3c4b3cd5af584ad584da81f19a26eec0", "9097e5ba3c00454c823871d38393907b", "5d0f563e826c4ef79ff1ed6cc7ab1c01", "9de41b790b2c4df88309b8e6e66972cc", "26274dbb3a324f3ca7a2a91ef24adb83", "783f5d6b893d4f2e968c249605773c9e", "1a597fd54ab04a4cb80147e93abc72b8", "f317aee64c0f43a08c8157413d7956e1", "1f37c1324fba4488a0d19cc1d623f887", "584ec18135294eb9a916c85385eb4bea", "8e67b29112a04c48a978cf648904ee61", "ece5245bfb5c4b02aa1e2527974f1cc6", "ab4e0611c3d7428b9ffa703f794852a7", "0df9c4b4ed0240f2871906f72e9e9839", "5731d37f432b4713b9b8be897b1f6686", "fa9d6f6c384241f192660b2668956ad4", "c04cd9397ecf4de69d5e7fc39311629c", "669706d500bc404ca65e16f939db02c5", "fc71353cc3b5434ba3c32d5c64069757" ] }, "id": "e863e244-a332-46e7-8bbe-fce0d0b46c57", "outputId": "5c9bf179-dded-42cd-9af3-ad7aba2dcee4" }, "source": [ "finetune(model_names['multilarge'], epochs=25, lr=5e-5, batch_size=32, save=False)" ], "id": "e863e244-a332-46e7-8bbe-fce0d0b46c57", "execution_count": 13, "outputs": [ { "output_type": "stream", "text": [ "404 Client Error: Not Found for url: https://huggingface.co/flax-community/roberta-large-scandi/resolve/main/pytorch_model.bin\n" ], "name": "stderr" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "47aef8e786ea416e8fa99869a46d008f", "version_major": 
2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=498796983.0, style=ProgressStyle(descri…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.7/dist-packages/transformers/modeling_flax_pytorch_utils.py:201: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:180.)\n", " pt_model_dict[flax_key] = torch.from_numpy(flax_tensor)\n", "Some weights of the Flax model were not used when initializing the PyTorch model RobertaForTokenClassification: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']\n", "- This IS expected if you are initializing RobertaForTokenClassification from a Flax model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).\n", "- This IS NOT expected if you are initializing RobertaForTokenClassification from a Flax model that you expect to be exactly identical (e.g. 
initializing a BertForSequenceClassification model from a FlaxBertForSequenceClassification model).\n", "Some weights of RobertaForTokenClassification were not initialized from the Flax model and are newly initialized: ['classifier.weight', 'classifier.bias', 'roberta.embeddings.position_ids']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ceb189e178b24e03b398b7ba37e63a02", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e9a04e6cb6094f00b75a248aebc11dcf", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ea1a49df91de49b39d7c28620d396ed3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0eb974fa255b4b378c19580c9b443242", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] 
} }, { "output_type": "stream", "text": [ "\n", "\n", "\n", "\n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c23a9b479ca54b5082ced5e0359f54d9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1d3024c3e0374116bb856902590f6bfd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2cad6e60bb414f46b79720dfc4f87f9e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " \n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9daad6aa599343278ca5198897b390b3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", "\n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ca9eb397fcd640b48838e58579b475bc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=1.0, style=Progress…" ] }, 
"metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0217f8a6209948cb9334eeaf5c489b1b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", " \n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9de41b790b2c4df88309b8e6e66972cc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ece5245bfb5c4b02aa1e2527974f1cc6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", "\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running training *****\n", " Num examples = 4383\n", " Num Epochs = 25\n", " Instantaneous batch size per device = 32\n", " Total train batch size (w. parallel, distributed & accumulation) = 32\n", " Gradient Accumulation steps = 1\n", " Total optimization steps = 3425\n" ], "name": "stderr" }, { "output_type": "display_data", "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [1080/3425 08:03 < 17:31, 2.23 it/s, Epoch 7.88/25]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossPrecisionRecallF1Accuracy
10.4765000.1810830.4088980.4020830.4054620.956252
20.1384000.0767930.6897200.7687500.7270940.979965
30.0759000.0618240.7480620.8041670.7751000.983062
40.0502000.0589040.7736940.8333330.8024070.985192
50.0345000.0554400.8149610.8625000.8380570.986837
60.0252000.0568320.8035020.8604170.8309860.986643
70.0183000.0585090.8040000.8375000.8204080.986643

" ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-137\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-137/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-137/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-137/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-137/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-274\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-274/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-274/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-274/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-274/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 
564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-411\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-411/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-411/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-411/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-411/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-548\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-548/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-548/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-548/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-548/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-685\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-685/config.json\n", "Model weights saved in 
../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-685/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-685/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-685/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-822\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-822/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-822/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-822/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-822/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-959\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-959/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-959/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-959/tokenizer_config.json\n", "Special tokens file saved in 
../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-959/special_tokens_map.json\n" ], "name": "stderr" }, { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [2878/3425 21:59 < 04:10, 2.18 it/s, Epoch 21/25]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossPrecisionRecallF1Accuracy
10.4765000.1810830.4088980.4020830.4054620.956252
20.1384000.0767930.6897200.7687500.7270940.979965
30.0759000.0618240.7480620.8041670.7751000.983062
40.0502000.0589040.7736940.8333330.8024070.985192
50.0345000.0554400.8149610.8625000.8380570.986837
60.0252000.0568320.8035020.8604170.8309860.986643
70.0183000.0585090.8040000.8375000.8204080.986643
80.0133000.0636130.8323470.8791670.8551170.988289
90.0112000.0657740.8181820.8812500.8485460.987224
100.0085000.0624340.8531190.8833330.8679630.989063
110.0073000.0644650.8362920.8833330.8591690.988966
120.0054000.0662950.8542910.8916670.8725790.989160
130.0045000.0677130.8508950.8916670.8708040.989644
140.0041000.0681050.8540000.8895830.8714290.989160
150.0034000.0698190.8643720.8895830.8767970.989837
160.0027000.0745520.8562750.8812500.8685830.989063
170.0025000.0741900.8737270.8937500.8836250.989741
180.0018000.0748410.8600000.8958330.8775510.988870
190.0017000.0749290.8707070.8979170.8841030.989741
200.0018000.0786820.8554220.8875000.8711660.989160
210.0016000.0766860.8669350.8958330.8811480.989741

" ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1096\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1096/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1096/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1096/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1096/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1233\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1233/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1233/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1233/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1233/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num 
examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1370\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1370/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1370/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1370/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1370/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1507\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1507/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1507/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1507/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1507/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1644\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1644/config.json\n", "Model weights saved in 
../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1644/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1644/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1644/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1781\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1781/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1781/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1781/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1781/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1918\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1918/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1918/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1918/tokenizer_config.json\n", "Special tokens file saved in 
../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-1918/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2055\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2055/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2055/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2055/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2055/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2192\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2192/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2192/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2192/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2192/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num 
examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2329\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2329/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2329/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2329/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2329/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2466\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2466/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2466/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2466/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2466/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2603\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2603/config.json\n", "Model weights saved in 
../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2603/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2603/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2603/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2740\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2740/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2740/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2740/tokenizer_config.json\n", "Special tokens file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2740/special_tokens_map.json\n", "The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running Evaluation *****\n", " Num examples = 564\n", " Batch size = 32\n", "Saving model checkpoint to ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2877\n", "Configuration saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2877/config.json\n", "Model weights saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2877/pytorch_model.bin\n", "tokenizer config file saved in ../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2877/tokenizer_config.json\n", "Special tokens file saved in 
../models/flax-community/roberta-large-scandi-ner-dane/checkpoint-2877/special_tokens_map.json\n" ], "name": "stderr" }, { "output_type": "error", "ename": "RuntimeError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36msave\u001b[0;34m(obj, f, pickle_module, pickle_protocol, _use_new_zipfile_serialization)\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_open_zipfile_writer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 379\u001b[0;31m \u001b[0m_save\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpickle_protocol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 380\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_save\u001b[0;34m(obj, zip_file, pickle_module, pickle_protocol)\u001b[0m\n\u001b[1;32m 498\u001b[0m \u001b[0mnum_bytes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0melement_size\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 499\u001b[0;31m 
\u001b[0mzip_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_record\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstorage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_bytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 500\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mOSError\u001b[0m: [Errno 28] No space left on device", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfinetune\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_names\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'scandi'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m25\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5e-5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msave\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36mfinetune\u001b[0;34m(model_name, epochs, lr, batch_size, save)\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;31m# Finetune the model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 43\u001b[0;31m \u001b[0mtrain_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 44\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0;31m# Log training metrics and save the state\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, **kwargs)\u001b[0m\n\u001b[1;32m 1329\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1330\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_epoch_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1331\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_log_save_evaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1332\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1333\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mDebugOption\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTPU_METRICS_DEBUG\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, model, trial, epoch)\u001b[0m\n\u001b[1;32m 1428\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1429\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_save\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1430\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_save_checkpoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1431\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_save\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_save_checkpoint\u001b[0;34m(self, model, trial, metrics)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_save\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepspeed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m \u001b[0;31m# deepspeed.save_checkpoint above saves 
model/optim/sched\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"optimizer.pt\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcatch_warnings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrecord\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mcaught_warnings\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr_scheduler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"scheduler.pt\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36msave\u001b[0;34m(obj, f, pickle_module, pickle_protocol, _use_new_zipfile_serialization)\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[0;32mwith\u001b[0m 
\u001b[0m_open_zipfile_writer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[0m_save\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mopened_zipfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpickle_protocol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 380\u001b[0;31m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 381\u001b[0m \u001b[0m_legacy_save\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpickle_protocol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__exit__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 259\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfile_like\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_end_of_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflush\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mRuntimeError\u001b[0m: [enforce fail at inline_container.cc:298] . unexpected pos 507054784 vs 507054672" ] } ] }, { "cell_type": "code", "metadata": { "id": "87YkQyTi7Lq7" }, "source": [ "" ], "id": "87YkQyTi7Lq7", "execution_count": null, "outputs": [] } ] }