{ "cells": [ { "cell_type": "markdown", "source": [ "# Demo for `pszemraj/flan-t5-large-grammar-synthesis`\n", "\n", "\n", "- This notebook runs on CPU by default. To run on GPU, select a GPU runtime in Colab and pass `device=0` to `pipeline()` when creating `corrector`.\n", "- Some details on usage:\n", "  - This model was trained on 1-8 sentences at a time.\n", "  - By default, it **will not** work well on very short inputs (around 4 tokens) or on very long texts.\n", "  - I recommend feeding it text in batches of 4-128 tokens at a time.\n", "  - An **extension** of this notebook for **batch inference** on longer texts is available [here](https://colab.research.google.com/gist/pszemraj/6e961b08970f98479511bb1e17cdb4f0/batch-grammar-check-correct-demo.ipynb).\n", "- [Link to the model card](https://huggingface.co/pszemraj/flan-t5-large-grammar-synthesis)\n", "\n", "\n", "---\n", "\n" ], "metadata": { "id": "LEpomcp9mMno" } }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "LrDWdEzv3LaX" }, "outputs": [], "source": [ "#@markdown add auto-Colab formatting with `IPython.display`\n", "from IPython.display import HTML, display\n", "# colab formatting\n", "def set_css():\n", " display(\n", " HTML(\n", " \"\"\"\n", " \n", " \"\"\"\n", " )\n", " )\n", "\n", "get_ipython().events.register(\"pre_run_cell\", set_css)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lIYdn1woOS1n", "outputId": "fba7842b-269d-4bfa-991c-714c8764281e" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m215.3/215.3 KB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.8/199.8 KB\u001b[0m \u001b[31m6.7 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m18.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ], "source": [ "pip install -U -q transformers accelerate" ] }, { "cell_type": "code", "source": [ "from transformers import pipeline\n", "\n", "corrector = pipeline(\n", " \"text2text-generation\",\n", " \"pszemraj/flan-t5-large-grammar-synthesis\",\n", ")\n" ], "metadata": { "id": "8ZfUqTVJl8Zw", "outputId": "991715c1-9a86-4c18-b9c7-d68dfedfd400", "colab": { "base_uri": "https://localhost:8080/", "height": 209, "referenced_widgets": [ "08fedd984cb345979d086691883d2091", "df98c4ff2d9448b1b29b16f73b8730ef", "60cba2280ac5433389dda7867818d0e1", "84b6ac6af76b4302944de3ea3da0d952", "95eb184f9443408daa3c40e87650ed96", "125aa3fb67854b2da3d56dff006d9d53", "c9729b9d28fa45fcabcf8a41b94dcdee", "7b59d36e571144dbbf0e716157897786", "78eb30ee9018435493d89086e3cb36d7", "59d0d2b4834743c4ac7f24763d05afd9", "44119365ec72475f93c644d5ffce3be8", "6ad2f1a6b85b4803997ccd96d06ad94f", "8853f178dfcc49cda14bc38f88716110", "bc245822258c4d14becb89bd8aa5a339", "e6843cf397d74ffca184ed91ac2d1bd8", "c069ae665ff2473fbaa1e4d1a55b7f7e", "19d3a3c80ee84d09bc89540004c7ac8f", "beb83591efae4cc4902d99971dce24ed", "1e3eda5989654c8a9843c6d7af87f308", "ae44480189e1498ab47b19efa7b25cd8", "944280e6b4814ddb95ab16690b4ece7d", "6fd4a143ad9d4aa49a89f657c1a81eba", "4a8ac6c2a05f465887144ffc160edda8", "6d8c0a62bba14058b32bb6f94c8d55a5", "a42abc1786904c8a82cc532b6b736bb9", "e8225c749c4d42f896596387fbebed14", "8d1e87c9ff9446abb065ddd03c4a2431", "acf5fc8d4bf344f885eef81dece035cd", "1a4e8e4aea7e4fcfa3e3ef3fad36ed24", "39258e178ceb4d53964c1c2e0ca3e1c6", "74d5c58f966f40cbae1df41777466592", "f11aa0ffaab149dd9779b6eed0ddd00c", "1c61f6e664e94db8ac099617350148c9", "1a4bb142f34b4d85a3eb34bdc8651f5b", "7ea857b51aa646408061c802deee77d0", "b2695b84e1564869afcd48f41b8c7afa", 
"3835af40ff6445dbb5ec856189f762e7", "aefa6ea29b7549de8558aa90722a8f21", "3053fd8da9a541b59d3dd495c73b3341", "ec6755d7c6874e8cbe73b53013738537", "2f203739ccdb40d593347a21a0fdd5fc", "45cd90413b4d4c30a75a1d36c5becafd", "7c0ff834038e4c90930923ec46c1d677", "7c8845ac7ca64841936c084148b4cc20", "b01d8f96a17548aaaf82920c85f0bd13", "24e9108dfd6f44aeb40dde987c831117", "af106c38e6c4497ab28db4a246ef425a", "17223e9936c24ff7836f47a5f9cf378e", "dfdf2481db344c569555c195fb639b33", "81eeb57a5cac4a3d9597bb25c4eedbcc", "6d3fe0e7fd0b405d86f31e9ca38f3fc1", "72d40fb3ddec4e21aaa7a17fbc777009", "0b8ef90b8b41448aa728b0bd7a986ab9", "ae4966aacc7e4208b50ea8c415219bdc", "a36245544b1a4bfd83f4b39fe6dd57a3", "29bc49c6f76043d1bbb254a920a9e5af", "c394048de6d242e4881bcd58e02a5c70", "da6dae85b8e84c51a25b4b4fd420987f", "a6eaaf733d2e4d1f9d687b838cb4c120", "67b90402c14d44838a93eff8a8c9d878", "4f8d5cd1df1c4f23997ab4beae49d3e8", "323b150a82364fa881350a069f927d25", "22897a70344a4b6caf6432b19ae045b2", "ec7c38e1753f430fb9ecc85c0166a004", "004fc8fe86864ac7859750f0975094ac", "7487cd90a3d7411ca199f91588aed6b7" ] } }, "execution_count": 2, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/892 [00:00