File size: 98,123 Bytes
e49f5ad
1
{"cells":[{"cell_type":"markdown","source":["## Preliminary operations"],"metadata":{"id":"viixGIJcKPSQ"},"id":"viixGIJcKPSQ"},{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"MevE4jEZ5QBT","executionInfo":{"status":"ok","timestamp":1652189481823,"user_tz":-120,"elapsed":25189,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"d4b2a927-e000-442b-ebc6-0d40d8a165d6"},"id":"MevE4jEZ5QBT","execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}]},{"cell_type":"code","source":["# install dependencies\n","! pip install farm-haystack[faiss-gpu]"],"metadata":{"id":"VYWRJ-Lf55nV","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1652189651623,"user_tz":-120,"elapsed":161669,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"5c860ef6-d4cb-4293-d704-51454a3f88bf"},"id":"VYWRJ-Lf55nV","execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting farm-haystack[faiss-gpu]\n","  Downloading farm_haystack-1.4.0-py3-none-any.whl (524 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 524 kB 6.8 MB/s \n","\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (1.3.5)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (2.6.3)\n","Collecting elastic-apm\n","  Downloading elastic_apm-6.9.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (374 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 374 kB 44.4 MB/s \n","\u001b[?25hCollecting rapidfuzz\n","  Downloading 
rapidfuzz-2.0.11-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.8 MB 45.7 MB/s \n","\u001b[?25hCollecting mmh3\n","  Downloading mmh3-3.0.0-cp37-cp37m-manylinux2010_x86_64.whl (50 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50 kB 6.7 MB/s \n","\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (4.11.3)\n","Collecting mlflow\n","  Downloading mlflow-1.25.1-py3-none-any.whl (16.8 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 16.8 MB 720 kB/s \n","\u001b[?25hCollecting seqeval\n","  Downloading seqeval-1.2.2.tar.gz (43 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 43 kB 2.0 MB/s \n","\u001b[?25hRequirement already satisfied: scikit-learn>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (1.0.2)\n","Collecting langdetect\n","  Downloading langdetect-1.0.9.tar.gz (981 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 981 kB 17.9 MB/s \n","\u001b[?25hCollecting tika\n","  Downloading tika-1.24.tar.gz (28 kB)\n","Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (0.3.4)\n","Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (3.2.5)\n","Collecting pydantic\n","  Downloading pydantic-1.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 10.9 MB 13.2 MB/s \n","\u001b[?25hRequirement 
already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (2.23.0)\n","Requirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (4.3.3)\n","Collecting elasticsearch<=7.10,>=7.7\n","  Downloading elasticsearch-7.10.0-py2.py3-none-any.whl (321 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 321 kB 52.3 MB/s \n","\u001b[?25hCollecting azure-core<1.23\n","  Downloading azure_core-1.22.1-py3-none-any.whl (178 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 178 kB 52.2 MB/s \n","\u001b[?25hCollecting python-docx\n","  Downloading python-docx-0.8.11.tar.gz (5.6 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.6 MB 20.4 MB/s \n","\u001b[?25hRequirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (1.4.1)\n","Collecting sentence-transformers>=2.2.0\n","  Downloading sentence-transformers-2.2.0.tar.gz (79 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 79 kB 8.3 MB/s \n","\u001b[?25hRequirement already satisfied: more-itertools in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (8.12.0)\n","Collecting torch<1.11,>1.9\n","  Downloading torch-1.10.2-cp37-cp37m-manylinux1_x86_64.whl (881.9 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 834.1 MB 1.3 MB/s eta 0:00:37tcmalloc: large alloc 1147494400 bytes == 0x399b0000 @  0x7fe1e1193615 0x592b76 0x4df71e 0x59afff 0x515655 0x549576 0x593fce 0x548ae9 0x51566f 0x549576 0x593fce 0x548ae9 0x5127f1 0x598e3b 0x511f68 0x598e3b 0x511f68 0x598e3b 0x511f68 0x4bc98a 0x532e76 0x594b72 0x515600 0x549576 
0x593fce 0x548ae9 0x5127f1 0x549576 0x593fce 0x5118f8 0x593dd7\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 881.9 MB 2.0 kB/s \n","\u001b[?25hCollecting transformers==4.13.0\n","  Downloading transformers-4.13.0-py3-none-any.whl (3.3 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.3 MB 38.0 MB/s \n","\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (4.64.0)\n","Collecting quantulum3\n","  Downloading quantulum3-0.7.10-py3-none-any.whl (10.7 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 10.7 MB 34.5 MB/s \n","\u001b[?25hCollecting azure-ai-formrecognizer==3.2.0b2\n","  Downloading azure_ai_formrecognizer-3.2.0b2-py2.py3-none-any.whl (219 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 219 kB 35.3 MB/s \n","\u001b[?25hCollecting posthog\n","  Downloading posthog-1.4.7-py2.py3-none-any.whl (22 kB)\n","Requirement already satisfied: six>=1.11.0 in /usr/local/lib/python3.7/dist-packages (from azure-ai-formrecognizer==3.2.0b2->farm-haystack[faiss-gpu]) (1.15.0)\n","Collecting azure-common~=1.1\n","  Downloading azure_common-1.1.28-py2.py3-none-any.whl (14 kB)\n","Collecting msrest>=0.6.21\n","  Downloading msrest-0.6.21-py2.py3-none-any.whl (85 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 85 kB 4.2 MB/s \n","\u001b[?25hCollecting tokenizers<0.11,>=0.10.1\n","  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.3 MB 33.6 MB/s 
\n","\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.13.0->farm-haystack[faiss-gpu]) (1.21.6)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.13.0->farm-haystack[faiss-gpu]) (3.6.0)\n","Collecting huggingface-hub<1.0,>=0.1.0\n","  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 77 kB 6.6 MB/s \n","\u001b[?25hCollecting pyyaml>=5.1\n","  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 596 kB 51.0 MB/s \n","\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.13.0->farm-haystack[faiss-gpu]) (2019.12.20)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers==4.13.0->farm-haystack[faiss-gpu]) (21.3)\n","Collecting sacremoses\n","  Downloading sacremoses-0.0.53.tar.gz (880 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 880 kB 43.3 MB/s \n","\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from elasticsearch<=7.10,>=7.7->farm-haystack[faiss-gpu]) (2021.10.8)\n","Requirement already satisfied: urllib3<2,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from elasticsearch<=7.10,>=7.7->farm-haystack[faiss-gpu]) (1.24.3)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers==4.13.0->farm-haystack[faiss-gpu]) (4.2.0)\n","Collecting isodate>=0.6.0\n","  Downloading 
isodate-0.6.1-py2.py3-none-any.whl (41 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 41 kB 623 kB/s \n","\u001b[?25hRequirement already satisfied: requests-oauthlib>=0.5.0 in /usr/local/lib/python3.7/dist-packages (from msrest>=0.6.21->azure-ai-formrecognizer==3.2.0b2->farm-haystack[faiss-gpu]) (1.3.1)\n","Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers==4.13.0->farm-haystack[faiss-gpu]) (3.0.8)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->farm-haystack[faiss-gpu]) (3.0.4)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->farm-haystack[faiss-gpu]) (2.10)\n","Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.5.0->msrest>=0.6.21->azure-ai-formrecognizer==3.2.0b2->farm-haystack[faiss-gpu]) (3.2.0)\n","Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=1.0.0->farm-haystack[faiss-gpu]) (1.1.0)\n","Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=1.0.0->farm-haystack[faiss-gpu]) (3.1.0)\n","Requirement already satisfied: torchvision in /usr/local/lib/python3.7/dist-packages (from sentence-transformers>=2.2.0->farm-haystack[faiss-gpu]) (0.12.0+cu113)\n","Collecting sentencepiece\n","  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.2 MB 31.5 MB/s \n","\u001b[?25hCollecting sqlalchemy-utils\n","  Downloading SQLAlchemy_Utils-0.38.2-py3-none-any.whl (100 kB)\n","\u001b[K     
|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100 kB 10.0 MB/s \n","\u001b[?25hRequirement already satisfied: sqlalchemy<2,>=1.4.2 in /usr/local/lib/python3.7/dist-packages (from farm-haystack[faiss-gpu]) (1.4.36)\n","Collecting psycopg2-binary\n","  Downloading psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.0 MB 37.1 MB/s \n","\u001b[?25hCollecting faiss-gpu<2,>=1.6.3\n","  Downloading faiss_gpu-1.7.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 85.5 MB 92 kB/s \n","\u001b[?25hRequirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.7/dist-packages (from sqlalchemy<2,>=1.4.2->farm-haystack[faiss-gpu]) (1.1.2)\n","Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->farm-haystack[faiss-gpu]) (3.8.0)\n","Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->farm-haystack[faiss-gpu]) (0.18.1)\n","Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->farm-haystack[faiss-gpu]) (5.7.1)\n","Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->farm-haystack[faiss-gpu]) (21.4.0)\n","Collecting gunicorn\n","  Downloading gunicorn-20.1.0-py3-none-any.whl (79 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 79 kB 8.4 MB/s \n","\u001b[?25hRequirement already satisfied: entrypoints in /usr/local/lib/python3.7/dist-packages (from mlflow->farm-haystack[faiss-gpu]) (0.4)\n","Requirement already 
satisfied: sqlparse>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from mlflow->farm-haystack[faiss-gpu]) (0.4.2)\n","Collecting alembic\n","  Downloading alembic-1.7.7-py3-none-any.whl (210 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 210 kB 46.3 MB/s \n","\u001b[?25hCollecting gitpython>=2.1.0\n","  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 181 kB 48.8 MB/s \n","\u001b[?25hRequirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from mlflow->farm-haystack[faiss-gpu]) (2022.1)\n","Collecting querystring-parser\n","  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)\n","Collecting prometheus-flask-exporter\n","  Downloading prometheus_flask_exporter-0.20.1-py3-none-any.whl (18 kB)\n","Collecting databricks-cli>=0.8.7\n","  Downloading databricks-cli-0.16.6.tar.gz (62 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 62 kB 622 kB/s \n","\u001b[?25hCollecting docker>=4.0.0\n","  Downloading docker-5.0.3-py2.py3-none-any.whl (146 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 146 kB 52.8 MB/s \n","\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from mlflow->farm-haystack[faiss-gpu]) (1.3.0)\n","Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->farm-haystack[faiss-gpu]) (7.1.2)\n","Requirement already satisfied: Flask in /usr/local/lib/python3.7/dist-packages (from mlflow->farm-haystack[faiss-gpu]) (1.1.4)\n","Requirement already satisfied: protobuf>=3.7.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->farm-haystack[faiss-gpu]) (3.17.3)\n","Collecting pyjwt>=1.7.0\n","  Downloading 
PyJWT-2.3.0-py3-none-any.whl (16 kB)\n","Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.7/dist-packages (from databricks-cli>=0.8.7->mlflow->farm-haystack[faiss-gpu]) (0.8.9)\n","Collecting websocket-client>=0.32.0\n","  Downloading websocket_client-1.3.2-py3-none-any.whl (54 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 54 kB 2.9 MB/s \n","\u001b[?25hCollecting gitdb<5,>=4.0.1\n","  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 63 kB 1.9 MB/s \n","\u001b[?25hCollecting smmap<6,>=3.0.1\n","  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n","Collecting Mako\n","  Downloading Mako-1.2.0-py3-none-any.whl (78 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 78 kB 7.3 MB/s \n","\u001b[?25hRequirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from Flask->mlflow->farm-haystack[faiss-gpu]) (1.1.0)\n","Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from Flask->mlflow->farm-haystack[faiss-gpu]) (1.0.1)\n","Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from Flask->mlflow->farm-haystack[faiss-gpu]) (2.11.3)\n","Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->Flask->mlflow->farm-haystack[faiss-gpu]) (2.0.1)\n","Requirement already satisfied: setuptools>=3.0 in /usr/local/lib/python3.7/dist-packages (from gunicorn->mlflow->farm-haystack[faiss-gpu]) (57.4.0)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->farm-haystack[faiss-gpu]) (2.8.2)\n","Collecting backoff<2.0.0,>=1.10.0\n","  Downloading 
backoff-1.11.1-py2.py3-none-any.whl (13 kB)\n","Collecting monotonic>=1.5\n","  Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n","Requirement already satisfied: prometheus-client in /usr/local/lib/python3.7/dist-packages (from prometheus-flask-exporter->mlflow->farm-haystack[faiss-gpu]) (0.14.1)\n","Requirement already satisfied: lxml>=2.3.2 in /usr/local/lib/python3.7/dist-packages (from python-docx->farm-haystack[faiss-gpu]) (4.2.6)\n","Collecting num2words\n","  Downloading num2words-0.5.10-py3-none-any.whl (101 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 101 kB 11.4 MB/s \n","\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.7/dist-packages (from quantulum3->farm-haystack[faiss-gpu]) (2.1.0)\n","Requirement already satisfied: docopt>=0.6.2 in /usr/local/lib/python3.7/dist-packages (from num2words->quantulum3->farm-haystack[faiss-gpu]) (0.6.2)\n","Collecting jarowinkler<1.1.0,>=1.0.2\n","  Downloading jarowinkler-1.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (103 kB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 103 kB 52.7 MB/s \n","\u001b[?25hCollecting torchvision\n","  Downloading torchvision-0.12.0-cp37-cp37m-manylinux1_x86_64.whl (21.0 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 21.0 MB 5.2 MB/s \n","\u001b[?25h  Downloading torchvision-0.11.3-cp37-cp37m-manylinux1_x86_64.whl (23.2 MB)\n","\u001b[K     |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 23.2 MB 1.4 MB/s \n","\u001b[?25hRequirement already satisfied: pillow!=8.3.0,>=5.3.0 in /usr/local/lib/python3.7/dist-packages (from torchvision->sentence-transformers>=2.2.0->farm-haystack[faiss-gpu]) (7.1.2)\n","Building wheels for collected packages: sentence-transformers, langdetect, 
databricks-cli, python-docx, sacremoses, seqeval, tika\n","  Building wheel for sentence-transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for sentence-transformers: filename=sentence_transformers-2.2.0-py3-none-any.whl size=120747 sha256=78ee0812cc2d1d74eb33df92f06ab47670672b543b3620e0caeec1881ae3ead0\n","  Stored in directory: /root/.cache/pip/wheels/83/c0/df/b6873ab7aac3f2465aa9144b6b4c41c4391cfecc027c8b07e7\n","  Building wheel for langdetect (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993242 sha256=bef3968e1cceab5b68dd1025df3703a389b0206a219a69768c643a09b011013c\n","  Stored in directory: /root/.cache/pip/wheels/c5/96/8a/f90c59ed25d75e50a8c10a1b1c2d4c402e4dacfa87f3aff36a\n","  Building wheel for databricks-cli (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for databricks-cli: filename=databricks_cli-0.16.6-py3-none-any.whl size=112631 sha256=72e291d5e52fd87e572aec37b8f5f38bc9848e8183a5dea6ac8c0d91abc1f46e\n","  Stored in directory: /root/.cache/pip/wheels/96/c1/f8/d75a22e789ab6a4dff11f18338c3af4360189aa371295cc934\n","  Building wheel for python-docx (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for python-docx: filename=python_docx-0.8.11-py3-none-any.whl size=184507 sha256=63327ac216c3fcdbdaf12c2b06f89b607d9512eb699bf77beb105bf9c8d4df67\n","  Stored in directory: /root/.cache/pip/wheels/f6/6f/b9/d798122a8b55b74ad30b5f52b01482169b445fbb84a11797a6\n","  Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895260 sha256=47d211902d48fbf93e6c1e701400ce9a0d097557d10ba112c6412abda8f93abe\n","  Stored in directory: /root/.cache/pip/wheels/87/39/dd/a83eeef36d0bf98e7a4d1933a4ad2d660295a40613079bafc9\n","  Building wheel for seqeval (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16180 sha256=c325eef1ce6ac89a8430038af5d6f149e2e92eaf9a9ad880c7d74f027c738460\n","  Stored in directory: /root/.cache/pip/wheels/05/96/ee/7cac4e74f3b19e3158dce26a20a1c86b3533c43ec72a549fd7\n","  Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for tika: filename=tika-1.24-py3-none-any.whl size=32893 sha256=61bb8b103bb3d215af2339c27ad44793b3a7a92182d9658c2f677a9691a583ee\n","  Stored in directory: /root/.cache/pip/wheels/ec/2b/38/58ff05467a742e32f67f5d0de048fa046e764e2fbb25ac93f3\n","Successfully built sentence-transformers langdetect databricks-cli python-docx sacremoses seqeval tika\n","Installing collected packages: smmap, pyyaml, websocket-client, torch, tokenizers, sacremoses, pyjwt, Mako, isodate, huggingface-hub, gitdb, transformers, torchvision, sentencepiece, querystring-parser, prometheus-flask-exporter, num2words, msrest, monotonic, jarowinkler, gunicorn, gitpython, docker, databricks-cli, backoff, azure-core, azure-common, alembic, tika, seqeval, sentence-transformers, rapidfuzz, quantulum3, python-docx, pydantic, posthog, mmh3, mlflow, langdetect, elasticsearch, elastic-apm, azure-ai-formrecognizer, sqlalchemy-utils, psycopg2-binary, farm-haystack, faiss-gpu\n","  Attempting uninstall: pyyaml\n","    Found existing installation: PyYAML 3.13\n","    Uninstalling PyYAML-3.13:\n","      Successfully uninstalled PyYAML-3.13\n","  Attempting uninstall: torch\n","    Found existing installation: torch 1.11.0+cu113\n","    Uninstalling torch-1.11.0+cu113:\n","      Successfully uninstalled torch-1.11.0+cu113\n","  Attempting uninstall: torchvision\n","    Found existing installation: torchvision 0.12.0+cu113\n","    Uninstalling torchvision-0.12.0+cu113:\n","      Successfully uninstalled torchvision-0.12.0+cu113\n","\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are 
installed. This behaviour is the source of the following dependency conflicts.\n","torchtext 0.12.0 requires torch==1.11.0, but you have torch 1.10.2 which is incompatible.\n","torchaudio 0.11.0+cu113 requires torch==1.11.0, but you have torch 1.10.2 which is incompatible.\u001b[0m\n","Successfully installed Mako-1.2.0 alembic-1.7.7 azure-ai-formrecognizer-3.2.0b2 azure-common-1.1.28 azure-core-1.22.1 backoff-1.11.1 databricks-cli-0.16.6 docker-5.0.3 elastic-apm-6.9.1 elasticsearch-7.10.0 faiss-gpu-1.7.2 farm-haystack-1.4.0 gitdb-4.0.9 gitpython-3.1.27 gunicorn-20.1.0 huggingface-hub-0.5.1 isodate-0.6.1 jarowinkler-1.0.2 langdetect-1.0.9 mlflow-1.25.1 mmh3-3.0.0 monotonic-1.6 msrest-0.6.21 num2words-0.5.10 posthog-1.4.7 prometheus-flask-exporter-0.20.1 psycopg2-binary-2.9.3 pydantic-1.9.0 pyjwt-2.3.0 python-docx-0.8.11 pyyaml-6.0 quantulum3-0.7.10 querystring-parser-1.2.4 rapidfuzz-2.0.11 sacremoses-0.0.53 sentence-transformers-2.2.0 sentencepiece-0.1.96 seqeval-1.2.2 smmap-5.0.0 sqlalchemy-utils-0.38.2 tika-1.24 tokenizers-0.10.3 torch-1.10.2 torchvision-0.11.3 transformers-4.13.0 websocket-client-1.3.2\n"]}]},{"cell_type":"markdown","source":["## Load data"],"metadata":{"id":"QVDuHAMIK4bg"},"id":"QVDuHAMIK4bg"},{"cell_type":"code","execution_count":3,"id":"72139774","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:40:46.176031Z","iopub.status.busy":"2022-01-09T08:40:46.175755Z","iopub.status.idle":"2022-01-09T08:40:46.179554Z","shell.execute_reply":"2022-01-09T08:40:46.178704Z","shell.execute_reply.started":"2022-01-09T08:40:46.175959Z"},"id":"72139774","executionInfo":{"status":"ok","timestamp":1652189651625,"user_tz":-120,"elapsed":32,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}}},"outputs":[],"source":["import glob\n","import 
json"]},{"cell_type":"code","execution_count":4,"id":"4421e328","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:40:47.846999Z","iopub.status.busy":"2022-01-09T08:40:47.846757Z","iopub.status.idle":"2022-01-09T08:40:48.327632Z","shell.execute_reply":"2022-01-09T08:40:48.326829Z","shell.execute_reply.started":"2022-01-09T08:40:47.846975Z"},"id":"4421e328","executionInfo":{"status":"ok","timestamp":1652189675961,"user_tz":-120,"elapsed":24363,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}}},"outputs":[],"source":["DATA_DIRECTORY = '/content/drive/MyDrive/Colab Notebooks/wklp/data'\n","\n","# one dict per JSON file, shaped as {'content': ..., 'meta': {...}}\n","docs = []\n","\n","for json_file in glob.glob(f'{DATA_DIRECTORY}/*.json'):\n","    # JSON text is UTF-8: decode explicitly instead of relying on the locale default\n","    with open(json_file, 'r', encoding='utf-8') as fin:\n","        json_content = json.load(fin)\n","\n","    # each file must provide the 'text', 'name' and 'url' keys (KeyError otherwise)\n","    doc = {'content': json_content['text'],\n","           'meta': {'name': json_content['name'],\n","                    'url': json_content['url']}}\n","    docs.append(doc)"]},{"cell_type":"code","source":["len(docs)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"GR6qWQAn72WG","executionInfo":{"status":"ok","timestamp":1652189679928,"user_tz":-120,"elapsed":9,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"3e17336f-1145-43ff-c3ca-fab7604343d1"},"id":"GR6qWQAn72WG","execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["1087"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","execution_count":6,"id":"aa231b94","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:40:48.796741Z","iopub.status.busy":"2022-01-09T08:40:48.796550Z","iopub.status.idle":"2022-01-09T08:40:48.805224Z","shell.execute_reply":"2022-01-09T08:40:48.804705Z","shell.execute_reply.started":"2022-01-09T08:40:48.796722Z"},"colab":{"base_uri":"https://localhost:8080/"},"id":"aa231b94","executionInfo":{"status":"ok","timestamp":1652189681394,"user_tz":-120,"elapsed":10,"user":{"displayName":"Stefano 
Fiorucci","userId":"12409279692445770059"}},"outputId":"a42147fb-b9a4-4500-cc96-ce73177030f9"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'content': \"Pete Lindstrom\\nPete Lindstrom was a citizen of Twin Peaks, Washington who was killed in the Blizzard of 1889.\\nHis death was witnessed by Knut Zimmerman, who reported that wind had plunged a candle from the Annual Candlelighting and Christmas Tree Ceremony into the back of Lindstrom's head, killing him.\",\n"," 'meta': {'name': 'Pete_Lindstrom',\n","  'url': 'https://twinpeaks.fandom.com/wiki/Pete_Lindstrom'}}"]},"metadata":{},"execution_count":6}],"source":["docs[5]"]},{"cell_type":"markdown","source":["## Define document store ([FAISS](https://github.com/facebookresearch/faiss)) and write documents\n","\n"],"metadata":{"id":"Yu3bAUPoLrPI"},"id":"Yu3bAUPoLrPI"},{"cell_type":"code","execution_count":8,"id":"bfe846df","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:40:59.678181Z","iopub.status.busy":"2022-01-09T08:40:59.678003Z","iopub.status.idle":"2022-01-09T08:40:59.753228Z","shell.execute_reply":"2022-01-09T08:40:59.752500Z","shell.execute_reply.started":"2022-01-09T08:40:59.678161Z"},"id":"bfe846df","executionInfo":{"status":"ok","timestamp":1652190218453,"user_tz":-120,"elapsed":10410,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"colab":{"base_uri":"https://localhost:8080/"},"outputId":"187c2d40-470a-4f87-ab50-ec4082bccb33"},"outputs":[{"output_type":"stream","name":"stderr","text":["INFO - haystack.modeling.model.optimization -  apex not found, won't use it. See https://nvidia.github.io/apex/\n","ERROR - root -  Failed to import 'magic' (from 'python-magic' and 'python-magic-bin' on Windows). FileTypeClassifier will not perform mimetype detection on extensionless files. 
Please make sure the necessary OS libraries are installed if you need this functionality.\n","INFO - haystack.telemetry -  Haystack sends anonymous usage data to understand the actual usage and steer dev efforts towards features that are most meaningful to users. You can opt-out at anytime by calling disable_telemetry() or by manually setting the environment variable HAYSTACK_TELEMETRY_ENABLED as described for different operating systems on the documentation page. More information at https://haystack.deepset.ai/guides/telemetry\n"]}],"source":["from haystack.document_stores import FAISSDocumentStore\n","\n","# the document store settings are those compatible with Embedding Retriever\n","document_store = FAISSDocumentStore(\n","    similarity=\"dot_product\",\n","    embedding_dim=768)"]},{"cell_type":"code","execution_count":9,"id":"bc5adb1c","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:41:04.538529Z","iopub.status.busy":"2022-01-09T08:41:04.538227Z","iopub.status.idle":"2022-01-09T08:41:05.147190Z","shell.execute_reply":"2022-01-09T08:41:05.146513Z","shell.execute_reply.started":"2022-01-09T08:41:04.538503Z"},"colab":{"base_uri":"https://localhost:8080/"},"id":"bc5adb1c","executionInfo":{"status":"ok","timestamp":1652190317389,"user_tz":-120,"elapsed":2085,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"4cc11a2d-5ce5-41c1-e5eb-a0ee411ab00b"},"outputs":[{"output_type":"stream","name":"stdout","text":["[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data]   Unzipping tokenizers/punkt.zip.\n"]},{"output_type":"stream","name":"stderr","text":["  0%|          | 0/1087 [00:00<?, ?docs/s]WARNING - haystack.nodes.preprocessor.preprocessor -  One or more sentence found with word count higher than the split length.\n","100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1087/1087 [00:01<00:00, 980.44docs/s]\n"]}],"source":["# preprocess documents, splitting by chunks of 200 words\n","\n","from haystack.nodes 
import PreProcessor\n","\n","processor = PreProcessor(\n","    clean_empty_lines=True,\n","    clean_whitespace=True,\n","    clean_header_footer=True,\n","    split_by=\"word\",\n","    split_length=200,\n","   split_respect_sentence_boundary=True,\n","    split_overlap=0,\n","    language ='en'\n",")\n","preprocessed_docs = processor.process(docs)"]},{"cell_type":"code","execution_count":11,"id":"41986306","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:41:07.414905Z","iopub.status.busy":"2022-01-09T08:41:07.414681Z","iopub.status.idle":"2022-01-09T08:41:07.418856Z","shell.execute_reply":"2022-01-09T08:41:07.418094Z","shell.execute_reply.started":"2022-01-09T08:41:07.414884Z"},"colab":{"base_uri":"https://localhost:8080/"},"id":"41986306","executionInfo":{"status":"ok","timestamp":1652190335845,"user_tz":-120,"elapsed":370,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"04292f05-fd20-432f-febc-8b91ce3d47c4"},"outputs":[{"output_type":"stream","name":"stdout","text":["<Document: id=3f6b71a59e1226326e53871d05393810, content='Pete Lindstrom\n","Pete Lindstrom was a citizen of Twin Peaks, Washington who was killed in the Blizzard ...'>\n"]}],"source":["print(preprocessed_docs[5])\n"]},{"cell_type":"code","source":["len(preprocessed_docs)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"b9PS0PkM_1EF","executionInfo":{"status":"ok","timestamp":1652190343399,"user_tz":-120,"elapsed":370,"user":{"displayName":"Stefano 
Fiorucci","userId":"12409279692445770059"}},"outputId":"25fba54f-46d9-4c53-b0c1-15e8a878cad0"},"id":"b9PS0PkM_1EF","execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/plain":["2825"]},"metadata":{},"execution_count":12}]},{"cell_type":"code","execution_count":81,"id":"191144b4","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:41:10.695292Z","iopub.status.busy":"2022-01-09T08:41:10.695064Z","iopub.status.idle":"2022-01-09T08:41:22.144864Z","shell.execute_reply":"2022-01-09T08:41:22.144203Z","shell.execute_reply.started":"2022-01-09T08:41:10.695271Z"},"colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["425730d860514e2d87c0870cbb943842","06c58f8fc29343fa96e36d5b1f8dd078","046fa73af99645cc88b49c0f3e5f96b7","e256a26a0f41436a9755c56f3ffebd11","1e2bf8bf2ab14c9e880c06b04f752a1b","1377c76f1051467fb391c2c0119b0634","4d4babe9fcb24dd7996ecbeb7006018f","ff4bc8be1b8041e6a116bc37e366bf96","e004a6c61f2d4e1d8e9d02c51dcc6ebd","88c675dce7bd4247842ffeb6470d31dd","1d447ec86fe84008b29495ecb78a7fac"]},"id":"191144b4","executionInfo":{"status":"ok","timestamp":1652179167100,"user_tz":-120,"elapsed":11491,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"c30f2216-2c6c-4f28-867c-dfc0bd76bc09"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Writing Documents:   0%|          | 0/2825 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"425730d860514e2d87c0870cbb943842"}},"metadata":{}}],"source":["# write documents\n","document_store.write_documents(preprocessed_docs)\n"]},{"cell_type":"markdown","source":["## Define retriever (Embedding Retriever) and generate document 
embeddings\n"],"metadata":{"id":"O7Eo9F7yf1N_"},"id":"O7Eo9F7yf1N_"},{"cell_type":"code","execution_count":82,"id":"7993e609","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:41:22.146473Z","iopub.status.busy":"2022-01-09T08:41:22.146213Z","iopub.status.idle":"2022-01-09T08:41:30.833036Z","shell.execute_reply":"2022-01-09T08:41:30.832333Z","shell.execute_reply.started":"2022-01-09T08:41:22.146441Z"},"colab":{"base_uri":"https://localhost:8080/","height":188,"referenced_widgets":["ab5054496cae4e56b8f884db8cfa1cf7","61f277dcf14c4cc692c1cf6dd7c5a846","c7a72de53d104ff2b470ffe9a24b5a05","0f2a6092eb35478693982c6ba694eedf","abe2fe0c05634127bc61ddae4ecbefe9","617fefdfbf594f9d84b64528d58e391e","364f355213fd49e89373c5cc2bbbd646","99cd62ad76d740d197ca16db71359c9f","161afc4e516a4436a7edd60c8fe12dbf","d03003493ce243d38512f5a3990a80f7","ae739e7eca68419ca55f741ee17e325c","bb09ce6273944cd9be20a5d4730acfe5","ddd00b44cb994eaca361ee9d182854f5","8fc242cfcf074a0dbdd852a2d65d3c43","8f74df40a42443e1beda8e8f25d33c4d","902784ed90204018afb1050e58ab5785","e5586e38136f4bedb7f2c12e7d7993ee","02e6b8d39ac1478e8b831690d542937b","f8cd3a71bd724590bb22f01100931b30","04989dd1884b48c795cf59aa33686866","2e4df25efaa64b95acb29e7bce65e4c0","520ae85fb0804dafa7c6a56a81b80769"]},"id":"7993e609","executionInfo":{"status":"ok","timestamp":1652179262209,"user_tz":-120,"elapsed":95127,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"f31a14d8-281c-46b0-f882-22f7eb98d338"},"outputs":[{"output_type":"stream","name":"stderr","text":["INFO - haystack.modeling.utils -  Using devices: CUDA:0\n","INFO - haystack.modeling.utils -  Number of GPUs: 1\n","INFO - haystack.nodes.retriever.dense -  Init retriever using embeddings of model sentence-transformers/multi-qa-mpnet-base-dot-v1\n","WARNING - haystack.nodes.retriever._embedding_encoder -  You are using a Sentence Transformer with the dot_product function. We recommend using cosine instead. 
This can be set when initializing the DocumentStore\n","INFO - haystack.document_stores.faiss -  Updating embeddings for 2811 docs...\n"]},{"output_type":"display_data","data":{"text/plain":["Updating Embedding:   0%|          | 0/2811 [00:00<?, ? docs/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ab5054496cae4e56b8f884db8cfa1cf7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Batches:   0%|          | 0/88 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"bb09ce6273944cd9be20a5d4730acfe5"}},"metadata":{}}],"source":["from haystack.nodes import EmbeddingRetriever\n","\n","retriever = EmbeddingRetriever(\n","    document_store=document_store,\n","   embedding_model=\"sentence-transformers/multi-qa-mpnet-base-dot-v1\",\n","   model_format=\"sentence_transformers\"\n",")\n","document_store.update_embeddings(retriever)"]},{"cell_type":"markdown","source":["## Save and export index\n"],"metadata":{"id":"9QhguDpYf_5u"},"id":"9QhguDpYf_5u"},{"cell_type":"code","source":["import shutil\n","import glob"],"metadata":{"id":"jLKDYZ1tnNZo","executionInfo":{"status":"ok","timestamp":1652180221595,"user_tz":-120,"elapsed":195,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}}},"id":"jLKDYZ1tnNZo","execution_count":102,"outputs":[]},{"cell_type":"code","source":["document_store.save(\"my_faiss_index.faiss\")"],"metadata":{"id":"7DVPCyzAhPEA","executionInfo":{"status":"ok","timestamp":1652180142621,"user_tz":-120,"elapsed":174,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}}},"id":"7DVPCyzAhPEA","execution_count":100,"outputs":[]},{"cell_type":"code","source":["OUT_DIR = '/content/drive/MyDrive/Colab Notebooks/wklp/'"],"metadata":{"id":"8fYMVd_ggJnw"},"id":"8fYMVd_ggJnw","execution_count":null,"outputs":[]},{"cell_type":"code","source":["for f in glob.glob('*faiss*.*')+glob.glob('faiss*.*'):\n","  
print(f)\n","  shutil.copy(f, OUT_DIR)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"F1GnHXZ4nPJI","executionInfo":{"status":"ok","timestamp":1652180314288,"user_tz":-120,"elapsed":508,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"31b385af-af6b-4ad2-dc1a-c48dd6956990"},"id":"F1GnHXZ4nPJI","execution_count":105,"outputs":[{"output_type":"stream","name":"stdout","text":["my_faiss_index.faiss\n","my_faiss_index.json\n","faiss_document_store.db\n","faiss_document_store.db\n"]}]},{"cell_type":"markdown","source":["## Define reader"],"metadata":{"id":"9x7Bo95fgTkm"},"id":"9x7Bo95fgTkm"},{"cell_type":"code","source":["from haystack.nodes import FARMReader\n"],"metadata":{"id":"9oJ3b3ukcT10","executionInfo":{"status":"ok","timestamp":1652177889794,"user_tz":-120,"elapsed":204,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}}},"id":"9oJ3b3ukcT10","execution_count":52,"outputs":[]},{"cell_type":"code","execution_count":88,"id":"f5299f38","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:42:49.473881Z","iopub.status.busy":"2022-01-09T08:42:49.473628Z","iopub.status.idle":"2022-01-09T08:42:58.250644Z","shell.execute_reply":"2022-01-09T08:42:58.249808Z","shell.execute_reply.started":"2022-01-09T08:42:49.473851Z"},"colab":{"base_uri":"https://localhost:8080/"},"id":"f5299f38","executionInfo":{"status":"ok","timestamp":1652179820100,"user_tz":-120,"elapsed":9316,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"0209e180-0307-4f7f-cfad-27866307cf4a"},"outputs":[{"output_type":"stream","name":"stderr","text":["INFO - haystack.modeling.utils -  Using devices: CUDA:0\n","INFO - haystack.modeling.utils -  Number of GPUs: 1\n","INFO - haystack.modeling.model.language_model -  LOADING MODEL\n","INFO - haystack.modeling.model.language_model -  =============\n","INFO - haystack.modeling.model.language_model -  Could not find 
deepset/roberta-base-squad2-distilled locally.\n","INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...\n","INFO - haystack.modeling.model.language_model -  Loaded deepset/roberta-base-squad2-distilled\n","INFO - haystack.modeling.utils -  Using devices: CUDA\n","INFO - haystack.modeling.utils -  Number of GPUs: 1\n","INFO - haystack.modeling.infer -  Got ya 2 parallel workers to do inference ...\n","INFO - haystack.modeling.infer -   0     0  \n","INFO - haystack.modeling.infer -  /w\\   /w\\ \n","INFO - haystack.modeling.infer -  /'\\   / \\ \n"]}],"source":["reader = FARMReader(model_name_or_path=\"deepset/roberta-base-squad2-distilled\", use_gpu=True)"]},{"cell_type":"markdown","source":["## Define and try pipeline (retriever + reader)"],"metadata":{"id":"tRgVAepagXo1"},"id":"tRgVAepagXo1"},{"cell_type":"code","execution_count":89,"id":"a2226345","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:42:58.252390Z","iopub.status.busy":"2022-01-09T08:42:58.252188Z","iopub.status.idle":"2022-01-09T08:42:58.256054Z","shell.execute_reply":"2022-01-09T08:42:58.255544Z","shell.execute_reply.started":"2022-01-09T08:42:58.252363Z"},"id":"a2226345","executionInfo":{"status":"ok","timestamp":1652179820101,"user_tz":-120,"elapsed":9,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}}},"outputs":[],"source":["from haystack.pipelines import ExtractiveQAPipeline\n"]},{"cell_type":"code","execution_count":90,"id":"0fc15887","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:42:58.257027Z","iopub.status.busy":"2022-01-09T08:42:58.256867Z","iopub.status.idle":"2022-01-09T08:42:58.261446Z","shell.execute_reply":"2022-01-09T08:42:58.260756Z","shell.execute_reply.started":"2022-01-09T08:42:58.257009Z"},"id":"0fc15887","executionInfo":{"status":"ok","timestamp":1652179820102,"user_tz":-120,"elapsed":8,"user":{"displayName":"Stefano 
Fiorucci","userId":"12409279692445770059"}}},"outputs":[],"source":["pipe = ExtractiveQAPipeline(reader, retriever)\n"]},{"cell_type":"code","execution_count":91,"id":"24fd8084","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:42:58.262798Z","iopub.status.busy":"2022-01-09T08:42:58.262569Z","iopub.status.idle":"2022-01-09T08:42:58.267587Z","shell.execute_reply":"2022-01-09T08:42:58.267054Z","shell.execute_reply.started":"2022-01-09T08:42:58.262772Z"},"id":"24fd8084","executionInfo":{"status":"ok","timestamp":1652179820444,"user_tz":-120,"elapsed":349,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}}},"outputs":[],"source":["import time\n","from haystack.utils import print_answers"]},{"cell_type":"code","execution_count":99,"id":"e8bae423","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:56:33.759683Z","iopub.status.busy":"2022-01-09T08:56:33.759457Z","iopub.status.idle":"2022-01-09T08:56:34.894724Z","shell.execute_reply":"2022-01-09T08:56:34.894183Z","shell.execute_reply.started":"2022-01-09T08:56:33.759662Z"},"colab":{"base_uri":"https://localhost:8080/","height":920,"referenced_widgets":["1158604d42434203bc096856ed22bab8","c21f9accc8854d5fa3b60e4cd4891923","e13a9fe3eba54b7e8b5efe219f515f46","d0921da424dd4f68bcf85099820d17ba","d64dc173913146f0a6a413b158177ba5","f620a1ca0d47465ba079ecc3518b7abe","a44f766857084ecc95f9ea1ab879085e","f3e510c515b2456a8025c4b2e71f0463","4904f0e333824ff39f40807d415b9af0","898ea2f834cb444f920eeae85f941ebf","82055fffe4bc447492b9ab67d0d04ad2"]},"id":"e8bae423","executionInfo":{"status":"ok","timestamp":1652180092923,"user_tz":-120,"elapsed":2574,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"82f874ca-77df-4933-c9f1-ce55a8065ece"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Batches:   0%|          | 0/1 [00:00<?, 
?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1158604d42434203bc096856ed22bab8"}},"metadata":{}},{"output_type":"stream","name":"stderr","text":["Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]/usr/local/lib/python3.7/dist-packages/haystack/modeling/model/prediction_head.py:483: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n","  start_indices = flat_sorted_indices // max_seq_len\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  5.75 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  5.42 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  7.92 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.85 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.09 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  6.00 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.42 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 19.19 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 20.71 Batches/s]\n","Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 13.32 Batches/s]"]},{"output_type":"stream","name":"stdout","text":["\n","2.474968910217285\n","\n","Query: Where is Twin Peaks\n","Answers:\n","[   {   'answer': 'Washington',\n","        'context': 'Highway J\\n'\n","                   'Highway J was a 
highway that ran through Twin Peaks, '\n","                   'Washington. Notable buildings\\n'\n","                   \"Gentleman Jim's\\n\"\n","                   \"Horne's Department Store\\n\"\n","                   'Pine View Motel ',\n","        'score': 0.9937074482440948},\n","    {   'answer': 'Washington',\n","        'context': 'Chapel-in-the-Woods\\n'\n","                   'Chapel-in-the-Woods was a chapel in Twin Peaks, '\n","                   'Washington. Hank Jennings and Norma Jennings as well as Ed '\n","                   'Hurley and Nadine Hurle',\n","        'score': 0.9566615521907806},\n","    {   'answer': 'northeastern Washington State',\n","        'context': 'eriff Harry S. Truman\\n'\n","                   'Twin Peaks was a small logging town in northeastern '\n","                   'Washington State, five miles south of the Canadian border '\n","                   'and twelve miles w',\n","        'score': 0.9068273603916168},\n","    {   'answer': 'along the shores of Black Lake',\n","        'context': 'od National Forest. By 1888, the town of Twin Peaks '\n","                   'existed along the shores of Black Lake and was settled by '\n","                   'refugees, trappers, and thieves. 
The chi',\n","        'score': 0.4931739866733551},\n","    {   'answer': 'Twin Peaks, Washington',\n","        'context': 'ation Guide\\n'\n","                   'For descriptions of locations seen in the show, see Twin '\n","                   'Peaks, Washington\\n'\n","                   '\"Location Guide\" is a featurette originally released in '\n","                   'the 200',\n","        'score': 0.49235279858112335}]\n"]},{"output_type":"stream","name":"stderr","text":["\n"]}],"source":["start_time=time.time()\n","\n","prediction = pipe.run(\n","    query=\"Where is Twin Peaks\", params={\"Retriever\": {\"top_k\": 10}, \"Reader\": {\"top_k\": 5}}\n",")\n","\n","end_time=time.time()\n","\n","print()\n","print(end_time - start_time)\n","print_answers(prediction, details=\"medium\")\n"]},{"cell_type":"code","execution_count":71,"id":"be150456","metadata":{"execution":{"iopub.execute_input":"2022-01-09T08:56:36.820622Z","iopub.status.busy":"2022-01-09T08:56:36.820402Z","iopub.status.idle":"2022-01-09T08:56:36.830219Z","shell.execute_reply":"2022-01-09T08:56:36.826251Z","shell.execute_reply.started":"2022-01-09T08:56:36.820601Z"},"colab":{"base_uri":"https://localhost:8080/"},"id":"be150456","executionInfo":{"status":"ok","timestamp":1652178864337,"user_tz":-120,"elapsed":22,"user":{"displayName":"Stefano Fiorucci","userId":"12409279692445770059"}},"outputId":"89216adc-4242-49cc-e3d8-efd52aa6d608"},"outputs":[{"output_type":"stream","name":"stdout","text":["\n","Query: Who killed Laura Palmer?\n","Answers:\n","[   {   'answer': 'Leland',\n","        'context': '\" he remembered the name Laura had whispered into his ear '\n","                   'in his dream.\\n'\n","                   ' Leland was taken back to the station and while under '\n","                   'control of BOB, he confe',\n","        'score': 0.8553578555583954},\n","    {   'answer': 'Benjamin Horne',\n","        'context': 'urdering Maddy just before she intended to go home.\\n'\n","        
           ' Two days later, Benjamin Horne had been arrested by the '\n","                   \"sheriff's department, with Sheriff Truman\",\n","        'score': 0.7564241290092468},\n","    {   'answer': 'Sarah',\n","        'context': \"Laura's murder\\n\"\n","                   ' Sarah stood in her kitchen the next morning, February 24, '\n","                   '1989.\\n'\n","                   ' She impatiently called for her daughter to wake up, but '\n","                   'received no a',\n","        'score': 0.2567792162299156},\n","    {   'answer': 'Sarah',\n","        'context': 'here Dale Cooper said she had once lived, Carrie Page '\n","                   'heard the sounds of Sarah calling Laura downstairs the '\n","                   'morning her body was discovered, and bega',\n","        'score': 0.10802637040615082},\n","    {   'answer': '\"Sheriff Truman',\n","        'context': 'ura would have left a note if she was with Bobby.\\n'\n","                   ' Leland then said, \"Sheriff Truman,\" leading her to worry '\n","                   'even more.\\n'\n","                   \" Sarah's suspicion was confirme\",\n","        'score': 0.016497892793267965}]\n"]}],"source":["print_answers(prediction, details=\"medium\")\n"]},{"cell_type":"code","execution_count":null,"id":"4aa6b6b4","metadata":{"id":"4aa6b6b4"},"outputs":[],"source":[""]},{"cell_type":"markdown","id":"f203c2e0","metadata":{"id":"f203c2e0"},"source":["## Question generation (to be refined)"]},{"cell_type":"code","execution_count":null,"id":"c2c1a87d","metadata":{"execution":{"iopub.execute_input":"2022-01-08T12:29:00.752494Z","iopub.status.busy":"2022-01-08T12:29:00.752263Z","iopub.status.idle":"2022-01-08T12:29:00.755639Z","shell.execute_reply":"2022-01-08T12:29:00.754972Z","shell.execute_reply.started":"2022-01-08T12:29:00.752471Z"},"id":"c2c1a87d"},"outputs":[],"source":["from haystack.pipelines import QuestionGenerationPipeline\n","from haystack.nodes import 
QuestionGenerator\n","from haystack.utils import launch_es, print_questions"]},{"cell_type":"code","execution_count":null,"id":"bdcd6508","metadata":{"execution":{"iopub.execute_input":"2022-01-08T12:29:01.581977Z","iopub.status.busy":"2022-01-08T12:29:01.581664Z","iopub.status.idle":"2022-01-08T12:29:10.489482Z","shell.execute_reply":"2022-01-08T12:29:10.488940Z","shell.execute_reply.started":"2022-01-08T12:29:01.581939Z"},"id":"bdcd6508","outputId":"6799f654-1d0a-49af-9374-c67a82be6a8c"},"outputs":[{"name":"stderr","output_type":"stream","text":["INFO - haystack.modeling.utils -  Using devices: CUDA\n","INFO - haystack.modeling.utils -  Number of GPUs: 1\n"]}],"source":["question_generator = QuestionGenerator()\n","question_generation_pipeline = QuestionGenerationPipeline(question_generator)\n"]},{"cell_type":"code","execution_count":null,"id":"2f5150fb","metadata":{"execution":{"iopub.execute_input":"2022-01-08T12:29:12.331848Z","iopub.status.busy":"2022-01-08T12:29:12.331617Z","iopub.status.idle":"2022-01-08T12:29:56.858066Z","shell.execute_reply":"2022-01-08T12:29:56.857352Z","shell.execute_reply.started":"2022-01-08T12:29:12.331825Z"},"id":"2f5150fb","outputId":"636cceea-ea85-483b-c3e5-0acad865247f"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n"," * Generating questions for document 0: Zen a...\n","\n"]},{"name":"stderr","output_type":"stream","text":["/srv/conda/envs/saturn/lib/python3.9/site-packages/transformers/generation_utils.py:1839: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. 
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n","  next_indices = next_tokens // vocab_size\n"]},{"name":"stdout","output_type":"stream","text":["\n","Generated questions:\n"," -  Who wrote the book Zen and the Art of Motorcycle Maintenance?\n"," -  Where was a copy of the book kept?\n"," -  What year was Zen and the Art of Motorcycle Maintenance published?\n"," -  Who wrote the book?\n","\n"," * Generating questions for document 1: Not t...\n","\n","\n","Generated questions:\n"," -  What is the fourteenth episode of Twin Peaks?\n"," -  When did Part 14 air?\n"," -  Where is Cole in Buckhorn?\n"," -  What is Lucy Brennan's name?\n"," -  Who recognizes Cole's voice and connects him to Frank?\n"," -  Who explains his brother's whereabouts and tells him about the pages from Laura Palmer's diary?\n"," -  Who was arrested for murdering a doppelgänger of herself?\n"," -  What was the name of the first Blue Rose case?\n"," -  Who arrested Lois Duffy?\n"," -  What does Tammy deduce a blue rose is?\n"," -  What does Cole ask if Cooper mentioned Major Garland Briggs the last time they saw each other?\n"," -  What was the last time they saw each other?\n"," -  What did Diane recognize as belonging to her half-sister?\n"," -  Who is married to a Douglas Jones in Las Vegas?\n"," -  Where does Tammy call the FBI?\n"," -  Cole tells the FBI that the Joneses are wanted in relation to what?\n"," -  What does Cole tell Albert and Tammy about the Twin Peaks Sheriff's Department?\n"," -  What dream did Cole have about Monica Bellucci?\n"," -  Where did Cole and Monica meet in the dream?\n"," -  What was Cole's name?\n"," -  What did Cole say he saw himself on February 16, 1989?\n"," -  What day did Jeffries re-appear?\n"," -  Who does Bobby, Hawk, Andy, and Frank arrest for his criminal activities?\n"," -  Who instructs the others to put soil on Chad?\n"," -  Who instructs the 
others to put soil in their pockets?\n"," -  Where do the Major and Hawk find a nude eyeless woman?\n"," -  Who tells the Major the time is 2:53?\n"," -  What is the name of the tall man who appears before Andy?\n"," -  What device appears in Andy's hands that directs his attention to the ceiling?\n"," -  What figure regurgitates BOB in the ceiling?\n"," -  What is the name of the woman who screams?\n"," -  Andy is transported back to where?\n"," -  Andy is transported back to the woods where?\n"," -  Where does Andy appear back?\n"," -  Andy tells the other lawmen that the woman is important and must be kept under their watch?\n"," -  Who helps the woman settle in a jail cell as Chad yells out insults?\n"," -  What do Andy and Lucy leave and the woman makes noises, repeated by a drunk in another cell?\n"," -  James and Freddie take a break at what job?\n"," -  James and Freddie take a break at their job.\n"," -  What does Freddie say is a \"part of him\"?\n"," -  Who told James to go to Twin Peaks to find his destiny?\n"," -  How did James describe his life epiphany?\n"," -  Where did James go to find his destiny?\n"," -  What room does James hear whistling noise?\n"," -  Where is Sarah propositioned by a trucker?\n"," -  What does Megan do when a trucker threatens her?\n"," -  What happens when Megan takes a bite out of the trucker's throat?\n"," -  At the Roadhouse, Sophie and Megan discuss Billy.\n"," -  Who was seen in the yard of Megan and her mother, Tina?\n"," -  Who entered the kitchen, bleeding from his nose and mouth before suddenly leaving?\n"," -  Lissie performs what song?\n"," -  Who is the FBI Agent Tammy Preston?\n"," -  Who is Deputy Andy Brennan Michael Horse?\n"," -  Which FBI Agent is Randall Headley?\n"," -  Who is the Sheriff Frank Truman Harry Goaz?\n"," -  What is the FBI Deputy Director Gordon Cole?\n"," -  Who is Monica Bellucci's female friend Unknown?\n"," -  Who played Monica Bellucci's male friend?\n"," -  Who wrote the script for 
Twin Peaks?\n"," -  Who was the author of Twin Peaks: Fire Walk with Me?\n"," -  Who was Harry S. Truman?\n"," -  What is the name of the FBI's Twin Peak Sheriff's Department?\n","\n"," * Generating questions for document 2: The B...\n","\n","\n","Generated questions:\n"," -  Who were the Brunstons looking to adopt?\n"," -  What was Donnie's name?\n"," -  When did they visit Donnie?\n"," -  Who did they believe were employees of?\n"," -  What did Dick and Andy Brennan believe were employees of the boys' home?\n"," -  What was the name of the episode of season 2 of Twin Peaks?\n"," -  Who did Dick insist was not feeling well?\n","\n"," * Generating questions for document 3: Bill ...\n","\n","\n","Generated questions:\n"," -  Who was an assistant coach for the Twin Peaks High School football team during the 1968 season?\n","\n"," * Generating questions for document 4: Octob...\n","\n","\n","Generated questions:\n"," -  What is the 292nd day of the year in the Gregorian calendar?\n"," -  What year is Florencia Martin born?\n"," -  When is the first script revision of \"Episode 19\" submitted?\n","\n"," * Generating questions for document 5: Not t...\n","\n","\n","Generated questions:\n"," -  What is the fifth episode of the second season of Twin Peaks?\n"," -  Which season is the thirteenth episode?\n"," -  What season is episode 12 of?\n"," -  Who plays tricks on us?\n"," -  What was the case of the two-headed schizophrenic?\n"," -  What did both heads thought the other was following?\n"," -  Who shot the other right between the eyes?\n"," -  What did Cooper dream he was eating?\n"," -  How many extra minutes of yoga does Cooper do in the morning?\n"," -  How many extra minutes of yoga did Harry do in the morning?\n"," -  What did Harry find under the bed while doing a headstand?\n"," -  Which retired schoolteachers live next door to Harry?\n"," -  How many retired schoolteachers live next door to the Palmers' old place?\n"," -  What do the Palmer's have no memory 
of?\n"," -  How long will Lucy be visiting her sister and new nephew for?\n"," -  Tim Pinkle explains to Bobby and Shelly the workings of a harness to move a man from bed to what?\n"," -  Shelly says that the hearing for Leo today.\n"," -  Who holds court in the Roadhouse?\n"," -  Who urges Leland to be held without bail?\n"," -  Who leaves after the ruling is announced?\n"," -  Harold opens the door for Donna to be part of his living novel if he lets her read Laura's diary?\n"," -  What does Harold take out of a secret shelf?\n"," -  Where does Harold say he grew up?\n"," -  What does Donna ask Harold to tell about himself?\n"," -  Who says he grew up in books?\n"," -  Who playsfully takes the diary outside?\n"," -  What happens to Harold?\n"," -  What would be a mockery to try Leo?\n"," -  Who is the District Attorney for the town?\n"," -  What does the judge ask if Harry and Cooper think Leo is guilty?\n"," -  Who decides that Leo is not competent to stand trial?\n"," -  Who advises Cooper to keep his eyes on the woods?\n"," -  Big Ed and Nadine return home.\n"," -  Who greets Ed and Nadine when they return home?\n"," -  Where is Doctor Jacoby in Hawaii?\n"," -  Who is in Hawaii recovering from a heart attack?\n"," -  Who greets Ben in his office?\n"," -  What project does Mr. 
Tojamura offer Ben?\n"," -  Who gives Ben a check for $5,000?\n"," -  Who follows Hank down the hall of the hotel?\n"," -  What does Hank warn Ben about?\n"," -  Who warns Ben that Cooper is on his way?\n"," -  Who calls to ask if Ben is ready to play?\n"," -  What does Jean Renault tell Ben to do?\n"," -  Who instructs Ben where to leave the money?\n"," -  What does Cooper tell Ben to stay near?\n"," -  Who makes sure Cooper and the money are delivered?\n"," -  What do Maddy and Donna examine in Harold's home?\n"," -  What does Donna say she will signal with?\n"," -  Jean practices with what as he and Blackie eat fresh fruit?\n"," -  What is Blackie's sister's name?\n"," -  What does Andy do at the station?\n"," -  What does Harry call for the results of his semen analysis test?\n"," -  Harry declares that he is \"a whole damn town.\"\n"," -  Harry and Cooper examine a floor plan of what?\n"," -  What drug did Hawk find at his hotel?\n"," -  What is the name of the one-armed man that Hawk reports hasn't seen in two days?\n"," -  Cooper and Harry continue to plan their approach to where?\n"," -  Where is Lucy supposed to be staying?\n"," -  What is the number for?\n"," -  Who orders a cup of coffee to go at the diner?\n"," -  Maddy watches Harold's house through what?\n"," -  Who is inside, giving Harold a memory?\n"," -  When did Donna and Laura wear tight, short skirts to go to the roadhouse?\n"," -  Who were about 20?\n"," -  What did Donna suggest?\n"," -  What did Laura dance and do?\n"," -  What did Donna suggest to Laura?\n"," -  Who did Laura kiss?\n"," -  Who tells Harry the story was beautiful?\n"," -  What do Cooper and Harry creep toward?\n"," -  Who stares at Cooper and hoots?\n"," -  How does Harry incapacitate the guard?\n"," -  Who sees the backs of Jean and Blackie while surveillance tape is paused on a television?\n"," -  Who asks Jean why he likes Nancy better?\n"," -  What does Harold tell Donna about raising flowers?\n"," -  Who tells Cooper 
about raising flowers?\n"," -  Who signals through the window with a flashlight?\n"," -  What does Cooper force Nancy to do?\n"," -  What weapon does Cooper use to kill Blackie?\n"," -  What does Cooper do when he sees Harry?\n"," -  Who confronts Cooper?\n"," -  What weapon is thrown into the bodyguard's back?\n"," -  What does Hawk say Dale and Harry couldn't keep a secret?\n"," -  Who reports that the officers are leaving with Audrey?\n"," -  What does Jean find in Hank's pocket?\n"," -  Who tries to silently instruct Maddy to access the secret compartment while Harold gives her an orchid?\n"," -  What does Harold give Laura?\n"," -  What does Maddy find in the secret compartment of Laura's house?\n"," -  Who does Harold tell the girls that Laura knew?\n"," -  What is the name of the Special Agent who plays Donna Hayward?\n"," -  Who is the Sheriff who plays Shelly Johnson?\n"," -  Who plays Donna Hayward?\n"," -  Who plays Dr. Will Hayward in the film?\n"," -  What role does Peggy Lipton play in the movie?\n"," -  Andy Brennan Michael Horse as Deputy Tommy 'Hawk' Hill Grace Zabriskie as Sarah Palmer Lenny Von Dohlen as Harold Smith Royal Dano as Judge Clinton Sternwood Victoria Catlin as Blackie O'Reilly Ritch Brinkley as D.A. Daryl Lodwick Fumio Yamaguchi as Mr. 
Tojamura Mike Vendrell as Bodyguard Outside Bob Apisa as what?\n"," -  Bellina Logan as Louie Budway (voice)\n"," -  Who is the Bodyguard on Stairs?\n"," -  What is Louie Budway's voice?\n"," -  Who is Lawrence Jacoby?\n"," -  What happens to his face before he scratches it?\n"," -  Twin Peaks: Fire Walk with Me is also known as what?\n","\n"," * Generating questions for document 6: \"Albe...\n","\n","\n","Generated questions:\n"," -  Where was the Lamplighter Inn located?\n"," -  What was the name of the restaurant near Lewis Fork, Washington?\n"," -  What was the Lamplighter Inn described as?\n"," -  What did Dale Cooper order at the inn on February 24, 1989?\n"," -  What was the name of the restaurant in Spokane that served tuna fish sandwich, cherry pie, and coffee?\n"," -  Who were Diane Evans and Albert Rosenfield recommended to visit Twin Peaks?\n"," -  What is the name of the episode?\n"," -  What was the title of episode one of Twin Peaks?\n","\n"," * Generating questions for document 7: Janua...\n","\n","\n","Generated questions:\n"," -  What is the 15th day of the year in the Gregorian calendar?\n"," -  Who reports that Andy has returned from Holland?\n"," -  When is the second draft script of \"Episode 25\" submitted?\n","\n"," * Generating questions for document 8: Toole...\n","\n","\n","Generated questions:\n"," -  Who was Dale Cooper's scoutmaster in the Boy Scouts of America?\n"," -  When did Tooley choke on a dandelion during an \"eating in the wilds\" demonstration?\n"," -  How did Cooper perform the Heimlich maneuver?\n"," -  Who did Cooper receive a letter from?\n"," -  Who was the author of the letter Cooper received from J. 
Edgar Hoover?\n"," -  What is Dale Cooper's name?\n","\n"," * Generating questions for document 9: The S...\n","\n","\n","Generated questions:\n"," -  Who led the Stop Ghostwood campaign?\n"," -  What did Benjamin Horne fear for the pine weasel?\n","\n"," * Generating questions for document 10: Augus...\n","\n","\n","Generated questions:\n"," -  When does Ingrid Brucato die?\n"]}],"source":["for idx, document in enumerate(document_store):\n","        print(f\"\\n * Generating questions for document {idx}: {document.content[:5]}...\\n\")\n","        result = question_generation_pipeline.run(documents=[document])\n","        print_questions(result)\n","        if idx==10: break"]},{"cell_type":"code","execution_count":null,"id":"d9c03a6a","metadata":{"id":"d9c03a6a"},"outputs":[],"source":[""]}],"metadata":{"kernelspec":{"display_name":"saturn (Python 3)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.5"},"colab":{"name":"wklp_embeddingretriever.ipynb","provenance":[],"collapsed_sections":["viixGIJcKPSQ"],"toc_visible":true},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"425730d860514e2d87c0870cbb943842":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_06c58f8fc29343fa96e36d5b1f8dd078","IPY_MODEL_046fa73af99645cc88b49c0f3e5f96b7","IPY_MODEL_e256a26a0f41436a9755c56f3ffebd11"],"layout":"IPY_MODEL_1e2bf8bf2ab14c9e880c06b04f752a1b"}},"06c58f8fc29343fa96e36d5b1f8dd078":{"model_module":"@jupyter-widgets/controls","model_nam
e":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1377c76f1051467fb391c2c0119b0634","placeholder":"​","style":"IPY_MODEL_4d4babe9fcb24dd7996ecbeb7006018f","value":"Writing Documents: "}},"046fa73af99645cc88b49c0f3e5f96b7":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ff4bc8be1b8041e6a116bc37e366bf96","max":2825,"min":0,"orientation":"horizontal","style":"IPY_MODEL_e004a6c61f2d4e1d8e9d02c51dcc6ebd","value":2825}},"e256a26a0f41436a9755c56f3ffebd11":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_88c675dce7bd4247842ffeb6470d31dd","placeholder":"​","style":"IPY_MODEL_1d447ec86fe84008b29495ecb78a7fac","value":" 10000/? 
[00:11&lt;00:00, 898.29it/s]"}},"1e2bf8bf2ab14c9e880c06b04f752a1b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1377c76f1051467fb391c2c0119b0634":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,
"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4d4babe9fcb24dd7996ecbeb7006018f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ff4bc8be1b8041e6a116bc37e366bf96":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e004a6c61f2d4e1d8e9d02c51dcc6ebd":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"88c675dce7bd4247842ffeb6
470d31dd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1d447ec86fe84008b29495ecb78a7fac":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ab5054496cae4e56b8f884db8cfa1cf7":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_61f277dcf14c4cc692c1cf6dd7c5a846","IPY_MODEL_c7a72de53d104ff2b470ffe9a24b5a05","IPY_MODEL_0f2a6092eb35478693982c6ba694eedf"],"layout":"IPY_MODEL_abe2fe0c056341
27bc61ddae4ecbefe9"}},"61f277dcf14c4cc692c1cf6dd7c5a846":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_617fefdfbf594f9d84b64528d58e391e","placeholder":"​","style":"IPY_MODEL_364f355213fd49e89373c5cc2bbbd646","value":"Documents Processed: "}},"c7a72de53d104ff2b470ffe9a24b5a05":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_99cd62ad76d740d197ca16db71359c9f","max":2811,"min":0,"orientation":"horizontal","style":"IPY_MODEL_161afc4e516a4436a7edd60c8fe12dbf","value":2811}},"0f2a6092eb35478693982c6ba694eedf":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d03003493ce243d38512f5a3990a80f7","placeholder":"​","style":"IPY_MODEL_ae739e7eca68419ca55f741ee17e325c","value":" 10000/? 
[01:29&lt;00:00, 111.81 docs/s]"}},"abe2fe0c05634127bc61ddae4ecbefe9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"617fefdfbf594f9d84b64528d58e391e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":nu
ll,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"364f355213fd49e89373c5cc2bbbd646":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"99cd62ad76d740d197ca16db71359c9f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"161afc4e516a4436a7edd60c8fe12dbf":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d03003493ce243d38512f
5a3990a80f7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ae739e7eca68419ca55f741ee17e325c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bb09ce6273944cd9be20a5d4730acfe5":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ddd00b44cb994eaca361ee9d182854f5","IPY_MODEL_8fc242cfcf074a0dbdd852a2d65d3c43","IPY_MODEL_8f74df40a42443e1beda8e8f25d33c4d"],"layout":"IPY_MODEL_902784ed902
04018afb1050e58ab5785"}},"ddd00b44cb994eaca361ee9d182854f5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e5586e38136f4bedb7f2c12e7d7993ee","placeholder":"​","style":"IPY_MODEL_02e6b8d39ac1478e8b831690d542937b","value":"Batches: 100%"}},"8fc242cfcf074a0dbdd852a2d65d3c43":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f8cd3a71bd724590bb22f01100931b30","max":88,"min":0,"orientation":"horizontal","style":"IPY_MODEL_04989dd1884b48c795cf59aa33686866","value":88}},"8f74df40a42443e1beda8e8f25d33c4d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2e4df25efaa64b95acb29e7bce65e4c0","placeholder":"​","style":"IPY_MODEL_520ae85fb0804dafa7c6a56a81b80769","value":" 88/88 [01:28&lt;00:00,  
5.40it/s]"}},"902784ed90204018afb1050e58ab5785":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e5586e38136f4bedb7f2c12e7d7993ee":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"02e6b8d39ac1478e8b831690d542937b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f8cd3a71bd724590bb22f01100931b30":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"04989dd1884b48c795cf59aa33686866":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"2e4df25efaa64b95acb29e7bce65e4c0":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"520ae85fb0804dafa7c6a56a81b80769":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1158604d42434203bc096856ed22bab8":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c21f9accc8854d5fa3b60e4cd4891923","IPY_MODEL_e13a9fe3eba54b7e8b5efe219f515f46","IPY_MODEL_d0921da424dd4f68bcf85099820d17ba"],"layout":"IPY_MODEL_d64dc173913146f0a6a413b158177ba5"
}},"c21f9accc8854d5fa3b60e4cd4891923":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f620a1ca0d47465ba079ecc3518b7abe","placeholder":"​","style":"IPY_MODEL_a44f766857084ecc95f9ea1ab879085e","value":"Batches: 100%"}},"e13a9fe3eba54b7e8b5efe219f515f46":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f3e510c515b2456a8025c4b2e71f0463","max":1,"min":0,"orientation":"horizontal","style":"IPY_MODEL_4904f0e333824ff39f40807d415b9af0","value":1}},"d0921da424dd4f68bcf85099820d17ba":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_898ea2f834cb444f920eeae85f941ebf","placeholder":"​","style":"IPY_MODEL_82055fffe4bc447492b9ab67d0d04ad2","value":" 1/1 [00:00&lt;00:00,  
9.08it/s]"}},"d64dc173913146f0a6a413b158177ba5":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f620a1ca0d47465ba079ecc3518b7abe":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a44f766857084ecc95f9ea1ab879085e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f3e510c515b2456a8025c4b2e71f0463":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4904f0e333824ff39f40807d415b9af0":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"898ea2f834cb444f920eeae85f941ebf":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"82055fffe4bc447492b9ab67d0d04ad2":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":5}